fix: make sqlite session entry writes atomic

fix(memory-wiki): render native links relative to generated pages
test: isolate ACP Matrix plugin routing
2026-06-26 09:12:13 +08:00 · 2026-06-08 12:09:37 -07:00 · 2026-06-09 02:04:26 +09:00 · 2026-06-08 17:53:18 +01:00 · 2026-06-08 17:53:18 +01:00 · 2026-06-08 22:16:33 +05:30
1093 changed files with 111231 additions and 7365 deletions
--- a/apps/android/app/src/main/AndroidManifest.xml
+++ b/apps/android/app/src/main/AndroidManifest.xml
@@ -1,8 +1,9 @@
 <manifest xmlns:android="http://schemas.android.com/apk/res/android">
    <uses-permission android:name="android.permission.INTERNET" />
    <uses-permission android:name="android.permission.ACCESS_NETWORK_STATE" />
+    <uses-permission android:name="android.permission.CHANGE_NETWORK_STATE" />
    <uses-permission android:name="android.permission.FOREGROUND_SERVICE" />
-    <uses-permission android:name="android.permission.FOREGROUND_SERVICE_DATA_SYNC" />
+    <uses-permission android:name="android.permission.FOREGROUND_SERVICE_CONNECTED_DEVICE" />
    <uses-permission android:name="android.permission.FOREGROUND_SERVICE_MICROPHONE" />
    <uses-permission android:name="android.permission.POST_NOTIFICATIONS" />
    <uses-permission
@@ -50,7 +51,7 @@
        <service
            android:name=".NodeForegroundService"
            android:exported="false"
-            android:foregroundServiceType="dataSync|microphone" />
+            android:foregroundServiceType="connectedDevice|microphone" />
        <service
            android:name=".node.DeviceNotificationListenerService"
            android:label="@string/app_name"
--- a/apps/android/app/src/main/java/ai/openclaw/app/AppearanceThemeMode.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/AppearanceThemeMode.kt
@@ -0,0 +1,25 @@
+package ai.openclaw.app
+
+/** User-selectable app theme mode for Android appearance settings; null or unrecognized raw values and labels resolve to [Dark]. */
+enum class AppearanceThemeMode(
+  val rawValue: String,
+  val displayLabel: String,
+) {
+  System(rawValue = "system", displayLabel = "System"),
+  Dark(rawValue = "dark", displayLabel = "Dark"),
+  Light(rawValue = "light", displayLabel = "Light"),
+  ;
+
+  fun isDark(systemDark: Boolean): Boolean =
+    when (this) {
+      System -> systemDark
+      Dark -> true
+      Light -> false
+    }
+
+  companion object {
+    fun fromRawValue(value: String?): AppearanceThemeMode = entries.firstOrNull { it.rawValue == value?.trim()?.lowercase() } ?: Dark
+
+    fun fromDisplayLabel(label: String): AppearanceThemeMode = entries.firstOrNull { it.displayLabel.equals(label.trim(), ignoreCase = true) } ?: Dark
+  }
+}
--- a/apps/android/app/src/main/java/ai/openclaw/app/MainActivity.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/MainActivity.kt
@@ -14,6 +14,7 @@ import androidx.compose.material3.Surface
 import androidx.compose.material3.Text
 import androidx.compose.runtime.Composable
 import androidx.compose.runtime.LaunchedEffect
+import androidx.compose.runtime.collectAsState
 import androidx.compose.runtime.getValue
 import androidx.compose.runtime.mutableStateOf
 import androidx.compose.runtime.remember
@@ -64,8 +65,16 @@ class MainActivity : ComponentActivity() {
        activeViewModel = readyViewModel
      }

-      OpenClawTheme {
-        activeViewModel?.let { RootScreen(viewModel = it) } ?: StartupSurface()
+      val currentViewModel = activeViewModel
+      if (currentViewModel == null) {
+        OpenClawTheme {
+          StartupSurface()
+        }
+      } else {
+        val appearanceThemeMode by currentViewModel.appearanceThemeMode.collectAsState()
+        OpenClawTheme(themeMode = appearanceThemeMode) {
+          RootScreen(viewModel = currentViewModel)
+        }
      }
    }
  }
--- a/apps/android/app/src/main/java/ai/openclaw/app/MainViewModel.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/MainViewModel.kt
@@ -172,6 +172,7 @@ class MainViewModel(
  val canvasDebugStatusEnabled: StateFlow<Boolean> = prefs.canvasDebugStatusEnabled
  val installedAppsSharingEnabled: StateFlow<Boolean> = prefs.installedAppsSharingEnabled
  val speakerEnabled: StateFlow<Boolean> = prefs.speakerEnabled
+  val appearanceThemeMode: StateFlow<AppearanceThemeMode> = prefs.appearanceThemeMode
  val voiceCaptureMode: StateFlow<VoiceCaptureMode> = runtimeState(initial = VoiceCaptureMode.Off) { it.voiceCaptureMode }
  val micEnabled: StateFlow<Boolean> = runtimeState(initial = false) { it.micEnabled }

@@ -440,6 +441,10 @@ class MainViewModel(
    ensureRuntime().setSpeakerEnabled(enabled)
  }

+  fun setAppearanceThemeMode(mode: AppearanceThemeMode) {
+    prefs.setAppearanceThemeMode(mode)
+  }
+
  fun refreshGatewayConnection() {
    viewModelScope.launch(Dispatchers.Default) {
      ensureRuntime().refreshGatewayConnection()
--- a/apps/android/app/src/main/java/ai/openclaw/app/NodeForegroundService.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/NodeForegroundService.kt
@@ -23,7 +23,6 @@ import kotlinx.coroutines.launch
 class NodeForegroundService : Service() {
  private val scope: CoroutineScope = CoroutineScope(SupervisorJob() + Dispatchers.Main)
  private var notificationJob: Job? = null
-  private var didStartForeground = false
  private var voiceCaptureMode = VoiceCaptureMode.Off

  override fun onCreate() {
@@ -183,13 +182,7 @@ class NodeForegroundService : Service() {

  private fun startForegroundWithTypes(notification: Notification) {
    val serviceTypes = foregroundServiceTypesForVoiceMode(voiceCaptureMode)
-    if (didStartForeground) {
-      // Re-issue startForeground when Talk mode toggles so Android sees the microphone service type.
-      ServiceCompat.startForeground(this, NOTIFICATION_ID, notification, serviceTypes)
-      return
-    }
    ServiceCompat.startForeground(this, NOTIFICATION_ID, notification, serviceTypes)
-    didStartForeground = true
  }

  companion object {
@@ -200,19 +193,16 @@ class NodeForegroundService : Service() {
    private const val ACTION_SET_VOICE_CAPTURE_MODE = "ai.openclaw.app.action.SET_VOICE_CAPTURE_MODE"
    private const val EXTRA_VOICE_CAPTURE_MODE = "ai.openclaw.app.extra.VOICE_CAPTURE_MODE"

-    /** Starts the persistent node foreground service from UI lifecycle code. */
    fun start(context: Context) {
      val intent = Intent(context, NodeForegroundService::class.java)
      context.startForegroundService(intent)
    }

-    /** Requests disconnect through the service action path so notification actions and UI share behavior. */
    fun stop(context: Context) {
      val intent = Intent(context, NodeForegroundService::class.java).setAction(ACTION_STOP)
      context.startService(intent)
    }

-    /** Updates Android's foreground-service type before voice capture mode changes require microphone access. */
    fun setVoiceCaptureMode(
      context: Context,
      mode: VoiceCaptureMode,
@@ -231,11 +221,8 @@ class NodeForegroundService : Service() {
  }
 }

-/**
- * Foreground-service type mask required by Android for the current voice capture mode.
- */
 internal fun foregroundServiceTypesForVoiceMode(mode: VoiceCaptureMode): Int {
-  val base = ServiceInfo.FOREGROUND_SERVICE_TYPE_DATA_SYNC
+  val base = ServiceInfo.FOREGROUND_SERVICE_TYPE_CONNECTED_DEVICE
  return if (mode == VoiceCaptureMode.TalkMode) {
    base or ServiceInfo.FOREGROUND_SERVICE_TYPE_MICROPHONE
  } else {
@@ -243,9 +230,6 @@ internal fun foregroundServiceTypesForVoiceMode(mode: VoiceCaptureMode): Int {
  }
 }

-/**
- * Compact notification suffix for voice state; kept pure for service-notification tests.
- */
 internal fun voiceNotificationSuffix(
  mode: VoiceCaptureMode,
  manualMicEnabled: Boolean,
--- a/apps/android/app/src/main/java/ai/openclaw/app/SecurePrefs.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/SecurePrefs.kt
@@ -42,6 +42,7 @@ class SecurePrefs(
    private const val notificationsForwardingSessionKeyKey = "notifications.forwarding.sessionKey"
    private const val installedAppsSharingEnabledKey = "device.apps.sharing.enabled"
    private const val voiceMicEnabledKey = "voice.micEnabled"
+    private const val appearanceThemeModeKey = "appearance.themeMode"
  }

  private val appContext = context.applicationContext
@@ -181,6 +182,10 @@ class SecurePrefs(
  private val _speakerEnabled = MutableStateFlow(plainPrefs.getBoolean("voice.speakerEnabled", true))
  val speakerEnabled: StateFlow<Boolean> = _speakerEnabled

+  private val _appearanceThemeMode =
+    MutableStateFlow(AppearanceThemeMode.fromRawValue(plainPrefs.getString(appearanceThemeModeKey, null)))
+  val appearanceThemeMode: StateFlow<AppearanceThemeMode> = _appearanceThemeMode
+
  fun setLastDiscoveredStableId(value: String) {
    val trimmed = value.trim()
    plainPrefs.edit { putString("gateway.lastDiscoveredStableID", trimmed) }
@@ -525,6 +530,11 @@ class SecurePrefs(
    _speakerEnabled.value = value
  }

+  fun setAppearanceThemeMode(mode: AppearanceThemeMode) {
+    plainPrefs.edit { putString(appearanceThemeModeKey, mode.rawValue) }
+    _appearanceThemeMode.value = mode
+  }
+
  private fun loadNotificationForwardingPackages(): Set<String> {
    val raw = plainPrefs.getString(notificationsForwardingPackagesKey, null)?.trim()
    if (raw.isNullOrEmpty()) {
--- a/apps/android/app/src/main/java/ai/openclaw/app/ui/MobileUiTokens.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/ui/MobileUiTokens.kt
@@ -41,27 +41,27 @@ internal data class MobileColors(

 internal fun lightMobileColors() =
  MobileColors(
-    surface = Color(0xFFF6F7FA),
-    surfaceStrong = Color(0xFFECEEF3),
+    surface = Color(0xFFFAFBFC),
+    surfaceStrong = Color(0xFFEFF3F8),
    cardSurface = Color(0xFFFFFFFF),
-    border = Color(0xFFE5E7EC),
-    borderStrong = Color(0xFFD6DAE2),
-    text = Color(0xFF17181C),
-    textSecondary = Color(0xFF5D6472),
-    textTertiary = Color(0xFF99A0AE),
-    accent = Color(0xFF1D5DD8),
-    accentSoft = Color(0xFFECF3FF),
-    accentBorderStrong = Color(0xFF184DAF),
-    success = Color(0xFF2F8C5A),
-    successSoft = Color(0xFFEEF9F3),
-    warning = Color(0xFFC8841A),
-    warningSoft = Color(0xFFFFF8EC),
-    danger = Color(0xFFD04B4B),
-    dangerSoft = Color(0xFFFFF2F2),
-    codeBg = Color(0xFF15171B),
-    codeText = Color(0xFFE8EAEE),
-    codeBorder = Color(0xFF2B2E35),
-    codeAccent = Color(0xFF3FC97A),
+    border = Color(0xFFDDE3EC),
+    borderStrong = Color(0xFFC7D0DC),
+    text = Color(0xFF16181D),
+    textSecondary = Color(0xFF505B6A),
+    textTertiary = Color(0xFF8E98A7),
+    accent = Color(0xFF1B5ACB),
+    accentSoft = Color(0xFFEAF2FF),
+    accentBorderStrong = Color(0xFF174CA9),
+    success = Color(0xFF287F52),
+    successSoft = Color(0xFFEAF7F0),
+    warning = Color(0xFFAF7418),
+    warningSoft = Color(0xFFFFF4DF),
+    danger = Color(0xFFC94343),
+    dangerSoft = Color(0xFFFFECEC),
+    codeBg = Color(0xFFEFF3F8),
+    codeText = Color(0xFF172033),
+    codeBorder = Color(0xFFD7DDE7),
+    codeAccent = Color(0xFF287F52),
    chipBorderConnected = Color(0xFFCFEBD8),
    chipBorderConnecting = Color(0xFFD5E2FA),
    chipBorderWarning = Color(0xFFEED8B8),
--- a/apps/android/app/src/main/java/ai/openclaw/app/ui/OnboardingFlow.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/ui/OnboardingFlow.kt
@@ -34,6 +34,7 @@ import androidx.compose.foundation.BorderStroke
 import androidx.compose.foundation.Canvas
 import androidx.compose.foundation.Image
 import androidx.compose.foundation.background
+import androidx.compose.foundation.isSystemInDarkTheme
 import androidx.compose.foundation.layout.Arrangement
 import androidx.compose.foundation.layout.Box
 import androidx.compose.foundation.layout.Column
@@ -130,7 +131,9 @@ fun OnboardingFlow(
  viewModel: MainViewModel,
  modifier: Modifier = Modifier,
 ) {
-  ClawDesignTheme {
+  val appearanceThemeMode by viewModel.appearanceThemeMode.collectAsState()
+  val onboardingDark = appearanceThemeMode.isDark(systemDark = isSystemInDarkTheme())
+  ClawDesignTheme(dark = onboardingDark) {
    val context = LocalContext.current
    val statusText by viewModel.statusText.collectAsState()
    val gatewayConnectionProblem by viewModel.gatewayConnectionProblem.collectAsState()
@@ -159,6 +162,8 @@ fun OnboardingFlow(
    var connectAttemptStartedAtMs by rememberSaveable { mutableLongStateOf(0L) }
    var recoveryNowMs by remember { mutableLongStateOf(SystemClock.elapsedRealtime()) }

+    OpenClawSystemBarAppearance(lightAppearance = !onboardingDark && step != OnboardingStep.Welcome)
+
    val qrScannerOptions =
      remember {
        GmsBarcodeScannerOptions
@@ -223,10 +228,12 @@ fun OnboardingFlow(

    when (step) {
      OnboardingStep.Welcome ->
-        WelcomeScreen(
-          modifier = modifier,
-          onConnect = { step = OnboardingStep.Gateway },
-        )
+        ClawDesignTheme(dark = true) {
+          WelcomeScreen(
+            modifier = modifier,
+            onConnect = { step = OnboardingStep.Gateway },
+          )
+        }
      OnboardingStep.Gateway ->
        GatewaySetupScreen(
          modifier = modifier,
--- a/apps/android/app/src/main/java/ai/openclaw/app/ui/OpenClawTheme.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/ui/OpenClawTheme.kt
@@ -1,5 +1,6 @@
 package ai.openclaw.app.ui

+import ai.openclaw.app.AppearanceThemeMode
 import android.app.Activity
 import androidx.compose.foundation.isSystemInDarkTheme
 import androidx.compose.material3.MaterialTheme
@@ -8,34 +9,51 @@ import androidx.compose.material3.dynamicLightColorScheme
 import androidx.compose.runtime.Composable
 import androidx.compose.runtime.CompositionLocalProvider
 import androidx.compose.runtime.SideEffect
+import androidx.compose.runtime.staticCompositionLocalOf
 import androidx.compose.ui.graphics.Color
 import androidx.compose.ui.platform.LocalContext
 import androidx.compose.ui.platform.LocalView
 import androidx.core.view.WindowCompat

+private val LocalOpenClawDarkTheme = staticCompositionLocalOf { true } // Default is dark; OpenClawTheme provides the resolved value.
+
 /**
 * App theme wrapper that installs dynamic Material colors and legacy mobile color tokens.
 */
@Composable
-fun OpenClawTheme(content: @Composable () -> Unit) {
+fun OpenClawTheme(
+  themeMode: AppearanceThemeMode = AppearanceThemeMode.Dark,
+  content: @Composable () -> Unit,
+) {
  val context = LocalContext.current
-  val isDark = isSystemInDarkTheme()
+  val isDark = themeMode.isDark(systemDark = isSystemInDarkTheme())
  val colorScheme = if (isDark) dynamicDarkColorScheme(context) else dynamicLightColorScheme(context)
  val mobileColors = if (isDark) darkMobileColors() else lightMobileColors()

+  OpenClawSystemBarAppearance(lightAppearance = !isDark)
+
+  CompositionLocalProvider(
+    LocalMobileColors provides mobileColors,
+    LocalOpenClawDarkTheme provides isDark,
+  ) {
+    MaterialTheme(colorScheme = colorScheme, content = content)
+  }
+}
+
+@Composable
+internal fun OpenClawSystemBarAppearance(lightAppearance: Boolean) {
  val view = LocalView.current
  if (!view.isInEditMode) {
    SideEffect {
-      val window = (view.context as Activity).window
+      val window = (view.context as? Activity)?.window ?: return@SideEffect
      WindowCompat
        .getInsetsController(window, window.decorView)
-        .isAppearanceLightStatusBars = !isDark
+        .apply { // Configure both bars on one controller instead of looking it up twice.
+          isAppearanceLightStatusBars = lightAppearance
+          isAppearanceLightNavigationBars = lightAppearance
+        }
    }
  }
-
-  CompositionLocalProvider(LocalMobileColors provides mobileColors) {
-    MaterialTheme(colorScheme = colorScheme, content = content)
-  }
 }

 /**
@@ -44,9 +62,9 @@ fun OpenClawTheme(content: @Composable () -> Unit) {
@Composable
 fun overlayContainerColor(): Color {
  val scheme = MaterialTheme.colorScheme
-  val isDark = isSystemInDarkTheme()
+  val isDark = LocalOpenClawDarkTheme.current
  val base = if (isDark) scheme.surfaceContainerLow else scheme.surfaceContainerHigh
-  // Light mode: background stays dark (canvas), so clamp overlays away from pure-white glare.
+  // Light mode keeps overlays away from pure-white glare on the app canvas.
  return if (isDark) base else base.copy(alpha = 0.88f)
 }

--- a/apps/android/app/src/main/java/ai/openclaw/app/ui/SessionsScreen.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/ui/SessionsScreen.kt
@@ -217,7 +217,7 @@ private fun SessionRow(
  compact: Boolean,
  onClick: () -> Unit,
 ) {
-  Surface(onClick = onClick, color = ClawTheme.colors.canvas, contentColor = ClawTheme.colors.text) {
+  Surface(onClick = onClick, color = Color.Transparent, contentColor = ClawTheme.colors.text) {
    Column {
      Row(
        modifier = Modifier.fillMaxWidth().heightIn(min = 58.dp).padding(vertical = 5.dp),
--- a/apps/android/app/src/main/java/ai/openclaw/app/ui/SettingsScreens.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/ui/SettingsScreens.kt
@@ -1,5 +1,6 @@
 package ai.openclaw.app.ui

+import ai.openclaw.app.AppearanceThemeMode
 import ai.openclaw.app.BuildConfig
 import ai.openclaw.app.GatewayAgentSummary
 import ai.openclaw.app.GatewayCronJobSummary
@@ -146,7 +147,7 @@ internal fun SettingsDetailScreen(
    SettingsRoute.Notifications -> NotificationSettingsScreen(viewModel = viewModel, onBack = onBack)
    SettingsRoute.PhoneCapabilities -> PhoneCapabilitiesScreen(viewModel = viewModel, onBack = onBack)
    SettingsRoute.Gateway -> GatewaySettingsScreen(viewModel = viewModel, onBack = onBack)
-    SettingsRoute.Appearance -> AppearanceSettingsScreen(onBack = onBack)
+    SettingsRoute.Appearance -> AppearanceSettingsScreen(viewModel = viewModel, onBack = onBack)
    SettingsRoute.Health -> HealthLogsSettingsScreen(viewModel = viewModel, onBack = onBack)
    SettingsRoute.About -> AboutSettingsScreen(viewModel = viewModel, onBack = onBack)
  }
@@ -914,22 +915,40 @@ private fun GatewaySettingsScreen(
 }

@Composable
-private fun AppearanceSettingsScreen(onBack: () -> Unit) {
+private fun AppearanceSettingsScreen(
+  viewModel: MainViewModel,
+  onBack: () -> Unit,
+) {
+  val themeMode by viewModel.appearanceThemeMode.collectAsState()
+
  SettingsDetailFrame(title = "Appearance", subtitle = "A calm, high-contrast OpenClaw interface.", icon = Icons.Default.Palette, onBack = onBack) {
    SettingsMetricPanel(
      rows =
        listOf(
-          SettingsMetric("Theme", "Dark"),
+          SettingsMetric("Theme", appearanceThemeSummary(themeMode)),
          SettingsMetric("Contrast", "High"),
          SettingsMetric("Typography", "Readable"),
        ),
    )
    ClawPanel {
-      Text(text = "OpenClaw uses a fixed premium dark theme so it stays consistent across devices.", style = ClawTheme.type.body, color = ClawTheme.colors.textMuted)
+      Column(verticalArrangement = Arrangement.spacedBy(10.dp)) {
+        Text(text = "Theme", style = ClawTheme.type.section, color = ClawTheme.colors.text)
+        ClawSegmentedControl(
+          options = appearanceThemeOptions(),
+          selected = appearanceThemeSummary(themeMode),
+          onSelect = { selected -> viewModel.setAppearanceThemeMode(appearanceThemeModeForLabel(selected)) },
+        )
+      }
    }
  }
 }

+internal fun appearanceThemeSummary(mode: AppearanceThemeMode): String = mode.displayLabel
+
+internal fun appearanceThemeOptions(): List<String> = AppearanceThemeMode.entries.map { it.displayLabel }
+
+internal fun appearanceThemeModeForLabel(label: String): AppearanceThemeMode = AppearanceThemeMode.fromDisplayLabel(label)
+
 /** Converts raw gateway connection text into stable settings metric labels. */
 private fun gatewayStatusLabel(
  statusText: String,
--- a/apps/android/app/src/main/java/ai/openclaw/app/ui/ShellScreen.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/ui/ShellScreen.kt
@@ -22,6 +22,7 @@ import androidx.activity.compose.BackHandler
 import androidx.compose.foundation.BorderStroke
 import androidx.compose.foundation.background
 import androidx.compose.foundation.clickable
+import androidx.compose.foundation.isSystemInDarkTheme
 import androidx.compose.foundation.layout.Arrangement
 import androidx.compose.foundation.layout.Box
 import androidx.compose.foundation.layout.Column
@@ -114,7 +115,10 @@ fun ShellScreen(
  viewModel: MainViewModel,
  modifier: Modifier = Modifier,
 ) {
-  ClawDesignTheme {
+  val appearanceThemeMode by viewModel.appearanceThemeMode.collectAsState()
+  val shellDark = appearanceThemeMode.isDark(systemDark = isSystemInDarkTheme())
+  OpenClawSystemBarAppearance(lightAppearance = !shellDark)
+  ClawDesignTheme(dark = shellDark) {
    var activeTab by rememberSaveable { mutableStateOf(Tab.Overview) }
    var settingsRoute by rememberSaveable { mutableStateOf(SettingsRoute.Home) }
    var returnToOverviewFromSettings by rememberSaveable { mutableStateOf(false) }
@@ -751,7 +755,7 @@ private fun RecentSessionRowContent(
  metadata: String,
  onClick: () -> Unit,
 ) {
-  Surface(color = ClawTheme.colors.canvas, contentColor = ClawTheme.colors.text) {
+  Surface(color = Color.Transparent, contentColor = ClawTheme.colors.text) {
    Row(
      modifier =
        Modifier
@@ -849,6 +853,7 @@ private fun SettingsShellScreen(
  val nodesDevicesSummary by viewModel.nodesDevicesSummary.collectAsState()
  val channelsSummary by viewModel.channelsSummary.collectAsState()
  val dreamingSummary by viewModel.dreamingSummary.collectAsState()
+  val appearanceThemeMode by viewModel.appearanceThemeMode.collectAsState()

  LaunchedEffect(isConnected) {
    if (isConnected) {
@@ -910,7 +915,7 @@ private fun SettingsShellScreen(
              SettingsRow("Notifications", if (notificationForwardingEnabled) "Smart delivery" else "Off", Icons.Default.Notifications, route = SettingsRoute.Notifications),
              SettingsRow("Phone Capabilities", if (cameraEnabled) "Camera enabled" else "Locked", Icons.Default.Lock, status = !cameraEnabled, route = SettingsRoute.PhoneCapabilities),
              SettingsRow("Gateway", gatewaySummary(statusText, isConnected), Icons.Default.Cloud, status = isConnected, route = SettingsRoute.Gateway),
-              SettingsRow("Appearance", "Dark", Icons.Default.Palette, route = SettingsRoute.Appearance),
+              SettingsRow("Appearance", appearanceThemeSummary(appearanceThemeMode), Icons.Default.Palette, route = SettingsRoute.Appearance),
              SettingsRow("Health", "Diagnostics", Icons.Default.Settings, status = isConnected, route = SettingsRoute.Health),
              SettingsRow("About", "Version and update", Icons.Default.Storage, route = SettingsRoute.About),
            ),
--- a/apps/android/app/src/main/java/ai/openclaw/app/ui/design/ClawTheme.kt
+++ b/apps/android/app/src/main/java/ai/openclaw/app/ui/design/ClawTheme.kt
@@ -1,5 +1,8 @@
 package ai.openclaw.app.ui.design

+import ai.openclaw.app.ui.LocalMobileColors
+import ai.openclaw.app.ui.darkMobileColors
+import ai.openclaw.app.ui.lightMobileColors
 import ai.openclaw.app.ui.mobileFontFamily
 import androidx.compose.foundation.isSystemInDarkTheme
 import androidx.compose.material3.MaterialTheme
@@ -110,22 +113,22 @@ private val ClawDarkColors =

 private val ClawLightColors =
  ClawColors(
-    canvas = Color(0xFFF7F7F7),
-    surface = Color(0xFFFFFFFF),
+    canvas = Color(0xFFFAFBFC),
+    surface = Color(0xFFFFFEFB),
    surfaceRaised = Color(0xFFFFFFFF),
-    surfacePressed = Color(0xFFEDEDED),
-    border = Color(0xFFE0E0E0),
-    borderStrong = Color(0xFFBDBDBD),
-    text = Color(0xFF070707),
-    textMuted = Color(0xFF595959),
-    textSubtle = Color(0xFF8A8A8A),
-    primary = Color(0xFF050505),
+    surfacePressed = Color(0xFFE9EDF3),
+    border = Color(0xFFDDE3EC),
+    borderStrong = Color(0xFFC7D0DC),
+    text = Color(0xFF111318),
+    textMuted = Color(0xFF505865),
+    textSubtle = Color(0xFF8993A2),
+    primary = Color(0xFF111827),
    primaryText = Color(0xFFFFFFFF),
-    success = Color(0xFF157A3E),
-    successSoft = Color(0xFFEAF8EF),
-    warning = Color(0xFF9A6A12),
-    warningSoft = Color(0xFFFFF5DD),
-    danger = Color(0xFFB42323),
+    success = Color(0xFF217747),
+    successSoft = Color(0xFFE9F7EF),
+    warning = Color(0xFFA56F17),
+    warningSoft = Color(0xFFFFF3DC),
+    danger = Color(0xFFB82929),
    dangerSoft = Color(0xFFFFE9E9),
  )

@@ -168,10 +171,12 @@ internal fun ClawDesignTheme(
  content: @Composable () -> Unit,
 ) {
  val colors = if (dark) ClawDarkColors else ClawLightColors
+  val mobileColors = if (dark) darkMobileColors() else lightMobileColors()
  val typography = clawTypography(mobileFontFamily)

  CompositionLocalProvider(
    LocalClawColors provides colors,
+    LocalMobileColors provides mobileColors,
    LocalClawSpacing provides ClawSpacing(),
    LocalClawRadii provides ClawRadii(),
    LocalClawTypography provides typography,
--- a/apps/android/app/src/test/java/ai/openclaw/app/NodeForegroundServiceTest.kt
+++ b/apps/android/app/src/test/java/ai/openclaw/app/NodeForegroundServiceTest.kt
@@ -34,15 +34,15 @@ class NodeForegroundServiceTest {
  @Test
  fun foregroundServiceTypesForVoiceMode_addsMicrophoneOnlyForTalkMode() {
    assertEquals(
-      ServiceInfo.FOREGROUND_SERVICE_TYPE_DATA_SYNC,
+      ServiceInfo.FOREGROUND_SERVICE_TYPE_CONNECTED_DEVICE,
      foregroundServiceTypesForVoiceMode(VoiceCaptureMode.Off),
    )
    assertEquals(
-      ServiceInfo.FOREGROUND_SERVICE_TYPE_DATA_SYNC,
+      ServiceInfo.FOREGROUND_SERVICE_TYPE_CONNECTED_DEVICE,
      foregroundServiceTypesForVoiceMode(VoiceCaptureMode.ManualMic),
    )
    assertEquals(
-      ServiceInfo.FOREGROUND_SERVICE_TYPE_DATA_SYNC or ServiceInfo.FOREGROUND_SERVICE_TYPE_MICROPHONE,
+      ServiceInfo.FOREGROUND_SERVICE_TYPE_CONNECTED_DEVICE or ServiceInfo.FOREGROUND_SERVICE_TYPE_MICROPHONE,
      foregroundServiceTypesForVoiceMode(VoiceCaptureMode.TalkMode),
    )
  }
--- a/apps/android/app/src/test/java/ai/openclaw/app/SecurePrefsTest.kt
+++ b/apps/android/app/src/test/java/ai/openclaw/app/SecurePrefsTest.kt
@@ -77,6 +77,31 @@ class SecurePrefsTest {
    assertTrue(plainPrefs.getBoolean("device.apps.sharing.enabled", false))
  }

+  @Test
+  fun appearanceThemeMode_defaultsDarkForExistingInstalls() {
+    val context = RuntimeEnvironment.getApplication()
+    val plainPrefs = context.getSharedPreferences("openclaw.node", Context.MODE_PRIVATE)
+    plainPrefs.edit().clear().commit()
+    val prefs = SecurePrefs(context)
+
+    assertEquals(AppearanceThemeMode.Dark, prefs.appearanceThemeMode.value)
+    assertFalse(plainPrefs.contains("appearance.themeMode"))
+  }
+
+  @Test
+  fun setAppearanceThemeMode_persistsSelectedMode() {
+    val context = RuntimeEnvironment.getApplication()
+    val plainPrefs = context.getSharedPreferences("openclaw.node", Context.MODE_PRIVATE)
+    plainPrefs.edit().clear().commit()
+    val prefs = SecurePrefs(context)
+
+    prefs.setAppearanceThemeMode(AppearanceThemeMode.Light)
+
+    assertEquals(AppearanceThemeMode.Light, prefs.appearanceThemeMode.value)
+    assertEquals("light", plainPrefs.getString("appearance.themeMode", null))
+    assertEquals(AppearanceThemeMode.Light, SecurePrefs(context).appearanceThemeMode.value)
+  }
+
  @Test
  fun saveGatewayBootstrapToken_persistsSeparatelyFromSharedToken() {
    val context = RuntimeEnvironment.getApplication()
--- a/apps/android/app/src/test/java/ai/openclaw/app/ui/ShellScreenLogicTest.kt
+++ b/apps/android/app/src/test/java/ai/openclaw/app/ui/ShellScreenLogicTest.kt
@@ -1,5 +1,6 @@
 package ai.openclaw.app.ui

+import ai.openclaw.app.AppearanceThemeMode
 import ai.openclaw.app.GatewayChannelSummary
 import ai.openclaw.app.GatewayChannelsSummary
 import ai.openclaw.app.GatewayNodesDevicesSummary
@@ -17,6 +18,28 @@ class ShellScreenLogicTest {
    assertFalse(shellBottomNavVisible(keyboardVisible = false, commandOpen = true))
  }

+  @Test
+  fun appearanceThemeModeDefaultsToDarkForExistingInstalls() {
+    assertEquals(AppearanceThemeMode.Dark, AppearanceThemeMode.fromRawValue(null))
+    assertEquals(AppearanceThemeMode.Dark, AppearanceThemeMode.fromRawValue("unknown"))
+  }
+
+  @Test
+  fun appearanceThemeLabelsRoundTripFromSettingsOptions() {
+    assertEquals(listOf("System", "Dark", "Light"), appearanceThemeOptions())
+    assertEquals(AppearanceThemeMode.System, appearanceThemeModeForLabel("System"))
+    assertEquals(AppearanceThemeMode.Dark, appearanceThemeModeForLabel("Dark"))
+    assertEquals(AppearanceThemeMode.Light, appearanceThemeModeForLabel("Light"))
+  }
+
+  @Test
+  fun appearanceThemeModeResolvesAgainstSystemPreference() {
+    assertFalse(AppearanceThemeMode.System.isDark(systemDark = false))
+    assertTrue(AppearanceThemeMode.System.isDark(systemDark = true))
+    assertTrue(AppearanceThemeMode.Dark.isDark(systemDark = false))
+    assertFalse(AppearanceThemeMode.Light.isDark(systemDark = true))
+  }
+
  @Test
  fun homeAttentionRowsSurfaceGatewayWhenDisconnected() {
    val rows =
--- a/apps/ios/Sources/Design/SettingsProTab.swift
+++ b/apps/ios/Sources/Design/SettingsProTab.swift
@@ -57,6 +57,7 @@ struct SettingsProTab: View {
    @State var notificationActionText = "Request Access"
    @State var diagnosticsLastRunText = "Not run"
    @State var diagnosticsIssueCount: Int?
+    @State var showTalkIssueDetails = false

    var body: some View {
        NavigationStack {
@@ -129,6 +130,11 @@ struct SettingsProTab: View {
                    })
            }
        }
+        .sheet(isPresented: self.$showTalkIssueDetails) {
+            if let issue = self.appModel.talkMode.gatewayTalkCurrentFallbackIssue {
+                TalkRuntimeIssueDetailsSheet(issue: issue)
+            }
+        }
        .sheet(isPresented: self.$showQRScanner) {
            NavigationStack {
                QRScannerView(
--- a/apps/ios/Sources/Design/SettingsProTabActions.swift
+++ b/apps/ios/Sources/Design/SettingsProTabActions.swift
@@ -610,6 +610,21 @@ extension SettingsProTab {
        return self.appModel.talkMode.gatewayTalkApiKeyConfigured ? "Configured" : "Not configured"
    }

+    var gatewayTalkActiveVoiceDetail: String {
+        let title = self.appModel.talkMode.gatewayTalkActiveModeTitle.trimmingCharacters(in: .whitespacesAndNewlines)
+        let subtitle = (self.appModel.talkMode.gatewayTalkActiveModeSubtitle ?? "")
+            .trimmingCharacters(in: .whitespacesAndNewlines)
+        if title.isEmpty { return "Not active" }
+        if subtitle.isEmpty { return title }
+        return "\(title) • \(subtitle)"
+    }
+
+    var gatewayTalkLastIssueDetail: String? {
+        let detail = (self.appModel.talkMode.gatewayTalkLastIssueText ?? "")
+            .trimmingCharacters(in: .whitespacesAndNewlines)
+        return detail.isEmpty ? nil : detail
+    }
+
    func gatewayDetailLines(_ gateway: GatewayDiscoveryModel.DiscoveredGateway) -> [String] {
        var lines: [String] = []
        if let lanHost = gateway.lanHost { lines.append("LAN: \(lanHost)") }
--- a/apps/ios/Sources/Design/SettingsProTabSections.swift
+++ b/apps/ios/Sources/Design/SettingsProTabSections.swift
@@ -792,26 +792,44 @@ extension SettingsProTab {
    }

    var talkVoiceSettingsCard: some View {
-        ProCard(radius: SettingsLayout.cardRadius) {
-            VStack(alignment: .leading, spacing: 12) {
-                Picker("Provider", selection: self.talkProviderSelectionBinding) {
-                    ForEach(TalkModeProviderSelection.allCases) { option in
-                        Text(option.label).tag(option.rawValue)
-                    }
-                }
-                if self.shouldShowRealtimeVoicePicker {
-                    Picker("Realtime Voice", selection: self.talkRealtimeVoiceSelectionBinding) {
-                        Text("Gateway Default").tag("")
-                        ForEach(TalkModeRealtimeVoiceSelection.voices, id: \.self) { voice in
-                            Text(TalkModeRealtimeVoiceSelection.label(for: voice)).tag(voice)
+        VStack(alignment: .leading, spacing: 10) {
+            if self.gatewayConnected,
+               let issue = self.appModel.talkMode.gatewayTalkCurrentFallbackIssue
+            {
+                TalkRuntimeIssueBanner(
+                    issue: issue,
+                    onOpenSettings: nil,
+                    onShowDetails: {
+                        self.showTalkIssueDetails = true
+                    })
+            }
+            ProCard(radius: SettingsLayout.cardRadius) {
+                VStack(alignment: .leading, spacing: 12) {
+                    Picker("Provider", selection: self.talkProviderSelectionBinding) {
+                        ForEach(TalkModeProviderSelection.allCases) { option in
+                            Text(option.label).tag(option.rawValue)
                        }
                    }
+                    if self.shouldShowRealtimeVoicePicker {
+                        Picker("Realtime Voice", selection: self.talkRealtimeVoiceSelectionBinding) {
+                            Text("Gateway Default").tag("")
+                            ForEach(TalkModeRealtimeVoiceSelection.voices, id: \.self) { voice in
+                                Text(TalkModeRealtimeVoiceSelection.label(for: voice)).tag(voice)
+                            }
+                        }
+                    }
+                    self.detailRow("Voice Mode", value: self.appModel.talkMode.gatewayTalkVoiceModeTitle)
+                    Divider()
+                    self.detailRow("Active Voice", value: self.gatewayTalkActiveVoiceDetail)
+                    if let issue = self.gatewayTalkLastIssueDetail {
+                        Divider()
+                        self.detailRow("Last Voice Issue", value: issue)
+                    }
+                    Divider()
+                    self.detailRow("Transport", value: self.appModel.talkMode.gatewayTalkTransportLabel)
+                    Divider()
+                    self.detailRow("API Key", value: self.talkApiKeyStatus)
                }
-                self.detailRow("Voice Mode", value: self.appModel.talkMode.gatewayTalkVoiceModeTitle)
-                Divider()
-                self.detailRow("Transport", value: self.appModel.talkMode.gatewayTalkTransportLabel)
-                Divider()
-                self.detailRow("API Key", value: self.talkApiKeyStatus)
            }
        }
        .padding(.horizontal, OpenClawProMetric.pagePadding)
--- a/apps/ios/Sources/Design/TalkProTab.swift
+++ b/apps/ios/Sources/Design/TalkProTab.swift
@@ -8,6 +8,7 @@ struct TalkProTab: View {
        TalkDefaults.speakerphoneEnabledByDefault
    @AppStorage("talk.background.enabled") private var talkBackgroundEnabled: Bool = false
    @State private var showPermissionPrompt = false
+    @State private var showTalkIssueDetails = false
    var openSettings: () -> Void

    private var state: TalkProState {
@@ -30,6 +31,15 @@ struct TalkProTab: View {
                ScrollView {
                    VStack(alignment: .leading, spacing: 10) {
                        self.header
+                        if let fallbackIssue = self.fallbackIssue {
+                            TalkRuntimeIssueBanner(
+                                issue: fallbackIssue,
+                                onOpenSettings: self.openSettings,
+                                onShowDetails: {
+                                    self.showTalkIssueDetails = true
+                                })
+                                .padding(.horizontal, OpenClawProMetric.pagePadding)
+                        }
                        self.voiceHeroCard
                        self.conversationCard
                        self.voiceModeCard
@@ -62,6 +72,14 @@ struct TalkProTab: View {
            .presentationDetents([.medium, .large])
            .openClawSheetChrome()
        }
+        .sheet(isPresented: self.$showTalkIssueDetails) {
+            if let fallbackIssue = self.fallbackIssue {
+                TalkRuntimeIssueDetailsSheet(
+                    issue: fallbackIssue,
+                    onOpenSettings: self.openSettings)
+                    .openClawSheetChrome()
+            }
+        }
        .onAppear { self.alignPersistedTalkState() }
    }

@@ -173,9 +191,21 @@ struct TalkProTab: View {
                    .padding(.horizontal, 12)
                    .padding(.top, 11)
                    .padding(.bottom, 3)
-                self.infoRow(icon: "waveform", title: "Mode", value: self.appModel.talkMode.gatewayTalkVoiceModeTitle)
+                self.infoRow(
+                    icon: "waveform",
+                    title: "Configured",
+                    value: self.appModel.talkMode.gatewayTalkVoiceModeTitle)
+                Divider().padding(.leading, 54)
+                self.infoRow(
+                    icon: "waveform",
+                    title: "Active now",
+                    value: self.activeModeText)
                Divider().padding(.leading, 54)
                self.infoRow(icon: "antenna.radiowaves.left.and.right", title: "Transport", value: self.transportText)
+                if let issueText = self.talkIssueText {
+                    Divider().padding(.leading, 54)
+                    self.infoRow(icon: "exclamationmark.triangle.fill", title: "Last issue", value: issueText)
+                }
                Divider().padding(.leading, 54)
                self.infoRow(icon: "key.fill", title: "Permission", value: self.permissionText)
                Divider().padding(.leading, 54)
@@ -287,6 +317,11 @@ struct TalkProTab: View {
            GatewayStatusBuilder.build(appModel: self.appModel) == .connected
    }

+    /// Current fallback issue, reported only while the gateway is connected.
+    private var fallbackIssue: TalkRuntimeIssue? {
+        self.gatewayConnected ? self.appModel.talkMode.gatewayTalkCurrentFallbackIssue : nil
+    }
+
    private var headerSubtitle: String {
        let mode = self.appModel.talkMode.gatewayTalkVoiceModeTitle.trimmingCharacters(in: .whitespacesAndNewlines)
        let agent = self.appModel.chatAgentName.trimmingCharacters(in: .whitespacesAndNewlines)
@@ -317,6 +352,21 @@ struct TalkProTab: View {
        return "\(provider) • \(transport)"
    }

+    /// Active voice mode as "title • subtitle"; "Not active" when the title is blank.
+    private var activeModeText: String {
+        let talkMode = self.appModel.talkMode
+        let heading = talkMode.gatewayTalkActiveModeTitle.trimmingCharacters(in: .whitespacesAndNewlines)
+        let detail = (talkMode.gatewayTalkActiveModeSubtitle ?? "").trimmingCharacters(in: .whitespacesAndNewlines)
+        guard !heading.isEmpty else { return "Not active" }
+        return detail.isEmpty ? heading : "\(heading) • \(detail)"
+    }
+
+    /// Last talk issue text with surrounding whitespace removed; nil when absent or blank.
+    private var talkIssueText: String? {
+        let trimmed = self.appModel.talkMode.gatewayTalkLastIssueText?.trimmingCharacters(in: .whitespacesAndNewlines)
+        return (trimmed ?? "").isEmpty ? nil : trimmed
+    }
+
    private var permissionText: String {
        if let failure = self.appModel.talkMode.gatewayTalkPermissionState.failureMessage {
            return failure
--- a/apps/ios/Sources/Design/TalkRuntimeIssueBanner.swift
+++ b/apps/ios/Sources/Design/TalkRuntimeIssueBanner.swift
@@ -0,0 +1,142 @@
+import SwiftUI
+import UIKit
+
+/// Warning card for the iOS Speech fallback; each button renders only if its callback is set.
+struct TalkRuntimeIssueBanner: View {
+    @Environment(\.colorScheme) private var colorScheme
+
+    let issue: TalkRuntimeIssue
+    var onOpenSettings: (() -> Void)?
+    var onShowDetails: (() -> Void)?
+
+    private var iconName: String { "exclamationmark.triangle.fill" }
+    private var tint: Color { .orange }
+
+    var body: some View {
+        VStack(alignment: .leading, spacing: 10) {
+            self.summaryRow
+            self.actionRow
+        }
+        .frame(maxWidth: .infinity, alignment: .leading)
+        .padding(13)
+        .background { self.cardBackground }
+    }
+
+    private var summaryRow: some View {
+        HStack(alignment: .top, spacing: 10) {
+            Image(systemName: self.iconName)
+                .font(.headline.weight(.semibold))
+                .foregroundStyle(self.tint)
+                .frame(width: 20)
+                .padding(.top, 2)
+            VStack(alignment: .leading, spacing: 5) {
+                HStack(alignment: .firstTextBaseline, spacing: 8) {
+                    Text(self.issue.fallbackBannerTitle)
+                        .font(.subheadline.weight(.semibold))
+                        .multilineTextAlignment(.leading)
+                    Spacer(minLength: 0)
+                    Text(self.issue.fallbackBannerOwnerLabel)
+                        .font(.caption.weight(.semibold))
+                        .foregroundStyle(.secondary)
+                }
+                Text(self.issue.fallbackBannerMessage)
+                    .font(.footnote)
+                    .foregroundStyle(.secondary)
+                    .fixedSize(horizontal: false, vertical: true)
+                Text(self.issue.displayMessage)
+                    .font(.caption.weight(.medium))
+                    .foregroundStyle(self.tint)
+                    .fixedSize(horizontal: false, vertical: true)
+            }
+        }
+    }
+
+    private var actionRow: some View {
+        HStack(spacing: 10) {
+            if let onOpenSettings {
+                Button("Open Settings", action: onOpenSettings)
+                    .buttonStyle(.borderedProminent).controlSize(.small)
+            }
+            if let onShowDetails {
+                Button("Details", action: onShowDetails)
+                    .buttonStyle(.bordered).controlSize(.small)
+            }
+        }
+    }
+
+    private var cardBackground: some View {
+        RoundedRectangle(cornerRadius: 16, style: .continuous)
+            .fill(.ultraThickMaterial)
+            .overlay {
+                RoundedRectangle(cornerRadius: 16, style: .continuous)
+                    .strokeBorder(Color.primary.opacity(self.colorScheme == .dark ? 0.12 : 0.07), lineWidth: 1)
+            }
+            .shadow(color: .black.opacity(self.colorScheme == .dark ? 0.16 : 0.07), radius: 16, y: 7)
+    }
+}
+
+/// Sheet showing the fallback explanation plus copyable diagnostics for a Talk runtime issue.
+struct TalkRuntimeIssueDetailsSheet: View {
+    @Environment(\.dismiss) private var dismiss
+
+    let issue: TalkRuntimeIssue
+    var onOpenSettings: (() -> Void)?
+
+    @State private var copyFeedback: String?
+
+    var body: some View {
+        NavigationStack {
+            List {
+                Section { self.summary }
+                Section("Technical details") {
+                    Text(verbatim: self.issue.technicalDetails)
+                        .font(.system(.footnote, design: .monospaced))
+                        .foregroundStyle(.secondary)
+                        .textSelection(.enabled)
+                    Button("Copy diagnostics") {
+                        UIPasteboard.general.string = self.issue.technicalDetails
+                        self.copyFeedback = "Copied diagnostics"
+                    }
+                }
+                if let copyFeedback {
+                    Section {
+                        Text(copyFeedback)
+                            .font(.footnote)
+                            .foregroundStyle(.secondary)
+                    }
+                }
+            }
+            .navigationTitle("Talk fallback")
+            .navigationBarTitleDisplayMode(.inline)
+            .toolbar {
+                ToolbarItem(placement: .topBarLeading) {
+                    if let onOpenSettings {
+                        Button("Open Settings") {
+                            self.dismiss()
+                            onOpenSettings()
+                        }
+                    }
+                }
+                ToolbarItem(placement: .topBarTrailing) {
+                    Button("Done") { self.dismiss() }
+                }
+            }
+        }
+    }
+
+    /// Title, fallback explanation, and the specific issue message shown in the first section.
+    private var summary: some View {
+        VStack(alignment: .leading, spacing: 10) {
+            Text(self.issue.fallbackBannerTitle)
+                .font(.title3.weight(.semibold))
+            Text(self.issue.fallbackBannerMessage)
+                .font(.body)
+                .foregroundStyle(.secondary)
+            Text(self.issue.displayMessage)
+                .font(.footnote.weight(.semibold))
+                .foregroundStyle(.secondary)
+        }
+        .frame(maxWidth: .infinity, alignment: .leading)
+        .padding(.vertical, 4)
+    }
+}
--- a/apps/ios/Sources/Voice/RealtimeTalkRelaySession.swift
+++ b/apps/ios/Sources/Voice/RealtimeTalkRelaySession.swift
@@ -103,6 +103,12 @@ final class RealtimeTalkRelaySession {
        let failed: Bool
    }

+    /// Outcome of waiting for the relay to finish starting up.
+    private enum StartupWaitResult {
+        case ready, cancelled
+        case failed(TalkRuntimeIssue)
+    }
+
    private nonisolated static let expectedInputEncoding = "pcm16"
    private nonisolated static let expectedOutputEncoding = "pcm16"
    private nonisolated static let defaultSampleRateHz = 24000
@@ -110,16 +116,23 @@ final class RealtimeTalkRelaySession {
    private nonisolated static let bargeInRmsThreshold: Float = 0.08
    private nonisolated static let bargeInCooldownMs: Double = 900
    private nonisolated static let minOutputBeforeBargeInMs: Double = 250
+    private nonisolated static let startupReadyTimeoutSeconds = 12

    private let gateway: GatewayNodeSession
    private let options: Options
    private let pcmPlayer: PCMStreamingAudioPlaying
    private let logger = Logger(subsystem: "ai.openclaw", category: "RealtimeTalkRelay")
    private let onStatus: (String) -> Void
+    private let onIssue: (TalkRuntimeIssue) -> Void
    private let onSpeakingChanged: (Bool) -> Void

    private let audioEngine = AVAudioEngine()
    private var relaySessionId: String?
+    private var hasReceivedReady = false
+    private var hasReceivedFailure = false
+    private var startupIssue: TalkRuntimeIssue?
+    private var startupWaiter: CheckedContinuation<StartupWaitResult, Never>?
+    private var pendingPreRelayEvents: [EventFrame] = []
    private var inputSampleRateHz = Double(RealtimeTalkRelaySession.defaultSampleRateHz)
    private var outputSampleRateHz = Double(RealtimeTalkRelaySession.defaultSampleRateHz)
    private var eventTask: Task<Void, Never>?
@@ -151,34 +164,53 @@ final class RealtimeTalkRelaySession {
        options: Options,
        pcmPlayer: PCMStreamingAudioPlaying,
        onStatus: @escaping (String) -> Void,
+        onIssue: @escaping (TalkRuntimeIssue) -> Void = { _ in },
        onSpeakingChanged: @escaping (Bool) -> Void)
    {
        self.gateway = gateway
        self.options = options
        self.pcmPlayer = pcmPlayer
        self.onStatus = onStatus
+        self.onIssue = onIssue
        self.onSpeakingChanged = onSpeakingChanged
    }

    func start() async throws {
        self.isClosed = false
+        self.hasReceivedReady = false
+        self.hasReceivedFailure = false
+        self.startupIssue = nil
+        self.startupWaiter = nil
+        self.pendingPreRelayEvents.removeAll()
        self.onStatus("Connecting realtime…")
-        let result = try await self.createRelaySession()
-        guard let relaySessionId = result.relaysessionid?.trimmingCharacters(in: .whitespacesAndNewlines),
-              !relaySessionId.isEmpty
-        else {
-            throw NSError(domain: "RealtimeTalkRelay", code: 1, userInfo: [
-                NSLocalizedDescriptionKey: "Gateway did not return a realtime relay session",
-            ])
-        }
-        self.relaySessionId = relaySessionId
+        let eventStream = await self.gateway.subscribeServerEvents(bufferingNewest: 200)
+        self.startEventPump(stream: eventStream)
        do {
+            let result = try await self.createRelaySession()
+            guard let relaySessionId = result.relaysessionid?.trimmingCharacters(in: .whitespacesAndNewlines),
+                  !relaySessionId.isEmpty
+            else {
+                throw NSError(domain: "RealtimeTalkRelay", code: 1, userInfo: [
+                    NSLocalizedDescriptionKey: "Gateway did not return a realtime relay session",
+                ])
+            }
+            self.relaySessionId = relaySessionId
            self.audioSender = RealtimeAudioSender(gateway: self.gateway, relaySessionId: relaySessionId)
-            let eventStream = await self.gateway.subscribeServerEvents(bufferingNewest: 200)
-            self.startEventPump(stream: eventStream)
            self.configureAudioContract(result.audio)
            try self.startMicrophonePump()
-            self.onStatus("Listening (Realtime)")
+            self.onStatus("Waiting for realtime…")
+            await self.drainPendingPreRelayEvents()
+            switch await self.waitForStartupResult(timeoutSeconds: Self.startupReadyTimeoutSeconds) {
+            case .ready:
+                return
+            case let .failed(issue):
+                self.close(sendClose: true)
+                throw NSError(domain: "RealtimeTalkRelay", code: 6, userInfo: [
+                    NSLocalizedDescriptionKey: issue.displayMessage,
+                ])
+            case .cancelled:
+                return
+            }
        } catch {
            let createdRelaySessionId = self.relaySessionId
            self.close(sendClose: false)
@@ -196,6 +228,7 @@ final class RealtimeTalkRelaySession {
    private func close(sendClose: Bool) {
        guard !self.isClosed else { return }
        self.isClosed = true
+        self.finishStartupWait(.cancelled)
        self.stopMicrophonePump()
        self.eventTask?.cancel()
        self.eventTask = nil
@@ -299,14 +332,21 @@ final class RealtimeTalkRelaySession {
        guard event.event == "talk.event",
              let payload = event.payload?.dictionaryValue
        else { return }
-        if let relaySessionId,
-           payload["relaySessionId"]?.stringValue != relaySessionId
-        {
+        guard let relaySessionId else {
+            self.pendingPreRelayEvents.append(event)
+            if self.pendingPreRelayEvents.count > 200 {
+                self.pendingPreRelayEvents.removeFirst(self.pendingPreRelayEvents.count - 200)
+            }
+            return
+        }
+        if payload["relaySessionId"]?.stringValue != relaySessionId {
            return
        }
        guard let type = payload["type"]?.stringValue else { return }
        switch type {
        case "ready":
+            self.hasReceivedReady = true
+            self.finishStartupWait(.ready)
            self.onStatus("Listening (Realtime)")
        case "audio":
            guard let base64 = payload["audioBase64"]?.stringValue,
@@ -331,17 +371,107 @@ final class RealtimeTalkRelaySession {
            await self.handleToolCall(payload)
        case "error":
            let message = payload["message"]?.stringValue ?? "Realtime failed"
+            let issue = Self.issue(
+                payload: payload,
+                fallbackMessage: message,
+                fallbackProvider: self.options.provider,
+                fallbackModel: self.options.model)
            GatewayDiagnostics.log("talk realtime: error=\(Self.safeLogMessage(message))")
+            self.hasReceivedFailure = true
+            self.startupIssue = issue
+            self.onIssue(issue)
+            self.finishStartupWait(.failed(issue))
            self.onStatus(message)
        case "close":
            GatewayDiagnostics.log("talk realtime: close")
-            self.onStatus("Ready")
+            if self.hasReceivedReady {
+                self.onStatus("Ready")
+            } else if !self.hasReceivedFailure {
+                let issue = TalkRuntimeIssue(
+                    code: .realtimeUnavailable,
+                    message: "Realtime closed before it became ready.",
+                    provider: self.options.provider,
+                    model: self.options.model,
+                    transport: "gateway-relay",
+                    phase: "connect")
+                self.onIssue(issue)
+                self.startupIssue = issue
+                self.finishStartupWait(.failed(issue))
+                self.onStatus("Realtime failed before connecting")
+            }
            self.close(sendClose: false)
        default:
            return
        }
    }

+    /// Waits for startup to resolve as ready, failed, or cancelled, failing after `timeoutSeconds`.
+    private func waitForStartupResult(timeoutSeconds: Int) async -> StartupWaitResult {
+        var timer: Task<Void, Never>?
+        defer { timer?.cancel() } // a finished wait must not leave a timer that can fail a later start()
+        return await withCheckedContinuation { continuation in
+            // Resolve immediately when the outcome is already known instead of parking a waiter.
+            if self.isClosed { return continuation.resume(returning: .cancelled) }
+            if self.hasReceivedReady { return continuation.resume(returning: .ready) }
+            if let issue = self.startupIssue { return continuation.resume(returning: .failed(issue)) }
+            self.startupWaiter = continuation
+            timer = Task { [weak self] in
+                try? await Task.sleep(nanoseconds: UInt64(max(0, timeoutSeconds)) * 1_000_000_000)
+                if !Task.isCancelled { await self?.timeoutStartupWaiterIfNeeded() }
+            }
+        }
+    }
+
+    /// Replays, in arrival order, the events buffered before the relay session id was known.
+    private func drainPendingPreRelayEvents() async {
+        // Snapshot and clear first: handleGatewayEvent can append to the buffer again.
+        let buffered = self.pendingPreRelayEvents
+        self.pendingPreRelayEvents = []
+        for bufferedEvent in buffered { await self.handleGatewayEvent(bufferedEvent) }
+    }
+
+    private func finishStartupWait(_ result: StartupWaitResult) {
+        let pending = self.startupWaiter // nil when nothing is waiting on startup
+        self.startupWaiter = nil // clear before resuming so the continuation is resumed only once
+        pending?.resume(returning: result)
+    }
+
+    /// Fails the pending startup wait with a timeout issue; a no-op once startup has resolved.
+    private func timeoutStartupWaiterIfNeeded() {
+        let alreadyResolved = self.isClosed || self.hasReceivedReady || self.startupIssue != nil
+        if alreadyResolved || self.startupWaiter == nil { return }
+        let timeoutIssue = TalkRuntimeIssue(
+            code: .realtimeUnavailable,
+            message: "Realtime did not become ready in time.",
+            provider: self.options.provider,
+            model: self.options.model,
+            transport: "gateway-relay",
+            phase: "connect")
+        self.hasReceivedFailure = true
+        self.startupIssue = timeoutIssue
+        self.onIssue(timeoutIssue)
+        self.onStatus(timeoutIssue.displayMessage)
+        self.finishStartupWait(.failed(timeoutIssue))
+    }
+
+    /// Builds a realtime-unavailable issue from an error payload, preferring the payload's
+    /// provider/model over the supplied fallbacks; transport defaults to "gateway-relay".
+    private static func issue(
+        payload: [String: AnyCodable],
+        fallbackMessage: String,
+        fallbackProvider: String?,
+        fallbackModel: String?) -> TalkRuntimeIssue
+    {
+        // `fallbackMessage` is used as-is; the payload's own message is not consulted here.
+        func text(_ key: String) -> String? { payload[key]?.stringValue }
+        return TalkRuntimeIssue.realtimeUnavailable(
+            message: fallbackMessage,
+            provider: text("provider") ?? fallbackProvider,
+            model: text("model") ?? fallbackModel,
+            transport: text("transport") ?? "gateway-relay",
+            phase: text("phase"))
+    }
+
    private func recordOutputAudioChunk(byteCount: Int) {
        self.outputAudioChunkCount += 1
        self.outputAudioByteCount += byteCount
@@ -804,6 +934,25 @@ final class RealtimeTalkRelaySession {
 }

 extension RealtimeTalkRelaySession {
+    /// Test hook: sets the relay session id directly, without creating a session
+    /// through the gateway.
+    func _test_setRelaySessionId(_ relaySessionId: String) { self.relaySessionId = relaySessionId }
+
+    /// Test hook: feeds a single gateway event through the private
+    /// `handleGatewayEvent` handler.
+    func _test_handleGatewayEvent(_ event: EventFrame) async { await self.handleGatewayEvent(event) }
+
+    /// Test hook: true when the startup wait resolves as `.cancelled`.
+    func _test_waitForStartupCancelled(timeoutSeconds: Int) async -> Bool {
+        let outcome = await self.waitForStartupResult(timeoutSeconds: timeoutSeconds)
+        guard case .cancelled = outcome else { return false }
+        return true
+    }
+
+    /// Test hook: exposes the private startup-ready timeout, in seconds.
+    func _test_startupReadyTimeoutSeconds() -> Int {
+        return Self.startupReadyTimeoutSeconds }
+
    func _test_markOutputAudioStarted(nowMs: Double) {
        self.markOutputAudioStarted(byteCount: 4800, nowMs: nowMs)
    }
--- a/apps/ios/Sources/Voice/TalkModeGatewayConfig.swift
+++ b/apps/ios/Sources/Voice/TalkModeGatewayConfig.swift
@@ -7,6 +7,96 @@ enum TalkModeExecutionMode {
    case realtimeRelay
 }

+/// A runtime problem that kept realtime Talk from starting, with optional context for diagnostics.
+///
+/// All text fields are whitespace-trimmed on init. `occurredAt` participates in `Equatable`,
+/// so two issues built at different times compare unequal even when every other field matches.
+struct TalkRuntimeIssue: Equatable {
+    enum Code: String {
+        case realtimeUnavailable = "realtime_unavailable"
+    }
+
+    let code: Code
+    let message: String
+    let provider: String?
+    let model: String?
+    let transport: String?
+    let phase: String?
+    let occurredAt: Date
+
+    init(
+        code: Code,
+        message: String,
+        provider: String? = nil,
+        model: String? = nil,
+        transport: String? = nil,
+        phase: String? = nil,
+        occurredAt: Date = Date())
+    {
+        func trimmed(_ text: String?) -> String? { text?.trimmingCharacters(in: .whitespacesAndNewlines) }
+        self.code = code
+        self.message = message.trimmingCharacters(in: .whitespacesAndNewlines)
+        self.provider = trimmed(provider)
+        self.model = trimmed(model)
+        self.transport = trimmed(transport)
+        self.phase = trimmed(phase)
+        self.occurredAt = occurredAt
+    }
+
+    /// The trimmed message, or a generic sentence when the message is empty.
+    var displayMessage: String {
+        self.message.isEmpty ? "Realtime voice did not start." : self.message
+    }
+
+    // Fixed user-facing copy for the fallback state; none of it depends on the issue's fields.
+    var fallbackStatusText: String { "Listening (iOS Speech fallback)" }
+    var fallbackBannerTitle: String { "Using iOS Speech fallback" }
+    var fallbackBannerOwnerLabel: String { "Fallback active" }
+    var fallbackBannerMessage: String {
+        "Realtime voice did not start. Talk is running with iOS speech recognition and TTS."
+    }
+
+    /// Non-empty optional context as "label: value" entries, in provider/model/transport/phase order.
+    private var contextFields: [String] {
+        let labeled: [(label: String, value: String?)] = [
+            ("provider", self.provider), ("model", self.model),
+            ("transport", self.transport), ("phase", self.phase),
+        ]
+        return labeled.compactMap { entry -> String? in
+            guard let value = entry.value, !value.isEmpty else { return nil }
+            return "\(entry.label): \(value)"
+        }
+    }
+
+    /// Multi-line diagnostics: code and message first, then one line per available context field.
+    var technicalDetails: String {
+        let header = ["code: \(self.code.rawValue)", "message: \(self.displayMessage)"]
+        return (header + self.contextFields).joined(separator: "\n")
+    }
+
+    /// Single-line summary: the display message followed by context fields, joined with " • ".
+    var diagnosticSummary: String {
+        ([self.displayMessage] + self.contextFields).joined(separator: " • ")
+    }
+
+    /// Convenience factory for a `.realtimeUnavailable` issue stamped with the current time.
+    static func realtimeUnavailable(
+        message: String,
+        provider: String? = nil,
+        model: String? = nil,
+        transport: String? = nil,
+        phase: String? = nil) -> TalkRuntimeIssue
+    {
+        TalkRuntimeIssue(
+            code: .realtimeUnavailable,
+            message: message,
+            provider: provider,
+            model: model,
+            transport: transport,
+            phase: phase)
+    }
+}
+
 struct TalkVoiceModeDescriptor: Equatable {
    let title: String
    let subtitle: String?
--- a/apps/ios/Sources/Voice/TalkModeManager.swift
+++ b/apps/ios/Sources/Voice/TalkModeManager.swift
@@ -60,6 +60,10 @@ final class TalkModeManager: NSObject {
    var gatewayTalkVoiceModeTitle: String = "Not loaded"
    var gatewayTalkVoiceModeSubtitle: String?
    var gatewayTalkVoiceModeAccessibilityValue: String = "Not loaded"
+    var gatewayTalkActiveModeTitle: String = "Not active"
+    var gatewayTalkActiveModeSubtitle: String?
+    var gatewayTalkLastIssueText: String?
+    var gatewayTalkCurrentFallbackIssue: TalkRuntimeIssue?
    var gatewayTalkPermissionState: TalkGatewayPermissionState = .unknown

    var isGatewayConnected: Bool {
@@ -77,6 +81,12 @@ final class TalkModeManager: NSObject {
        case pushToTalk
    }

+    /// Result of one realtime start attempt; `.unavailable` carries the issue that prevented it.
+    private enum RealtimeStartResult {
+        case started, ignored
+        case unavailable(TalkRuntimeIssue)
+    }
+
    private var isStarting = false
    private var startAttemptID = 0
    private var captureMode: CaptureMode = .idle
@@ -129,6 +139,8 @@ final class TalkModeManager: NSObject {
        voiceId: nil,
        transport: nil,
        isRealtime: false)
+    private var pendingRealtimeIssue: TalkRuntimeIssue?
+    private var realtimeRelayStartIssue: TalkRuntimeIssue?
    private var apiKey: String?
    private var voiceAliases: [String: String] = [:]
    private var interruptOnSpeech: Bool = true
@@ -192,6 +204,8 @@ final class TalkModeManager: NSObject {
            }
        } else {
            self.stopRealtimeSession()
+            self.gatewayTalkActiveModeTitle = "Not active"
+            self.gatewayTalkActiveModeSubtitle = nil
            if self.isEnabled, !self.isSpeaking {
                self.statusText = "Offline"
            }
@@ -299,11 +313,15 @@ final class TalkModeManager: NSObject {
            return
        }
        if self.realtimeWebRTCEnabled {
-            let started = self.executionMode == .realtimeRelay
+            let realtimeStart = self.executionMode == .realtimeRelay
                ? await self.startRealtimeRelayIfAvailable()
                : await self.startRealtimeIfAvailable()
-            if started {
+            switch realtimeStart {
+            case .started, .ignored:
                return
+            case let .unavailable(issue):
+                self.pendingRealtimeIssue = issue
+                self.gatewayTalkLastIssueText = issue.diagnosticSummary
            }
        }

@@ -324,7 +342,11 @@ final class TalkModeManager: NSObject {
            self.captureMode = .continuous
            try self.startRecognition()
            self.isListening = true
-            self.statusText = "Listening"
+            if let issue = self.pendingRealtimeIssue {
+                self.markNativeFallbackActive(after: issue)
+            } else {
+                self.markNativeTalkActive()
+            }
            self.startSilenceMonitor()
            await self.subscribeChatIfNeeded(sessionKey: self.mainSessionKey)
            self.logger.info("listening")
@@ -379,6 +401,11 @@ final class TalkModeManager: NSObject {
        self.isPushToTalkActive = false
        self.captureMode = .idle
        self.statusText = "Off"
+        self.pendingRealtimeIssue = nil
+        self.gatewayTalkCurrentFallbackIssue = nil
+        self.gatewayTalkActiveModeTitle = "Not active"
+        self.gatewayTalkActiveModeSubtitle = nil
+        self.gatewayTalkLastIssueText = nil
        self.lastTranscript = ""
        self.lastHeard = nil
        self.silenceTask?.cancel()
@@ -425,6 +452,8 @@ final class TalkModeManager: NSObject {
        self.isPushToTalkActive = false
        self.captureMode = .idle
        self.statusText = "Paused"
+        self.gatewayTalkActiveModeTitle = "Paused"
+        self.gatewayTalkActiveModeSubtitle = nil
        self.lastTranscript = ""
        self.lastHeard = nil
        self.silenceTask?.cancel()
@@ -1047,8 +1076,10 @@ final class TalkModeManager: NSObject {
        }
    }

-    private func startRealtimeIfAvailable() async -> Bool {
-        guard let gateway else { return false }
+    private func startRealtimeIfAvailable() async -> RealtimeStartResult {
+        guard let gateway else {
+            return .unavailable(self.realtimeIssue(message: "Gateway not connected", phase: "start"))
+        }
        let startedAt = Self.nowSeconds()
        if self.prefetchedRealtimeSession == nil, let prefetchTask = self.realtimePrefetchTask {
            GatewayDiagnostics.log("talk.timeline realtime awaiting in-flight prefetch")
@@ -1069,49 +1100,53 @@ final class TalkModeManager: NSObject {
                prefetchedSession: prefetchedSession)
            guard self.realtimeSession === session, self.isEnabled else {
                session.stop()
-                return true
+                return .ignored
            }
            self.isListening = true
            self.captureMode = .continuous
-            self.statusText = "Listening"
+            self.markRealtimeActive()
            GatewayDiagnostics.log(
                "talk.timeline realtime start ready elapsedMs=\(Self.elapsedMs(since: startedAt))")
            GatewayDiagnostics.log("talk realtime: started direct OpenAI WebRTC session")
-            return true
+            return .started
        } catch {
            guard self.realtimeSession === session, self.isEnabled else {
                session.stop()
-                return true
+                return .ignored
            }
            self.stopRealtimeSession()
+            let issue = self.realtimeIssue(from: error, phase: "start")
            GatewayDiagnostics
                .log("talk realtime: unavailable; falling back to speech pipeline error=\(error.localizedDescription)")
            GatewayDiagnostics.log(
                "talk.timeline realtime start failed elapsedMs=\(Self.elapsedMs(since: startedAt)) "
                    + "error=\(error.localizedDescription)")
-            return false
+            return .unavailable(issue)
        }
    }

-    private func startRealtimeRelayIfAvailable() async -> Bool {
-        guard let gateway else { return false }
+    private func startRealtimeRelayIfAvailable() async -> RealtimeStartResult {
+        guard let gateway else {
+            return .unavailable(self.realtimeIssue(message: "Gateway not connected", phase: "start"))
+        }
        guard self.foregroundAudioCaptureAllowed else {
            self.statusText = "Paused"
            GatewayDiagnostics.log("talk realtime ignored: app backgrounded")
-            return true
+            return .ignored
        }
        if self.realtimeRelaySession != nil {
            self.captureMode = .continuous
            self.isListening = true
            GatewayDiagnostics.log("talk realtime ignored: already active")
-            return true
+            return .started
        }
        guard !self.realtimeRelayStartInFlight else {
            GatewayDiagnostics.log("talk realtime ignored: already starting")
-            return true
+            return .ignored
        }
        self.realtimeRelayStartInFlight = true
        defer { self.realtimeRelayStartInFlight = false }
+        self.prepareRealtimeRelayStart()
        GatewayDiagnostics.log("talk.timeline realtime relay start attempt sessionKey=\(self.mainSessionKey)")
        let startedAt = Self.nowSeconds()
        let relaySession = RealtimeTalkRelaySession(
@@ -1124,13 +1159,15 @@ final class TalkModeManager: NSObject {
            pcmPlayer: self.pcmPlayer,
            onStatus: { [weak self] status in
                guard let self else { return }
-                self.statusText = status
-                self.isListening = status.localizedCaseInsensitiveContains("listening")
-                if status.localizedCaseInsensitiveContains("thinking") {
-                    self.isListening = false
-                    self.isSpeaking = false
-                    self.isUserSpeechDetected = false
-                }
+                self.handleRealtimeRelayStatus(status)
+            },
+            onIssue: { [weak self] issue in
+                guard let self else { return }
+                self.realtimeRelayStartIssue = issue
+                self.pendingRealtimeIssue = issue
+                self.gatewayTalkLastIssueText = issue.diagnosticSummary
+                self.gatewayTalkActiveModeTitle = "Realtime unavailable"
+                self.gatewayTalkActiveModeSubtitle = issue.displayMessage
            },
            onSpeakingChanged: { [weak self] speaking in
                guard let self else { return }
@@ -1145,23 +1182,35 @@ final class TalkModeManager: NSObject {
            try await relaySession.start()
            guard self.realtimeRelaySession === relaySession, self.isEnabled else {
                relaySession.stop()
-                return true
+                return .ignored
+            }
+            if let issue = self.realtimeRelayStartIssue {
+                self.realtimeRelaySession = nil
+                relaySession.stop()
+                GatewayDiagnostics.log(
+                    "talk.timeline realtime relay start unavailable elapsedMs=\(Self.elapsedMs(since: startedAt)) "
+                        + "issue=\(issue.code.rawValue)")
+                return .unavailable(issue)
            }
            self.isListening = true
            self.captureMode = .continuous
+            self.realtimeRelayStartIssue = nil
            GatewayDiagnostics.log(
                "talk.timeline realtime relay start ready elapsedMs=\(Self.elapsedMs(since: startedAt))")
-            return true
+            return .started
        } catch {
            guard self.realtimeRelaySession === relaySession, self.isEnabled else {
                relaySession.stop()
-                return true
+                return .ignored
            }
            self.realtimeRelaySession = nil
+            let issue = self.realtimeRelayStartIssue
+                ?? self.realtimeIssue(from: error, phase: "start")
+            self.realtimeRelayStartIssue = nil
            GatewayDiagnostics.log(
                "talk.timeline realtime relay start failed elapsedMs=\(Self.elapsedMs(since: startedAt)) "
                    + "error=\(error.localizedDescription)")
-            return false
+            return .unavailable(issue)
        }
    }

@@ -2363,6 +2412,103 @@ extension TalkModeManager {
        self.gatewayTalkVoiceModeAccessibilityValue = descriptor.accessibilityValue
    }

+    private func markRealtimeActive() { // Realtime is live: clear recorded issue/fallback state.
+        self.pendingRealtimeIssue = nil
+        self.gatewayTalkCurrentFallbackIssue = nil
+        self.gatewayTalkLastIssueText = nil
+        self.gatewayTalkActiveModeTitle = self.configuredVoiceModeDescriptor.title
+        self.gatewayTalkActiveModeSubtitle = self.configuredVoiceModeDescriptor.subtitle
+        self.statusText = "Listening (Realtime)" // same string handleRealtimeRelayStatus matches on
+    }
+
+    private func handleRealtimeRelayStatus(_ status: String) { // Applies a relay status string to talk state.
+        if status == "Listening (Realtime)" { // exact match; markRealtimeActive() sets statusText itself
+            self.markRealtimeActive()
+        } else {
+            self.statusText = status
+            if status == "Ready" { // drop the relay session reference and reset the active-mode labels
+                self.realtimeRelaySession = nil
+                self.gatewayTalkActiveModeTitle = "Not active"
+                self.gatewayTalkActiveModeSubtitle = nil
+                self.isListening = false
+                self.isSpeaking = false
+                self.isUserSpeechDetected = false
+            }
+        }
+        self.isListening = status.localizedCaseInsensitiveContains("listening") // recomputed for every status
+        if status.localizedCaseInsensitiveContains("thinking") { // clear all audio-activity flags
+            self.isListening = false
+            self.isSpeaking = false
+            self.isUserSpeechDetected = false
+        }
+    }
+
+    private func prepareRealtimeRelayStart() { // Reset per-attempt state; gatewayTalkLastIssueText is not touched.
+        self.realtimeRelayStartIssue = nil
+        self.pendingRealtimeIssue = nil
+        self.gatewayTalkCurrentFallbackIssue = nil
+    }
+
+    private func markNativeTalkActive() { // Native speech path when no realtime issue is pending.
+        self.pendingRealtimeIssue = nil
+        self.gatewayTalkCurrentFallbackIssue = nil
+        self.gatewayTalkActiveModeTitle = "iOS Speech + TTS"
+        self.gatewayTalkActiveModeSubtitle = nil
+        self.statusText = "Listening" // gatewayTalkLastIssueText is not modified here
+    }
+
+    private func markNativeFallbackActive(after issue: TalkRuntimeIssue) { // Native speech used as fallback.
+        self.gatewayTalkActiveModeTitle = "iOS Speech fallback"
+        self.gatewayTalkActiveModeSubtitle = issue.displayMessage
+        self.gatewayTalkCurrentFallbackIssue = issue // keeps the triggering issue available to readers
+        self.gatewayTalkLastIssueText = issue.diagnosticSummary
+        self.statusText = issue.fallbackStatusText // pendingRealtimeIssue is not cleared here
+    }
+
+    private func realtimeIssue(message: String, phase: String) -> TalkRuntimeIssue {
+        TalkRuntimeIssue.realtimeUnavailable( // provider, model, and transport are read from this manager
+            message: message,
+            provider: self.realtimeProvider,
+            model: self.realtimeModelId,
+            transport: self.executionMode == .realtimeRelay ? "gateway-relay" : "webrtc",
+            phase: phase)
+    }
+
+    private func realtimeIssue(from error: Error, phase: String) -> TalkRuntimeIssue {
+        if let gatewayError = error as? GatewayResponseError, // prefer details carried by a gateway error
+           let issue = Self.talkRuntimeIssue(
+               from: gatewayError,
+               fallbackProvider: self.realtimeProvider,
+               fallbackModel: self.realtimeModelId,
+               fallbackTransport: self.executionMode == .realtimeRelay ? "gateway-relay" : "webrtc",
+               fallbackPhase: phase)
+        {
+            return issue
+        }
+        return self.realtimeIssue(message: error.localizedDescription, phase: phase) // generic fallback
+    }
+
+    private static func talkRuntimeIssue( // Returns nil when the error has no "talkIssue" dictionary.
+        from gatewayError: GatewayResponseError,
+        fallbackProvider: String?,
+        fallbackModel: String?,
+        fallbackTransport: String?,
+        fallbackPhase: String) -> TalkRuntimeIssue?
+    {
+        guard let rawIssue = gatewayError.details["talkIssue"]?.dictionaryValue else { return nil }
+        let message = rawIssue["message"]?.stringValue ?? gatewayError.message // every key is optional
+        let provider = rawIssue["provider"]?.stringValue ?? fallbackProvider
+        let model = rawIssue["model"]?.stringValue ?? fallbackModel
+        let transport = rawIssue["transport"]?.stringValue ?? fallbackTransport
+        let phase = rawIssue["phase"]?.stringValue ?? fallbackPhase
+        return TalkRuntimeIssue.realtimeUnavailable( // the payload's "code" key is not read
+            message: message,
+            provider: provider,
+            model: model,
+            transport: transport,
+            phase: phase)
+    }
+
    private func restoreConfiguredVoiceModeDescriptor() {
        self.applyVoiceModeDescriptor(self.configuredVoiceModeDescriptor)
    }
@@ -2836,7 +2982,11 @@ extension TalkModeManager: TalkRealtimeWebRTCSessionDelegate {
    func realtimeSession(_ session: TalkRealtimeWebRTCSession, didChangeStatus status: String) {
        guard session === self.realtimeSession else { return }
        GatewayDiagnostics.log("talk.timeline realtime status=\(status)")
-        self.statusText = status
+        if status == "Listening" {
+            self.markRealtimeActive()
+        } else {
+            self.statusText = status
+        }
        self.isListening = status == "Listening"
        self.isSpeaking = status == "Speaking"
        if status == "Thinking" {
@@ -2877,6 +3027,8 @@ extension TalkModeManager: TalkRealtimeWebRTCSessionDelegate {
        self.isListening = false
        self.isSpeaking = false
        self.isUserSpeechDetected = false
+        self.gatewayTalkActiveModeTitle = "Not active"
+        self.gatewayTalkActiveModeSubtitle = nil
        if self.isEnabled {
            self.statusText = self.gatewayConnected ? "Ready" : "Offline"
        }
@@ -2909,6 +3061,49 @@ extension TalkModeManager {
        self.gatewayTalkUsesRealtimeRelay
    }

+    func _test_markNativeFallbackActive(after issue: TalkRuntimeIssue) {
+        self.markNativeFallbackActive(after: issue)
+    }
+
+    func _test_recordRealtimeIssue(_ issue: TalkRuntimeIssue) {
+        self.pendingRealtimeIssue = issue
+        self.gatewayTalkLastIssueText = issue.diagnosticSummary
+        self.gatewayTalkActiveModeTitle = "Realtime unavailable"
+        self.gatewayTalkActiveModeSubtitle = issue.displayMessage
+    }
+
+    func _test_handleRealtimeRelayStatus(_ status: String) {
+        self.handleRealtimeRelayStatus(status)
+    }
+
+    func _test_prepareRealtimeRelayStart() {
+        self.prepareRealtimeRelayStart()
+    }
+
+    func _test_realtimeIssue(from error: Error, phase: String) -> TalkRuntimeIssue {
+        self.realtimeIssue(from: error, phase: phase)
+    }
+
+    func _test_hasPendingRealtimeIssue() -> Bool {
+        self.pendingRealtimeIssue != nil
+    }
+
+    func _test_gatewayTalkActiveModeTitle() -> String {
+        self.gatewayTalkActiveModeTitle
+    }
+
+    func _test_gatewayTalkActiveModeSubtitle() -> String? {
+        self.gatewayTalkActiveModeSubtitle
+    }
+
+    func _test_gatewayTalkLastIssueText() -> String? {
+        self.gatewayTalkLastIssueText
+    }
+
+    func _test_gatewayTalkCurrentFallbackIssue() -> TalkRuntimeIssue? {
+        self.gatewayTalkCurrentFallbackIssue
+    }
+
    func _test_seedTranscript(_ transcript: String) {
        self.lastTranscript = transcript
        self.lastHeard = Date()
--- a/apps/ios/SwiftSources.input.xcfilelist
+++ b/apps/ios/SwiftSources.input.xcfilelist
@@ -21,6 +21,7 @@ Sources/Design/SettingsProTab.swift
 Sources/Design/SettingsProTabSupport.swift
 Sources/Design/SettingsProTabSections.swift
 Sources/Design/SettingsProTabActions.swift
+Sources/Design/TalkRuntimeIssueBanner.swift
 Sources/Design/CommandCenterSupport.swift
 Sources/Design/AgentProTab+Overview.swift
 Sources/Design/AgentProTab+Destinations.swift
--- a/apps/ios/Tests/RealtimeTalkRelaySessionTests.swift
+++ b/apps/ios/Tests/RealtimeTalkRelaySessionTests.swift
@@ -1,5 +1,6 @@
 import Foundation
 import OpenClawKit
+import OpenClawProtocol
 import Testing
@testable import OpenClaw

@@ -37,4 +38,70 @@ private final class UnusedPCMStreamingAudioPlayer: PCMStreamingAudioPlaying {
        session._test_markOutputAudioStarted(nowMs: 500)
        #expect(session._test_outputStartedAtMs() == 500)
    }
+
+    @Test func closeAfterClassifiedErrorDoesNotReplaceIssue() async {
+        var issues: [TalkRuntimeIssue] = []
+        var statuses: [String] = []
+        let session = RealtimeTalkRelaySession(
+            gateway: GatewayNodeSession(),
+            options: .init(sessionKey: "main", provider: "openai", model: "gpt-realtime-2", voice: nil),
+            pcmPlayer: UnusedPCMStreamingAudioPlayer(),
+            onStatus: { statuses.append($0) },
+            onIssue: { issues.append($0) },
+            onSpeakingChanged: { _ in })
+        session._test_setRelaySessionId("relay-1")
+
+        await session._test_handleGatewayEvent(EventFrame(
+            type: "event",
+            event: "talk.event",
+            payload: AnyCodable([
+                "relaySessionId": "relay-1",
+                "type": "error",
+                "message": "OpenAI API key rejected with 401",
+                "code": "realtime_unavailable",
+                "provider": "openai",
+                "model": "gpt-realtime-2",
+                "transport": "gateway-relay",
+                "phase": "connect",
+            ]),
+            seq: nil,
+            stateversion: nil))
+        await session._test_handleGatewayEvent(EventFrame(
+            type: "event",
+            event: "talk.event",
+            payload: AnyCodable([
+                "relaySessionId": "relay-1",
+                "type": "close",
+                "reason": "error",
+            ]),
+            seq: nil,
+            stateversion: nil))
+
+        #expect(issues.map(\.code) == [.realtimeUnavailable])
+        #expect(statuses == ["OpenAI API key rejected with 401"])
+    }
+
+    @Test func closedRelayDoesNotWaitForStartupReady() async {
+        let session = RealtimeTalkRelaySession(
+            gateway: GatewayNodeSession(),
+            options: .init(sessionKey: "main", provider: "openai", model: "gpt-realtime-2", voice: nil),
+            pcmPlayer: UnusedPCMStreamingAudioPlayer(),
+            onStatus: { _ in },
+            onSpeakingChanged: { _ in })
+
+        session.stop()
+
+        #expect(await session._test_waitForStartupCancelled(timeoutSeconds: 1))
+    }
+
+    @Test func startupReadyWaitCoversGatewayConnectBudget() {
+        let session = RealtimeTalkRelaySession(
+            gateway: GatewayNodeSession(),
+            options: .init(sessionKey: "main", provider: "openai", model: "gpt-realtime-2", voice: nil),
+            pcmPlayer: UnusedPCMStreamingAudioPlayer(),
+            onStatus: { _ in },
+            onSpeakingChanged: { _ in })
+
+        #expect(session._test_startupReadyTimeoutSeconds() >= 12)
+    }
 }
--- a/apps/ios/Tests/TalkModeConfigParsingTests.swift
+++ b/apps/ios/Tests/TalkModeConfigParsingTests.swift
@@ -1,4 +1,5 @@
 import Foundation
+import OpenClawKit
 import Testing
@testable import OpenClaw

@@ -183,6 +184,132 @@ import Testing
        #expect(manager._test_gatewayTalkUsesRealtimeRelay())
    }

+    @Test func buildsGenericRealtimeFallbackIssueForDisplay() {
+        let issue = TalkRuntimeIssue.realtimeUnavailable(
+            message: "OpenAI API key rejected with 401",
+            provider: "openai",
+            model: "gpt-realtime-2",
+            transport: "gateway-relay",
+            phase: "start")
+
+        #expect(issue.code == .realtimeUnavailable)
+        #expect(issue.displayMessage == "OpenAI API key rejected with 401")
+        #expect(issue.diagnosticSummary.contains("provider: openai"))
+        #expect(issue.diagnosticSummary.contains("model: gpt-realtime-2"))
+        #expect(issue.fallbackStatusText == "Listening (iOS Speech fallback)")
+        #expect(issue.fallbackBannerTitle == "Using iOS Speech fallback")
+        #expect(issue.fallbackBannerOwnerLabel == "Fallback active")
+        #expect(issue
+            .fallbackBannerMessage ==
+            "Realtime voice did not start. Talk is running with iOS speech recognition and TTS.")
+        #expect(issue.technicalDetails.contains("code: realtime_unavailable"))
+    }
+
+    @Test func nativeFallbackKeepsRealtimeIssueVisible() {
+        let manager = TalkModeManager(allowSimulatorCapture: true)
+        let issue = TalkRuntimeIssue(
+            code: .realtimeUnavailable,
+            message: "Realtime closed before it became ready.",
+            provider: "openai",
+            model: "gpt-realtime-2",
+            transport: "gateway-relay",
+            phase: "connect")
+
+        manager._test_markNativeFallbackActive(after: issue)
+
+        #expect(manager.statusText == "Listening (iOS Speech fallback)")
+        #expect(manager._test_gatewayTalkActiveModeTitle() == "iOS Speech fallback")
+        #expect(manager._test_gatewayTalkActiveModeSubtitle() == "Realtime closed before it became ready.")
+        #expect(manager._test_gatewayTalkLastIssueText()?.contains("phase: connect") == true)
+        #expect(manager._test_gatewayTalkCurrentFallbackIssue() == issue)
+    }
+
+    @Test func gatewayTalkIssueDetailsDriveRealtimeFailureDisplay() {
+        let manager = TalkModeManager(allowSimulatorCapture: true)
+        let error = GatewayResponseError(
+            method: "talk.session.create",
+            code: "UNAVAILABLE",
+            message: "Error: OpenAI API key rejected with 401",
+            details: [
+                "talkIssue": AnyCodable([
+                    "code": "realtime_unavailable",
+                    "message": "OpenAI API key rejected with 401",
+                    "provider": "openai",
+                    "model": "gpt-realtime-2",
+                    "transport": "gateway-relay",
+                    "phase": "request",
+                ]),
+            ])
+
+        let issue = manager._test_realtimeIssue(from: error, phase: "start")
+
+        #expect(issue.code == .realtimeUnavailable)
+        #expect(issue.displayMessage == "OpenAI API key rejected with 401")
+        #expect(issue.provider == "openai")
+        #expect(issue.model == "gpt-realtime-2")
+        #expect(issue.transport == "gateway-relay")
+        #expect(issue.phase == "request")
+    }
+
+    @Test func relayStartupIssueSurvivesUntilReadyStatus() {
+        let manager = TalkModeManager(allowSimulatorCapture: true)
+        let issue = TalkRuntimeIssue(
+            code: .realtimeUnavailable,
+            message: "OpenAI API key rejected with 401",
+            provider: "openai",
+            model: "gpt-realtime-2",
+            transport: "gateway-relay",
+            phase: "connect")
+
+        manager._test_recordRealtimeIssue(issue)
+        manager._test_handleRealtimeRelayStatus("Connecting realtime…")
+
+        #expect(manager._test_gatewayTalkActiveModeTitle() == "Realtime unavailable")
+        #expect(manager._test_gatewayTalkLastIssueText()?.contains("OpenAI API key rejected") == true)
+
+        manager._test_handleRealtimeRelayStatus("Listening (Realtime)")
+
+        #expect(manager.statusText == "Listening (Realtime)")
+        #expect(manager._test_gatewayTalkLastIssueText() == nil)
+        #expect(manager._test_gatewayTalkCurrentFallbackIssue() == nil)
+    }
+
+    @Test func relayCloseClearsActiveRealtimeMode() {
+        let manager = TalkModeManager(allowSimulatorCapture: true)
+
+        manager._test_handleRealtimeRelayStatus("Listening (Realtime)")
+        #expect(manager.statusText == "Listening (Realtime)")
+        #expect(manager._test_gatewayTalkActiveModeTitle() != "Not active")
+
+        manager._test_handleRealtimeRelayStatus("Ready")
+
+        #expect(manager.statusText == "Ready")
+        #expect(manager._test_gatewayTalkActiveModeTitle() == "Not active")
+        #expect(manager._test_gatewayTalkActiveModeSubtitle() == nil)
+    }
+
+    @Test func relayRetryClearsStaleFallbackTriggerButKeepsLastIssueVisible() {
+        let manager = TalkModeManager(allowSimulatorCapture: true)
+        let issue = TalkRuntimeIssue(
+            code: .realtimeUnavailable,
+            message: "Realtime closed before it became ready.",
+            provider: "openai",
+            model: "gpt-realtime-2",
+            transport: "gateway-relay",
+            phase: "connect")
+
+        manager._test_recordRealtimeIssue(issue)
+        manager._test_markNativeFallbackActive(after: issue)
+        #expect(manager._test_hasPendingRealtimeIssue())
+        #expect(manager._test_gatewayTalkCurrentFallbackIssue() == issue)
+
+        manager._test_prepareRealtimeRelayStart()
+
+        #expect(!manager._test_hasPendingRealtimeIssue())
+        #expect(manager._test_gatewayTalkCurrentFallbackIssue() == nil)
+        #expect(manager._test_gatewayTalkLastIssueText()?.contains("Realtime closed before") == true)
+    }
+
    @Test func mapsWebRTCRealtimeTransportToGatewayRelayOnIOS() {
        let config: [String: Any] = [
            "talk": [
--- a/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift
+++ b/apps/shared/OpenClawKit/Sources/OpenClawProtocol/GatewayModels.swift
@@ -765,6 +765,7 @@ public struct AgentParams: Codable, Sendable {
    public let bootstrapcontextrunkind: AnyCodable?
    public let acpturnsource: String?
    public let internalruntimehandoffid: String?
+    public let execapprovalfollowupexpectedsessionid: String?
    public let internalevents: [[String: AnyCodable]]?
    public let inputprovenance: [String: AnyCodable]?
    public let suppresspromptpersistence: Bool?
@@ -806,6 +807,7 @@ public struct AgentParams: Codable, Sendable {
        bootstrapcontextrunkind: AnyCodable?,
        acpturnsource: String?,
        internalruntimehandoffid: String?,
+        execapprovalfollowupexpectedsessionid: String?,
        internalevents: [[String: AnyCodable]]?,
        inputprovenance: [String: AnyCodable]?,
        suppresspromptpersistence: Bool?,
@@ -846,6 +848,7 @@ public struct AgentParams: Codable, Sendable {
        self.bootstrapcontextrunkind = bootstrapcontextrunkind
        self.acpturnsource = acpturnsource
        self.internalruntimehandoffid = internalruntimehandoffid
+        self.execapprovalfollowupexpectedsessionid = execapprovalfollowupexpectedsessionid
        self.internalevents = internalevents
        self.inputprovenance = inputprovenance
        self.suppresspromptpersistence = suppresspromptpersistence
@@ -888,6 +891,7 @@ public struct AgentParams: Codable, Sendable {
        case bootstrapcontextrunkind = "bootstrapContextRunKind"
        case acpturnsource = "acpTurnSource"
        case internalruntimehandoffid = "internalRuntimeHandoffId"
+        case execapprovalfollowupexpectedsessionid = "execApprovalFollowupExpectedSessionId"
        case internalevents = "internalEvents"
        case inputprovenance = "inputProvenance"
        case suppresspromptpersistence = "suppressPromptPersistence"
--- a/docs/.generated/config-baseline.sha256
+++ b/docs/.generated/config-baseline.sha256
@@ -1,4 +1,4 @@
-a5a97a8b484acd13e68604037c8d8f448699700103c6ea2186f5914ad35a0623  config-baseline.json
-b0d668dbd794d2f54738152a4bcfd2a306c7954901e78d4dfbde7545a8301ce5  config-baseline.core.json
-0637c9bdcb9517f56049dd786563366877458d35df575328a6b80a890c8bc915  config-baseline.channel.json
+37b56008790612b8293930b6a29d74490e98daa90f954fca9d133fcc28645c4c  config-baseline.json
+75b64c2ea081369ba4306493313a8a4cd48b784145f92fed995e6b77a5df350d  config-baseline.core.json
+17d64c9799dfa239a49493413f1100bdd9237e9b67aaeae331a4604dbc227023  config-baseline.channel.json
 f9d1f50bfa8403891e76cd99dc1357cdece4a71e8ae18a39b190c2a14e6f97b0  config-baseline.plugin.json
--- a/docs/.generated/plugin-sdk-api-baseline.sha256
+++ b/docs/.generated/plugin-sdk-api-baseline.sha256
@@ -1,2 +1,2 @@
-fd2aaa281db68de9db32463e87fddecfb84b6db75080d0fe47719b2b9fff3d5c  plugin-sdk-api-baseline.json
-329a8fdad622d2ec801f99939ac6ac08685c3dd89e54aa3c2b4da4ac5580d504  plugin-sdk-api-baseline.jsonl
+de06fd99257e4b010e54578ea46605c3bc631c31cac5f68aaed4e301f924f8af  plugin-sdk-api-baseline.json
+1c7a5420c4bcb1ec08544ff43b83fa4d43f3c0dcda597a5a25aa5f5bab0cb199  plugin-sdk-api-baseline.jsonl
--- a/docs/.i18n/glossary.ar.json
+++ b/docs/.i18n/glossary.ar.json
@@ -35,6 +35,10 @@
    "source": "Heartbeat",
    "target": "Heartbeat"
  },
+  {
+    "source": "LINE",
+    "target": "LINE"
+  },
  {
    "source": "Mintlify",
    "target": "Mintlify"
--- a/docs/.i18n/glossary.de.json
+++ b/docs/.i18n/glossary.de.json
@@ -35,6 +35,10 @@
    "source": "Heartbeat",
    "target": "Heartbeat"
  },
+  {
+    "source": "LINE",
+    "target": "LINE"
+  },
  {
    "source": "Mintlify",
    "target": "Mintlify"
--- a/docs/.i18n/glossary.es.json
+++ b/docs/.i18n/glossary.es.json
@@ -35,6 +35,10 @@
    "source": "Heartbeat",
    "target": "Heartbeat"
  },
+  {
+    "source": "LINE",
+    "target": "LINE"
+  },
  {
    "source": "Mintlify",
    "target": "Mintlify"
--- a/docs/.i18n/glossary.fa.json
+++ b/docs/.i18n/glossary.fa.json
@@ -35,6 +35,10 @@
    "source": "Heartbeat",
    "target": "Heartbeat"
  },
+  {
+    "source": "LINE",
+    "target": "LINE"
+  },
  {
    "source": "Mintlify",
    "target": "Mintlify"
--- a/docs/.i18n/glossary.fr.json
+++ b/docs/.i18n/glossary.fr.json
@@ -35,6 +35,10 @@
    "source": "Heartbeat",
    "target": "Heartbeat"
  },
+  {
+    "source": "LINE",
+    "target": "LINE"
+  },
  {
    "source": "Mintlify",
    "target": "Mintlify"
--- a/docs/.i18n/glossary.id.json
+++ b/docs/.i18n/glossary.id.json
@@ -35,6 +35,10 @@
    "source": "Heartbeat",
    "target": "Heartbeat"
  },
+  {
+    "source": "LINE",
+    "target": "LINE"
+  },
  {
    "source": "Mintlify",
    "target": "Mintlify"
--- a/docs/.i18n/glossary.it.json
+++ b/docs/.i18n/glossary.it.json
@@ -35,6 +35,10 @@
    "source": "Heartbeat",
    "target": "Heartbeat"
  },
+  {
+    "source": "LINE",
+    "target": "LINE"
+  },
  {
    "source": "Mintlify",
    "target": "Mintlify"
--- a/docs/.i18n/glossary.ja-JP.json
+++ b/docs/.i18n/glossary.ja-JP.json
@@ -39,6 +39,10 @@
    "source": "Heartbeat",
    "target": "Heartbeat"
  },
+  {
+    "source": "LINE",
+    "target": "LINE"
+  },
  {
    "source": "local loopback",
    "target": "local loopback"
--- a/docs/.i18n/glossary.ko.json
+++ b/docs/.i18n/glossary.ko.json
@@ -35,6 +35,10 @@
    "source": "Heartbeat",
    "target": "Heartbeat"
  },
+  {
+    "source": "LINE",
+    "target": "LINE"
+  },
  {
    "source": "Mintlify",
    "target": "Mintlify"
--- a/docs/.i18n/glossary.nl.json
+++ b/docs/.i18n/glossary.nl.json
@@ -35,6 +35,10 @@
    "source": "Heartbeat",
    "target": "Heartbeat"
  },
+  {
+    "source": "LINE",
+    "target": "LINE"
+  },
  {
    "source": "Mintlify",
    "target": "Mintlify"
--- a/docs/.i18n/glossary.pl.json
+++ b/docs/.i18n/glossary.pl.json
@@ -35,6 +35,10 @@
    "source": "Heartbeat",
    "target": "Heartbeat"
  },
+  {
+    "source": "LINE",
+    "target": "LINE"
+  },
  {
    "source": "Mintlify",
    "target": "Mintlify"
--- a/docs/.i18n/glossary.pt-BR.json
+++ b/docs/.i18n/glossary.pt-BR.json
@@ -35,6 +35,10 @@
    "source": "Heartbeat",
    "target": "Heartbeat"
  },
+  {
+    "source": "LINE",
+    "target": "LINE"
+  },
  {
    "source": "Mintlify",
    "target": "Mintlify"
--- a/docs/.i18n/glossary.th.json
+++ b/docs/.i18n/glossary.th.json
@@ -35,6 +35,10 @@
    "source": "Heartbeat",
    "target": "Heartbeat"
  },
+  {
+    "source": "LINE",
+    "target": "LINE"
+  },
  {
    "source": "Mintlify",
    "target": "Mintlify"
--- a/docs/.i18n/glossary.tr.json
+++ b/docs/.i18n/glossary.tr.json
@@ -35,6 +35,10 @@
    "source": "Heartbeat",
    "target": "Heartbeat"
  },
+  {
+    "source": "LINE",
+    "target": "LINE"
+  },
  {
    "source": "Mintlify",
    "target": "Mintlify"
--- a/docs/.i18n/glossary.uk.json
+++ b/docs/.i18n/glossary.uk.json
@@ -35,6 +35,10 @@
    "source": "Heartbeat",
    "target": "Heartbeat"
  },
+  {
+    "source": "LINE",
+    "target": "LINE"
+  },
  {
    "source": "Mintlify",
    "target": "Mintlify"
--- a/docs/.i18n/glossary.vi.json
+++ b/docs/.i18n/glossary.vi.json
@@ -35,6 +35,10 @@
    "source": "Heartbeat",
    "target": "Heartbeat"
  },
+  {
+    "source": "LINE",
+    "target": "LINE"
+  },
  {
    "source": "Mintlify",
    "target": "Mintlify"
--- a/docs/.i18n/glossary.zh-CN.json
+++ b/docs/.i18n/glossary.zh-CN.json
--- a/docs/.i18n/glossary.zh-TW.json
+++ b/docs/.i18n/glossary.zh-TW.json
@@ -11,6 +11,10 @@
    "source": "ClawHub",
    "target": "ClawHub"
  },
+  {
+    "source": "ClickClack",
+    "target": "ClickClack"
+  },
  {
    "source": "CLI",
    "target": "命令列介面"
@@ -35,14 +39,38 @@
    "source": "Heartbeat",
    "target": "心跳偵測"
  },
+  {
+    "source": "Feishu",
+    "target": "Feishu"
+  },
+  {
+    "source": "IRC",
+    "target": "IRC"
+  },
+  {
+    "source": "LINE",
+    "target": "LINE"
+  },
+  {
+    "source": "Mattermost",
+    "target": "Mattermost"
+  },
  {
    "source": "Mintlify",
    "target": "Mintlify"
  },
+  {
+    "source": "Nextcloud Talk",
+    "target": "Nextcloud Talk"
+  },
  {
    "source": "Node",
    "target": "節點"
  },
+  {
+    "source": "Nostr",
+    "target": "Nostr"
+  },
  {
    "source": "OpenClaw",
    "target": "OpenClaw"
@@ -55,10 +83,30 @@
    "source": "Plugin",
    "target": "外掛"
  },
+  {
+    "source": "QQ Bot",
+    "target": "QQ Bot"
+  },
+  {
+    "source": "QQBot",
+    "target": "QQ Bot"
+  },
+  {
+    "source": "QQ bot",
+    "target": "QQ Bot"
+  },
+  {
+    "source": "SMS",
+    "target": "SMS"
+  },
  {
    "source": "Skills",
    "target": "Skills"
  },
+  {
+    "source": "Synology Chat",
+    "target": "Synology Chat"
+  },
  {
    "source": "Tailscale",
    "target": "Tailscale"
@@ -67,12 +115,48 @@
    "source": "TaskFlow",
    "target": "TaskFlow"
  },
+  {
+    "source": "Tlon",
+    "target": "Tlon"
+  },
+  {
+    "source": "Twitch",
+    "target": "Twitch"
+  },
+  {
+    "source": "Twilio",
+    "target": "Twilio"
+  },
  {
    "source": "TUI",
    "target": "終端介面"
  },
+  {
+    "source": "WeChat",
+    "target": "微信"
+  },
+  {
+    "source": "Weixin",
+    "target": "微信"
+  },
  {
    "source": "Webhook",
    "target": "網路鉤子"
+  },
+  {
+    "source": "Yuanbao",
+    "target": "騰訊元寶"
+  },
+  {
+    "source": "Zalo",
+    "target": "Zalo"
+  },
+  {
+    "source": "Zalo Personal",
+    "target": "Zalo Personal"
+  },
+  {
+    "source": "Zalo personal",
+    "target": "Zalo Personal"
  }
 ]
--- a/docs/automation/cron-jobs.md
+++ b/docs/automation/cron-jobs.md
@@ -122,6 +122,33 @@ This fires ~5–6 times per month instead of 0–1 times per month. OpenClaw use
  </Accordion>
 </AccordionGroup>

+### Command payloads
+
+Use command payloads for deterministic scripts that should run inside the Gateway scheduler without starting a model-backed isolated agent turn. Command jobs execute on the Gateway host, capture stdout/stderr, record the run in cron history, and reuse the same `announce`, `webhook`, and `none` delivery modes as isolated jobs.
+
+<Note>
+Command cron is an operator-admin Gateway automation surface, not an agent
+`tools.exec` call. Creating, updating, removing, or manually running cron jobs
+requires `operator.admin`; scheduled command runs later execute inside the
+Gateway process as that admin-authored automation. Agent exec policy such as
+`tools.exec.mode`, approval prompts, and per-agent tool allowlists governs
+model-visible exec tools, not command cron payloads.
+</Note>
+
+```bash
+openclaw cron create "*/15 * * * *" \
+  --name "Queue depth probe" \
+  --command "scripts/check-queue.sh" \
+  --command-cwd "/srv/app" \
+  --announce \
+  --channel telegram \
+  --to "-1001234567890"
+```
+
+`--command <shell>` stores `argv: ["sh", "-lc", <shell>]`. Use `--command-argv '["node","scripts/report.mjs"]'` when you want exact argv execution without shell parsing. Optional `--command-env KEY=VALUE`, `--command-input`, `--timeout-seconds`, `--no-output-timeout-seconds`, and `--output-max-bytes` flags control the process environment, stdin, timeouts, and output bounds.
+
+If only stdout is non-empty, that text is the delivered result. If stdout is empty and stderr is non-empty, stderr is delivered. If both streams are non-empty, cron delivers a small `stdout:` / `stderr:` block. A zero exit code records the run as `ok`; non-zero exit, signal, timeout, or no-output timeout records `error` and can trigger failure alerts. A command that prints only `NO_REPLY` uses the normal cron silent-token suppression and posts nothing back to chat.
+
 ### Payload options for isolated jobs

 <ParamField path="--message" type="string" required>
@@ -246,6 +273,17 @@ Failure notifications follow a separate destination path:
      --webhook "https://example.invalid/openclaw/cron"
    ```
  </Tab>
+  <Tab title="Command output">
+    ```bash
+    openclaw cron create "*/15 * * * *" \
+      --name "Queue depth probe" \
+      --command "scripts/check-queue.sh" \
+      --command-cwd "/srv/app" \
+      --announce \
+      --channel telegram \
+      --to "-1001234567890"
+    ```
+  </Tab>
 </Tabs>

 ## Webhooks
@@ -432,6 +470,7 @@ Model override note:
 - `openclaw cron add|edit --model ...` changes the job's selected model.
 - If the model is allowed, that exact provider/model reaches the isolated agent run.
 - If it is not allowed or cannot be resolved, cron fails the run with an explicit validation error.
+- API `cron.update` payload patches can set `model: null` to clear a stored job model override.
 - Configured fallback chains still apply because cron `--model` is a job primary, not a session `/model` override.
 - Payload `fallbacks` replaces configured fallbacks for that job; `fallbacks: []` disables fallback and makes the run strict.
 - A plain `--model` with no explicit or configured fallback list does not fall through to the agent primary as a silent extra retry target.
--- a/docs/channels/imessage-from-bluebubbles.md
+++ b/docs/channels/imessage-from-bluebubbles.md
@@ -221,22 +221,22 @@ If the gateway logs `imessage: dropping group message from chat_id=<id>` or the

 ## Action parity at a glance

-| Action                                                     | legacy BlueBubbles                  | bundled iMessage                                                                                                        |
-| ---------------------------------------------------------- | ----------------------------------- | ----------------------------------------------------------------------------------------------------------------------- |
-| Send text / SMS fallback                                   | ✅                                  | ✅                                                                                                                      |
-| Send media (photo, video, file, voice)                     | ✅                                  | ✅                                                                                                                      |
-| Threaded reply (`reply_to_guid`)                           | ✅                                  | ✅ (closes [#51892](https://github.com/openclaw/openclaw/issues/51892))                                                 |
-| Tapback (`react`)                                          | ✅                                  | ✅                                                                                                                      |
-| Edit / unsend (macOS 13+ recipients)                       | ✅                                  | ✅                                                                                                                      |
-| Send with screen effect                                    | ✅                                  | ✅ (closes part of [#9394](https://github.com/openclaw/openclaw/issues/9394))                                           |
-| Rich text bold / italic / underline / strikethrough        | ✅                                  | ✅ (typed-run formatting via attributedBody)                                                                            |
-| Rename group / set group icon                              | ✅                                  | ✅                                                                                                                      |
-| Add / remove participant, leave group                      | ✅                                  | ✅                                                                                                                      |
-| Read receipts and typing indicator                         | ✅                                  | ✅ (gated on private API probe)                                                                                         |
-| Same-sender DM coalescing                                  | ✅                                  | ✅ (DM-only; opt-in via `channels.imessage.coalesceSameSenderDms`)                                                      |
-| Catchup of inbound messages received while gateway is down | ✅ (webhook replay + history fetch) | ✅ (opt-in via `channels.imessage.catchup.enabled`; closes [#78649](https://github.com/openclaw/openclaw/issues/78649)) |
+| Action                                              | legacy BlueBubbles                  | bundled iMessage                                                              |
+| --------------------------------------------------- | ----------------------------------- | ----------------------------------------------------------------------------- |
+| Send text / SMS fallback                            | ✅                                  | ✅                                                                            |
+| Send media (photo, video, file, voice)              | ✅                                  | ✅                                                                            |
+| Threaded reply (`reply_to_guid`)                    | ✅                                  | ✅ (closes [#51892](https://github.com/openclaw/openclaw/issues/51892))       |
+| Tapback (`react`)                                   | ✅                                  | ✅                                                                            |
+| Edit / unsend (macOS 13+ recipients)                | ✅                                  | ✅                                                                            |
+| Send with screen effect                             | ✅                                  | ✅ (closes part of [#9394](https://github.com/openclaw/openclaw/issues/9394)) |
+| Rich text bold / italic / underline / strikethrough | ✅                                  | ✅ (typed-run formatting via attributedBody)                                  |
+| Rename group / set group icon                       | ✅                                  | ✅                                                                            |
+| Add / remove participant, leave group               | ✅                                  | ✅                                                                            |
+| Read receipts and typing indicator                  | ✅                                  | ✅ (gated on private API probe)                                               |
+| Same-sender DM coalescing                           | ✅                                  | ✅ (DM-only; opt-in via `channels.imessage.coalesceSameSenderDms`)            |
+| Inbound recovery after a restart                    | ✅ (webhook replay + history fetch) | ✅ (automatic: replay missed via since_rowid + dedupe; wider window on local) |

-iMessage catchup is now available as an opt-in feature on the bundled plugin. On gateway startup, if `channels.imessage.catchup.enabled` is `true`, the gateway runs one `chats.list` + per-chat `messages.history` pass against the same JSON-RPC client used by `imsg watch`, replays each missed inbound row through the live dispatch path (allowlists, group policy, debouncer, echo cache), and persists a per-account cursor so subsequent startups pick up where they left off. See [Catching up after gateway downtime](/channels/imessage#catching-up-after-gateway-downtime) for tuning.
+iMessage recovers messages missed while the gateway was down: on startup it replays from the last dispatched rowid via `imsg watch.subscribe` `since_rowid` and dedupes by GUID, while a stale-backlog age fence suppresses the Push-flush "backlog bomb". This runs over the `imsg` RPC connection, so it works for remote SSH `cliPath` setups too; local setups get a wider recovery window because they can read `chat.db`. See [Inbound recovery after a bridge or gateway restart](/channels/imessage#inbound-recovery-after-a-bridge-or-gateway-restart).

 ## Pairing, sessions, and ACP bindings

--- a/docs/channels/imessage.md
+++ b/docs/channels/imessage.md
@@ -9,7 +9,7 @@ title: "iMessage"
 <Note>
 For OpenClaw iMessage deployments, use `imsg` on a signed-in macOS Messages host. If your Gateway runs on Linux or Windows, point `channels.imessage.cliPath` at an SSH wrapper that runs `imsg` on the Mac.

-**Gateway-downtime catchup is opt-in.** When enabled (`channels.imessage.catchup.enabled: true`), the gateway replays inbound messages that landed in `chat.db` while it was offline (crash, restart, Mac sleep) on next startup. Disabled by default — see [Catching up after gateway downtime](#catching-up-after-gateway-downtime). Closes [openclaw#78649](https://github.com/openclaw/openclaw/issues/78649).
+**Inbound recovery is automatic.** After a bridge or gateway restart, iMessage replays the messages missed while it was down and suppresses the stale "backlog bomb" Apple can flush after a Push recovery, deduping so nothing is dispatched twice. There is no config to enable — see [Inbound recovery after a bridge or gateway restart](#inbound-recovery-after-a-bridge-or-gateway-restart).
 </Note>

 <Warning>
@@ -654,14 +654,14 @@ When a user types a command and a URL together — e.g. `Dump https://example.co

 The two rows arrive at OpenClaw ~0.8-2.0 s apart on most setups. Without coalescing, the agent receives the command alone on turn 1, replies (often "send me the URL"), and only sees the URL on turn 2 — at which point the command context is already lost. This is Apple's send pipeline, not anything OpenClaw or `imsg` introduces.

-`channels.imessage.coalesceSameSenderDms` opts a DM into merging consecutive same-sender rows into a single agent turn. Group chats continue to dispatch per-message so multi-user turn structure is preserved.
+`channels.imessage.coalesceSameSenderDms` opts a DM into buffering consecutive same-sender rows. When `imsg` exposes the structural URL-preview marker `balloon_bundle_id: "com.apple.messages.URLBalloonProvider"` on one of the source rows, OpenClaw merges only that real split-send and keeps any other buffered rows as separate turns. On older `imsg` builds that emit no balloon metadata at all, OpenClaw cannot tell a split-send from separate sends, so it falls back to merging the bucket. That preserves the pre-metadata behavior rather than regressing `Dump <url>` split-sends into two turns. Group chats continue to dispatch per-message so multi-user turn structure is preserved.

 <Tabs>
  <Tab title="When to enable">
    Enable when:

    - You ship skills that expect `command + payload` in one message (dump, paste, save, queue, etc.).
-    - Your users paste URLs, images, or long content alongside commands.
+    - Your users paste URLs alongside commands.
    - You can accept the added DM turn latency (see below).

    Leave disabled when:
@@ -702,7 +702,8 @@ The two rows arrive at OpenClaw ~0.8-2.0 s apart on most setups. Without coalesc

  </Tab>
  <Tab title="Trade-offs">
-    - **Added latency for DM messages.** With the flag on, every DM (including standalone control commands and single-text follow-ups) waits up to the debounce window before dispatching, in case a payload row is coming. Group-chat messages keep instant dispatch.
+    - **Precise merging needs current `imsg` payload metadata.** When the URL row includes `balloon_bundle_id`, only that real split-send merges and other buffered rows stay separate. On older `imsg` builds that expose no balloon metadata, OpenClaw falls back to merging the buffered bucket so `Dump <url>` split-sends are not regressed into two turns (interim back-compat, to be removed once `imsg` coalesces split-sends upstream).
+    - **Added latency for DM messages.** With the flag on, every DM (including standalone control commands and single-text follow-ups) waits up to the debounce window before dispatching, in case a URL-preview row is coming. Group-chat messages keep instant dispatch.
    - **Merged output is bounded.** Merged text caps at 4000 chars with an explicit `…[truncated]` marker; attachments cap at 20; source entries cap at 10 (first-plus-latest retained beyond that). Every source GUID is tracked in `coalescedMessageGuids` for downstream telemetry.
    - **DM-only.** Group chats fall through to per-message dispatch so the bot stays responsive when multiple people are typing.
    - **Opt-in, per-channel.** Other channels (Telegram, WhatsApp, Slack, …) are unaffected. Legacy BlueBubbles configs that set `channels.bluebubbles.coalesceSameSenderDms` should migrate that value to `channels.imessage.coalesceSameSenderDms`.
@@ -712,77 +713,39 @@ The two rows arrive at OpenClaw ~0.8-2.0 s apart on most setups. Without coalesc

 ### Scenarios and what the agent sees

-| User composes                                                      | `chat.db` produces    | Flag off (default)                      | Flag on + 2500 ms window                                                |
-| ------------------------------------------------------------------ | --------------------- | --------------------------------------- | ----------------------------------------------------------------------- |
-| `Dump https://example.com` (one send)                              | 2 rows ~1 s apart     | Two agent turns: "Dump" alone, then URL | One turn: merged text `Dump https://example.com`                        |
-| `Save this 📎image.jpg caption` (attachment + text)                | 2 rows                | Two turns (attachment dropped on merge) | One turn: text + image preserved                                        |
-| `/status` (standalone command)                                     | 1 row                 | Instant dispatch                        | **Wait up to window, then dispatch**                                    |
-| URL pasted alone                                                   | 1 row                 | Instant dispatch                        | Instant dispatch (only one entry in bucket)                             |
-| Text + URL sent as two deliberate separate messages, minutes apart | 2 rows outside window | Two turns                               | Two turns (window expires between them)                                 |
-| Rapid flood (>10 small DMs inside window)                          | N rows                | N turns                                 | One turn, bounded output (first + latest, text/attachment caps applied) |
-| Two people typing in a group chat                                  | N rows from M senders | M+ turns (one per sender bucket)        | M+ turns — group chats are not coalesced                                |
+The "Flag on" column shows behavior on an `imsg` build that emits `balloon_bundle_id`. On older `imsg` builds that emit no balloon metadata at all, the rows below marked "Two turns" / "N turns" instead fall back to a legacy merge (one turn): OpenClaw cannot structurally tell a split-send from separate sends, so it preserves the pre-metadata merge. Precise separation activates once the build emits balloon metadata.

-## Catching up after gateway downtime
+| User composes                                                      | `chat.db` produces                  | Flag off (default)                      | Flag on + window (imsg emits balloon metadata)   |
+| ------------------------------------------------------------------ | ----------------------------------- | --------------------------------------- | ------------------------------------------------ |
+| `Dump https://example.com` (one send)                              | 2 rows ~1 s apart                   | Two agent turns: "Dump" alone, then URL | One turn: merged text `Dump https://example.com` |
+| `Save this 📎image.jpg caption` (attachment + text)                | 2 rows without URL balloon metadata | Two turns                               | Two turns (legacy merge on metadata-less builds) |
+| `/status` (standalone command)                                     | 1 row                               | Instant dispatch                        | **Wait up to window, then dispatch**             |
+| URL pasted alone                                                   | 1 row                               | Instant dispatch                        | Wait up to window, then dispatch                 |
+| Text + URL sent as two deliberate separate messages, minutes apart | 2 rows outside window               | Two turns                               | Two turns (window expires between them)          |
+| Rapid flood (>10 small DMs inside window)                          | N rows without URL balloon metadata | N turns                                 | N turns (legacy merge on metadata-less builds)   |
+| Two people typing in a group chat                                  | N rows from M senders               | M+ turns (one per sender bucket)        | M+ turns — group chats are not coalesced         |

-When the gateway is offline (crash, restart, Mac sleep, machine off), `imsg watch` resumes from the current `chat.db` state once the gateway comes back up — anything that arrived during the gap is, by default, never seen. Catchup replays those messages on the next startup so the agent does not silently miss inbound traffic.
+## Inbound recovery after a bridge or gateway restart

-Catchup is **disabled by default**. Enable it per channel:
+iMessage recovers messages missed while the gateway was down, and at the same time suppresses the stale "backlog bomb" Apple can flush after a Push recovery. This behavior is always on by default and is built on the inbound dedupe.

-```ts
-channels: {
-  imessage: {
-    catchup: {
-      enabled: true,             // master switch (default: false)
-      maxAgeMinutes: 120,        // skip rows older than now - 2h (default: 120, clamp 1..720)
-      perRunLimit: 50,           // max rows replayed per startup (default: 50, clamp 1..500)
-      firstRunLookbackMinutes: 30, // first run with no cursor: look back 30 min (default: 30)
-      maxFailureRetries: 10,     // give up on a wedged guid after 10 dispatch failures (default: 10)
-    },
-  },
-}
-```
+- **Replay dedupe.** Every dispatched inbound message is recorded by its Apple GUID in persistent plugin state (`imessage.inbound-dedupe`), claimed at ingestion and committed after handling (released on a transient failure so it can retry). Anything already handled is dropped instead of dispatched twice. This is what lets recovery replay aggressively without per-message bookkeeping.
+- **Downtime recovery.** The monitor remembers the last dispatched `chat.db` rowid (a persisted per-account cursor) and, on startup, passes it to `imsg watch.subscribe` as `since_rowid`, so `imsg` replays the rows that landed while the gateway was down, then tails live. Replay is bounded to the most recent rows and to messages up to ~2 hours old, and the dedupe drops anything already handled.
+- **Stale-backlog age fence.** Rows above the startup boundary are genuinely live; one whose send date is more than ~15 minutes older than its arrival is the Push-flush backlog and is suppressed. Replayed rows (at or below the boundary) use the wider recovery window instead, so a recently-missed message is delivered while ancient history is not.

-### How it runs
+Recovery works over both local and remote `cliPath` setups, because `since_rowid` replay runs over the same `imsg` RPC connection. The difference is the window: when the gateway can read `chat.db` (local), it anchors the startup rowid boundary, caps the replay span, and delivers missed messages up to a couple of hours old. Over a remote SSH `cliPath` it cannot read the database, so the replay is uncapped and every row uses the live age fence — it still recovers recently-missed messages and still suppresses old backlog, just with the narrower live window. Run the gateway on the Messages Mac for the wider recovery window.

-One pass per `monitorIMessageProvider` startup, sequenced as `imsg launch` ready → `watch.subscribe` → `performIMessageCatchup` → live dispatch loop. Catchup itself uses `chats.list` + per-chat `messages.history` against the same JSON-RPC client used by `imsg watch`. Anything that arrives during the catchup pass flows through live dispatch normally; the existing inbound-dedupe cache absorbs any overlap with replayed rows.
+### Operator-visible signal

-Each replayed row is fed through the live dispatch path (`evaluateIMessageInbound` + `dispatchInboundMessage`), so allowlists, group policy, debouncer, echo cache, and read receipts behave identically on replayed and live messages.
-
-### Cursor and retry semantics
-
-Catchup keeps a per-account cursor in SQLite plugin state:
-
-```json
-{
-  "lastSeenMs": 1717900800000,
-  "lastSeenRowid": 482910,
-  "updatedAt": 1717900801234,
-  "failureRetries": { "<guid>": 1 }
-}
-```
-
- The cursor advances on each successful dispatch and is held when a row's dispatch throws — the next startup retries the same row from the held cursor.
- After the startup catchup query succeeds, later live-handled rows also advance the same cursor so a gateway restart does not replay messages that were already handled live. Live cursor writes do not jump past catchup failures that are still below `maxFailureRetries`.
- After `maxFailureRetries` consecutive throws against the same `guid`, catchup logs a `warn` and force-advances the cursor past the wedged message so subsequent startups can make progress.
- Already-given-up guids are skipped on sight (no dispatch attempt) on later runs and counted under `skippedGivenUp` in the run summary.
- `openclaw doctor --fix` imports legacy `<openclawStateDir>/imessage/catchup/*.json` cursor files into SQLite plugin state and archives the old files.
-
-### Operator-visible signals
+Suppressed backlog is logged at the default level, never silently dropped (the `recovery` flag shows which window applied):

 ```
-imessage catchup: replayed=N skippedFromMe=… skippedGivenUp=… failed=… givenUp=… fetchedCount=…
-imessage catchup: giving up on guid=<guid> after <N> failures; advancing cursor past it
-imessage catchup: fetched <X> rows across chats, capped to perRunLimit=<Y>
+imessage: suppressed stale inbound backlog account=<id> sent=<iso> recovery=<bool> (<N> suppressed since start)
 ```

-A `WARN ... capped to perRunLimit` line means a single startup did not drain the full backlog. Raise `perRunLimit` (max 500) if your gaps regularly exceed the default 50-row pass.
+### Migration

-### When to leave it off
-
- Gateway runs continuously with watchdog auto-restart and gaps are always < a few seconds — the default of off is fine.
- DM volume is low and missed messages would not change agent behavior — the `firstRunLookbackMinutes` initial window can dispatch surprising old context on first enable.
-
-When you turn catchup on, the first startup with no cursor only looks back `firstRunLookbackMinutes` (30 min default), not the full `maxAgeMinutes` window — this avoids replaying a long history of pre-enable messages.
+`channels.imessage.catchup.*` is deprecated — downtime recovery is now automatic and needs no config for new setups. Existing configs with `catchup.enabled: true` remain honored as a compatibility profile for the recovery replay window. Disabled catchup blocks (`enabled: false`, or `enabled` not set to `true`) are retired; `openclaw doctor --fix` removes them.

 ## Troubleshooting

--- a/docs/cli/cron.md
+++ b/docs/cli/cron.md
@@ -34,6 +34,27 @@ openclaw cron create "0 18 * * 1-5" \
  --webhook "https://example.invalid/openclaw/cron"
 ```

+Use `--command` for deterministic shell-style jobs that should run inside OpenClaw cron without starting an isolated agent/model run:
+
+<Note>
+Command cron jobs are admin-authored Gateway automation. Creating, editing,
+removing, or manually running them requires `operator.admin`; the scheduled run
+later executes in the Gateway process, not as an agent `tools.exec` tool call.
+`tools.exec.*` and exec approvals still govern model-visible exec tools.
+</Note>
+
+```bash
+openclaw cron create "*/15 * * * *" \
+  --name "Queue depth probe" \
+  --command "scripts/check-queue.sh" \
+  --command-cwd "/srv/app" \
+  --announce \
+  --channel telegram \
+  --to "-1001234567890"
+```
+
+`--command <shell>` stores `argv: ["sh", "-lc", <shell>]`. Use `--command-argv '["node","scripts/report.mjs"]'` for exact argv execution. Command jobs capture stdout/stderr, record normal cron history, and route output through the same `announce`, `webhook`, or `none` delivery modes as isolated jobs. A command that prints only `NO_REPLY` is suppressed.
+
 ## Sessions

 `--session` accepts `main`, `isolated`, `current`, or `session:<id>`.
@@ -92,6 +113,10 @@ Note: isolated cron runs treat run-level agent failures as job errors even when
 no reply payload is produced, so model/provider failures still increment error
 counters and trigger failure notifications.

+Command cron jobs do not start an isolated agent turn. A zero exit code records
+`ok`; non-zero exit, signal, timeout, or no-output timeout records `error` and
+can trigger the same failure notification path.
+
 If an isolated run times out before the first model request, `openclaw cron show`
 and `openclaw cron runs` include a phase-specific error such as
 `setup timed out before runner start` or
@@ -252,6 +277,21 @@ openclaw cron create "0 7 * * *" \

 `--light-context` applies to isolated agent-turn jobs only. For cron runs, lightweight mode keeps bootstrap context empty instead of injecting the full workspace bootstrap set.

+Create a command job with exact argv, cwd, env, stdin, and output limits:
+
+```bash
+openclaw cron create "*/30 * * * *" \
+  --name "Position export" \
+  --command-argv '["node","scripts/export-position.mjs"]' \
+  --command-cwd "/srv/app" \
+  --command-env "NODE_ENV=production" \
+  --command-input '{"mode":"summary"}' \
+  --timeout-seconds 120 \
+  --no-output-timeout-seconds 30 \
+  --output-max-bytes 65536 \
+  --webhook "https://example.invalid/openclaw/cron"
+```
+
 ## Common admin commands

 Manual run and inspection:
--- a/docs/concepts/experimental-features.md
+++ b/docs/concepts/experimental-features.md
@@ -30,7 +30,7 @@ Treat them differently from normal config:

 ## Local model lean mode

-`agents.defaults.experimental.localModelLean: true` is a pressure-release valve for weaker local-model setups. When it is on, OpenClaw drops three default tools — `browser`, `cron`, and `message` — from the agent's tool surface for every turn. Nothing else changes. Use `agents.list[].experimental.localModelLean` to enable or disable the same behavior for one configured agent.
+`agents.defaults.experimental.localModelLean: true` is a pressure-release valve for weaker local-model setups. When it is on, OpenClaw drops three default tools — `browser`, `cron`, and `message` — from the agent's tool surface for every turn. It also defaults that run to structured Tool Search controls when `tools.toolSearch` is not explicitly configured, so larger plugin, MCP, or client tool catalogs stay behind `tool_search`, `tool_describe`, and `tool_call` instead of being dumped into the prompt. Runs that require direct `message` delivery keep that tool direct instead of enabling the lean-mode Tool Search default. Use `agents.list[].experimental.localModelLean` to enable or disable the same behavior for one configured agent.

 ### Why these three tools

@@ -40,7 +40,7 @@ These three tools have the largest descriptions and the most parameter shapes in
 - The model picking the right tool vs. emitting malformed tool calls because there are too many similar-looking schemas.
 - The Chat Completions adapter staying inside the server's structured-output limits vs. tripping a 400 on tool-call payload size.

-Removing them does not silently rewire OpenClaw — it just makes the tool list shorter. The model still has `read`, `write`, `edit`, `exec`, `apply_patch`, web search/fetch (when configured), memory, and session/agent tools available.
+Removing them does not silently rewire OpenClaw — it just makes the direct tool list shorter. The model still has `read`, `write`, `edit`, `exec`, `apply_patch`, web search/fetch (when configured), memory, and session/agent tools available. Extra catalogs remain callable through Tool Search unless you explicitly set `tools.toolSearch: false`.

 ### When to turn it on

@@ -56,6 +56,8 @@ If your backend handles the full default runtime cleanly, leave this off. Lean m

 Lean mode also does not replace `tools.profile`, `tools.allow`/`tools.deny`, or the model `compat.supportsTools: false` escape hatch. If you need a permanent narrower tool surface for a specific agent, prefer those stable knobs over the experimental flag.

+If you already tune Tool Search globally, OpenClaw leaves that operator config alone. Set `tools.toolSearch: false` to opt out of the lean-mode Tool Search default.
+
 ### Enable

 ```json5
@@ -94,7 +96,7 @@ Restart the Gateway after changing the flag, then confirm the trimmed tool list
 openclaw status --deep
 ```

-The deep status output lists the active agent tools; `browser`, `cron`, and `message` should be absent when lean mode is on.
+The deep status output lists the active agent tools; `browser` and `cron` should be absent when lean mode is on, and `message` should be absent too unless the current delivery mode forces direct `message` replies.

 ## Experimental does not mean hidden

--- a/docs/concepts/qa-e2e-automation.md
+++ b/docs/concepts/qa-e2e-automation.md
@@ -48,6 +48,7 @@ script aliases; both forms are supported.
 | `qa telegram`                                       | Live transport lane against a real private Telegram group.                                                                                                                                                                                                              |
 | `qa discord`                                        | Live transport lane against a real private Discord guild channel.                                                                                                                                                                                                       |
 | `qa slack`                                          | Live transport lane against a real private Slack channel.                                                                                                                                                                                                               |
+| `qa whatsapp`                                       | Live transport lane against real WhatsApp Web accounts.                                                                                                                                                                                                                 |
 | `qa mantis`                                         | Before and after verification runner for live transport bugs, with Discord status-reactions evidence, Crabbox desktop/browser smoke, and Slack-in-VNC smoke. See [Mantis](/concepts/mantis) and [Mantis Slack Desktop Runbook](/concepts/mantis-slack-desktop-runbook). |

 ## Operator flow
@@ -168,15 +169,16 @@ decision still comes from the Discord REST oracle.

 CI uses the same command surface in `.github/workflows/qa-live-transports-convex.yml`. Scheduled and default manual runs execute the fast Matrix profile with live frontier credentials, `--fast`, and `OPENCLAW_QA_MATRIX_NO_REPLY_WINDOW_MS=3000`. Manual `matrix_profile=all` fans out into the five profile shards so the exhaustive catalog can run in parallel while keeping one artifact directory per shard.

-For transport-real Telegram, Discord, and Slack smoke lanes:
+For transport-real Telegram, Discord, Slack, and WhatsApp smoke lanes:

 ```bash
 pnpm openclaw qa telegram
 pnpm openclaw qa discord
 pnpm openclaw qa slack
+pnpm openclaw qa whatsapp
 ```

-They target a pre-existing real channel with two bots (driver + SUT). Required env vars, scenario lists, output artifacts, and the Convex credential pool are documented in [Telegram, Discord, and Slack QA reference](#telegram-discord-and-slack-qa-reference) below.
+They target a pre-existing real channel with two bots or accounts (driver + SUT). Required env vars, scenario lists, output artifacts, and the Convex credential pool are documented in [Telegram, Discord, Slack, and WhatsApp QA reference](#telegram-discord-slack-and-whatsapp-qa-reference) below.

 For a full Slack desktop VM run with VNC rescue, run:

@@ -276,10 +278,10 @@ coverage helpers, and scenario-selection helper from
 | Telegram | x      | x              | x          |                 |                 |                |                  |                  |                      | x            |                             |
 | Discord  | x      | x              | x          |                 |                 |                |                  |                  |                      |              | x                           |
 | Slack    | x      | x              | x          | x               | x               | x              | x                | x                |                      |              |                             |
+| WhatsApp | x      | x              |            | x               | x               | x              |                  |                  | x                    | x            |                             |

 This keeps `qa-channel` as the broad product-behavior suite while Matrix,
-Telegram, and future live transports share one explicit transport-contract
-checklist.
+Telegram, and other live transports share one explicit transport-contract checklist.

 For a disposable Linux VM lane without bringing Docker into the QA path, run:

@@ -308,25 +310,25 @@ guest: env-based provider keys, the QA live provider config path, and
 `CODEX_HOME` when present. Keep `--output-dir` under the repo root so the guest
 can write back through the mounted workspace.

-## Telegram, Discord, and Slack QA reference
+## Telegram, Discord, Slack, and WhatsApp QA reference

-Matrix has a [dedicated page](/concepts/qa-matrix) because of its scenario count and Docker-backed homeserver provisioning. Telegram, Discord, and Slack are smaller - a handful of scenarios each, no profile system, against pre-existing real channels - so their reference lives here.
+Matrix has a [dedicated page](/concepts/qa-matrix) because of its scenario count and Docker-backed homeserver provisioning. Telegram, Discord, Slack, and WhatsApp run against pre-existing real transports, so their reference lives here.

 ### Shared CLI flags

 These lanes register through `extensions/qa-lab/src/live-transports/shared/live-transport-cli.ts` and accept the same flags:

-| Flag                                  | Default                                                         | Description                                                                                                           |
-| ------------------------------------- | --------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------- |
-| `--scenario <id>`                     | -                                                               | Run only this scenario. Repeatable.                                                                                   |
-| `--output-dir <path>`                 | `<repo>/.artifacts/qa-e2e/{telegram,discord,slack}-<timestamp>` | Where reports/summary/observed messages and the output log are written. Relative paths resolve against `--repo-root`. |
-| `--repo-root <path>`                  | `process.cwd()`                                                 | Repository root when invoking from a neutral cwd.                                                                     |
-| `--sut-account <id>`                  | `sut`                                                           | Temporary account id inside the QA gateway config.                                                                    |
-| `--provider-mode <mode>`              | `live-frontier`                                                 | `mock-openai` or `live-frontier` (legacy `live-openai` still works).                                                  |
-| `--model <ref>` / `--alt-model <ref>` | provider default                                                | Primary/alternate model refs.                                                                                         |
-| `--fast`                              | off                                                             | Provider fast mode where supported.                                                                                   |
-| `--credential-source <env\|convex>`   | `env`                                                           | See [Convex credential pool](#convex-credential-pool).                                                                |
-| `--credential-role <maintainer\|ci>`  | `ci` in CI, `maintainer` otherwise                              | Role used when `--credential-source convex`.                                                                          |
+| Flag                                  | Default                                            | Description                                                                                                           |
+| ------------------------------------- | -------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------- |
+| `--scenario <id>`                     | -                                                  | Run only this scenario. Repeatable.                                                                                   |
+| `--output-dir <path>`                 | `<repo>/.artifacts/qa-e2e/<transport>-<timestamp>` | Where reports/summary/observed messages and the output log are written. Relative paths resolve against `--repo-root`. |
+| `--repo-root <path>`                  | `process.cwd()`                                    | Repository root when invoking from a neutral cwd.                                                                     |
+| `--sut-account <id>`                  | `sut`                                              | Temporary account id inside the QA gateway config.                                                                    |
+| `--provider-mode <mode>`              | `live-frontier`                                    | `mock-openai` or `live-frontier` (legacy `live-openai` still works).                                                  |
+| `--model <ref>` / `--alt-model <ref>` | provider default                                   | Primary/alternate model refs.                                                                                         |
+| `--fast`                              | off                                                | Provider fast mode where supported.                                                                                   |
+| `--credential-source <env\|convex>`   | `env`                                              | See [Convex credential pool](#convex-credential-pool).                                                                |
+| `--credential-role <maintainer\|ci>`  | `ci` in CI, `maintainer` otherwise                 | Role used when `--credential-source convex`.                                                                          |

 Each lane exits non-zero on any failed scenario. `--allow-failures` writes artifacts without setting a failing exit code.

@@ -688,22 +690,52 @@ Required env when `--credential-source env`:

 Optional:

-- `OPENCLAW_QA_WHATSAPP_GROUP_JID` enables `whatsapp-mention-gating`.
+- `OPENCLAW_QA_WHATSAPP_GROUP_JID` enables group scenarios such as
+  `whatsapp-mention-gating` and `whatsapp-group-allowlist-block`.
 - `OPENCLAW_QA_WHATSAPP_CAPTURE_CONTENT=1` keeps message bodies in
  observed-message artifacts.

-Scenarios (`extensions/qa-lab/src/live-transports/whatsapp/whatsapp-live.runtime.ts`):
+Scenario catalog (`extensions/qa-lab/src/live-transports/whatsapp/whatsapp-live.runtime.ts`):

-- `whatsapp-canary`
-- `whatsapp-pairing-block`
-- `whatsapp-mention-gating`
-- `whatsapp-approval-exec-native` - opt-in native WhatsApp exec approval
-  scenario. Requests an exec approval through the gateway, verifies the
-  WhatsApp message has native reaction approval affordances, resolves it, and
-  verifies the resolved WhatsApp follow-up.
-- `whatsapp-approval-plugin-native` - opt-in native WhatsApp plugin approval
-  scenario. Enables exec and plugin approval forwarding together, then verifies
-  the same pending/resolved native WhatsApp path.
+- Baseline and group gating: `whatsapp-canary`, `whatsapp-pairing-block`,
+  `whatsapp-mention-gating`, `whatsapp-top-level-reply-shape`,
+  `whatsapp-restart-resume`, `whatsapp-group-allowlist-block`.
+- Native commands: `whatsapp-help-command`, `whatsapp-status-command`,
+  `whatsapp-commands-command`, `whatsapp-tools-compact-command`,
+  `whatsapp-whoami-command`, `whatsapp-context-command`,
+  `whatsapp-native-new-command`.
+- Reply and final-output behavior: `whatsapp-tool-only-usage-footer`,
+  `whatsapp-reply-to-message`, `whatsapp-reply-context-isolation`,
+  `whatsapp-reply-delivery-shape`, `whatsapp-stream-final-message-accounting`.
+- Inbound media and structured messages: `whatsapp-inbound-image-caption`,
+  `whatsapp-audio-preflight`, `whatsapp-inbound-structured-messages`,
+  `whatsapp-group-audio-gating`. These send real WhatsApp image, audio,
+  document, location, contact, and sticker events through the driver.
+- Outbound Gateway and message action coverage:
+  `whatsapp-outbound-media-matrix`,
+  `whatsapp-outbound-document-preserves-filename`, `whatsapp-outbound-poll`,
+  `whatsapp-message-actions`.
+- Access-control coverage: `whatsapp-access-control-dm-open`,
+  `whatsapp-access-control-dm-disabled`, `whatsapp-access-control-group-open`,
+  `whatsapp-access-control-group-disabled`, `whatsapp-group-allowlist-block`.
+- Native approvals: `whatsapp-approval-exec-deny-native`,
+  `whatsapp-approval-exec-native`, `whatsapp-approval-exec-reaction-native`,
+  `whatsapp-approval-plugin-native`.
+- Status reactions: `whatsapp-status-reactions`.
+
+The catalog currently contains 35 scenarios. The `live-frontier` default lane is
+kept small at 8 scenarios for fast smoke coverage. The `mock-openai` default
+lane runs 29 deterministic scenarios through the real WhatsApp transport while
+mocking only model output. Approval scenarios and a few heavier/blocking checks
+remain explicit by scenario id.
+
+The WhatsApp QA driver observes structured live events (`text`, `media`,
+`location`, `reaction`, and `poll`) and can actively send media, polls,
+contacts, locations, and stickers. QA Lab imports that driver through the
+`@openclaw/whatsapp/api.js` package surface instead of reaching into private
+WhatsApp runtime files. Message content is redacted by default. Outbound
+poll and upload-file coverage run through deterministic gateway `poll` and
+`message.action` calls instead of model-prompt-only tool invocation.

 Output artifacts:

--- a/docs/concepts/system-prompt.md
+++ b/docs/concepts/system-prompt.md
@@ -255,10 +255,11 @@ See [Date & Time](/date-time) for full behavior details.
 ## Skills

 When eligible skills exist, OpenClaw injects a compact **available skills list**
-(`formatSkillsForPrompt`) that includes the **file path** for each skill. The
-prompt instructs the model to use `read` to load the SKILL.md at the listed
-location (workspace, managed, or bundled). If no skills are eligible, the
-Skills section is omitted.
+(`formatSkillsForPrompt`) that includes the **file path** and content-derived
+`<version>` marker for each skill. The prompt instructs the model to use `read`
+to load the SKILL.md at the listed location (workspace, managed, or bundled),
+and to re-read a skill when its `<version>` differs from a previous turn. If no
+skills are eligible, the Skills section is omitted.

 Native Codex turns receive this list as turn-scoped collaboration developer
 instructions instead of per-turn user input, except lightweight cron turns that
@@ -283,6 +284,7 @@ that guidance directly in every tool description.
    <name>...</name>
    <description>...</description>
    <location>...</location>
+    <version>sha256:...</version>
  </skill>
 </available_skills>
 ```
--- a/docs/gateway/config-channels.md
+++ b/docs/gateway/config-channels.md
@@ -624,9 +624,6 @@ Before relying on an SSH wrapper for production sends, verify an outbound `imsg
        sendWithEffect: true,
        sendAttachment: true,
      },
-      catchup: {
-        enabled: false,
-      },
    },
  },
 }
@@ -642,7 +639,7 @@ Before relying on an SSH wrapper for production sends, verify an outbound `imsg
 - `channels.imessage.configWrites`: allow or deny iMessage-initiated config writes.
 - `channels.imessage.actions.*`: enable private API actions that are also gated by `imsg status` / `openclaw channels status --probe`.
 - `channels.imessage.includeAttachments` is off by default; set it to `true` before expecting inbound media in agent turns.
-- `channels.imessage.catchup.enabled`: opt in to replaying inbound messages that arrived while the Gateway was down.
+- Inbound recovery after a bridge/gateway restart is automatic (GUID dedupe plus a stale-backlog age fence). Existing `channels.imessage.catchup.enabled: true` configs are still honored as a deprecated compatibility profile.
 - `channels.imessage.groups`: group registry and per-group settings. With `groupPolicy: "allowlist"`, configure either explicit `chat_id` keys or a `"*"` wildcard entry so group messages can pass the registry gate.
 - Top-level `bindings[]` entries with `type: "acp"` can bind iMessage conversations to persistent ACP sessions. Use a normalized handle or explicit chat target (`chat_id:*`, `chat_guid:*`, `chat_identifier:*`) in `match.peer.id`. Shared field semantics: [ACP Agents](/tools/acp-agents#persistent-channel-bindings).

--- a/docs/gateway/configuration-reference.md
+++ b/docs/gateway/configuration-reference.md
@@ -542,7 +542,7 @@ See [Inferred commitments](/concepts/commitments).
    tools: {
      // Additional /tools/invoke HTTP denies
      deny: ["browser"],
-      // Remove tools from the default HTTP deny list
+      // Remove tools from the default HTTP deny list for owner/admin callers
      allow: ["gateway"],
    },
    push: {
@@ -610,7 +610,10 @@ See [Inferred commitments](/concepts/commitments).
 - `gateway.nodes.pairing.autoApproveCidrs`: optional CIDR/IP allowlist for auto-approving first-time node device pairing with no requested scopes. It is disabled when unset. This does not auto-approve operator/browser/Control UI/WebChat pairing, and it does not auto-approve role, scope, metadata, or public-key upgrades.
 - `gateway.nodes.allowCommands` / `gateway.nodes.denyCommands`: global allow/deny shaping for declared node commands after pairing and platform allowlist evaluation. Use `allowCommands` to opt into dangerous node commands such as `camera.snap`, `camera.clip`, and `screen.record`; `denyCommands` removes a command even if a platform default or explicit allow would otherwise include it. After a node changes its declared command list, reject and re-approve that device pairing so the gateway stores the updated command snapshot.
 - `gateway.tools.deny`: extra tool names blocked for HTTP `POST /tools/invoke` (extends default deny list).
-- `gateway.tools.allow`: remove tool names from the default HTTP deny list.
+- `gateway.tools.allow`: remove tool names from the default HTTP deny list for
+  owner/admin callers. This does not upgrade identity-bearing `operator.write`
+  callers into owner/admin access; `cron`, `gateway`, and `nodes` remain
+  unavailable to non-owner callers even when allowlisted.

 </Accordion>

--- a/docs/gateway/local-models.md
+++ b/docs/gateway/local-models.md
@@ -315,7 +315,7 @@ If the model loads cleanly but full agent turns misbehave, work top-down — con
   openclaw infer model run --gateway --model <provider/model> --prompt "Reply with exactly: pong" --json
   ```

-3. **Try lean mode.** If both probes pass but real agent turns fail with malformed tool calls or oversized prompts, enable `agents.defaults.experimental.localModelLean: true`. It drops the three heaviest default tools (`browser`, `cron`, `message`) so the prompt shape is smaller and less brittle. See [Experimental Features → Local model lean mode](/concepts/experimental-features#local-model-lean-mode) for the full explanation, when to use it, and how to confirm it is on.
+3. **Try lean mode.** If both probes pass but real agent turns fail with malformed tool calls or oversized prompts, enable `agents.defaults.experimental.localModelLean: true`. It drops the three heaviest default tools (`browser`, `cron`, `message`) and defaults larger tool catalogs behind structured Tool Search controls, except for runs that must keep direct `message` delivery semantics. See [Experimental Features → Local model lean mode](/concepts/experimental-features#local-model-lean-mode) for the full explanation, when to use it, and how to confirm it is on.

 4. **Disable tools entirely as a last resort.** If lean mode is not enough, set `models.providers.<provider>.models[].compat.supportsTools: false` for that model entry. The agent will then operate without tool calls on that model.

--- a/docs/gateway/protocol.md
+++ b/docs/gateway/protocol.md
@@ -580,6 +580,9 @@ terminal summary, and sanitized error text.
    `idempotencyKey` are optional.
  - If both `sessionKey` and `agentId` are present, the resolved session agent must match
    `agentId`.
+  - Owner-only core wrappers such as `cron`, `gateway`, and `nodes` require
+    owner/admin identity (`operator.admin`) even though the `tools.invoke`
+    method itself is `operator.write`.
  - The response is an SDK-facing envelope with `ok`, `toolName`, optional `output`, and typed
    `error` fields. Approval or policy refusals return `ok:false` in the payload rather than
    bypassing the gateway tool policy pipeline.
--- a/docs/gateway/sandboxing.md
+++ b/docs/gateway/sandboxing.md
@@ -318,7 +318,7 @@ With the OpenShell backend:
 Inbound media is copied into the active sandbox workspace (`media/inbound/*`).

 <Note>
-**Skills note:** the `read` tool is sandbox-rooted. With `workspaceAccess: "none"`, OpenClaw mirrors eligible skills into the sandbox workspace (`.../skills`) so they can be read. With `"rw"`, workspace skills are readable from `/workspace/skills`.
+**Skills note:** the `read` tool is sandbox-rooted. With `workspaceAccess: "none"`, OpenClaw mirrors eligible skills into the sandbox workspace (`.../skills`) so they can be read. With `"rw"`, workspace skills are readable from `/workspace/skills`, and eligible managed, bundled, or plugin skills are materialized into the generated read-only path `/workspace/.openclaw/sandbox-skills/skills`.
 </Note>

 ## Custom bind mounts
--- a/docs/gateway/security/audit-checks.md
+++ b/docs/gateway/security/audit-checks.md
@@ -39,7 +39,7 @@ exhaustive):
 | `gateway.trusted_proxies_missing`                             | warn          | Reverse-proxy headers are present but not trusted                                    | `gateway.trustedProxies`                                                                             | no       |
 | `gateway.http.no_auth`                                        | warn/critical | Gateway HTTP APIs reachable with `auth.mode="none"`                                  | `gateway.auth.mode`, `gateway.http.endpoints.*`, `plugins.entries.admin-http-rpc`                    | no       |
 | `gateway.http.session_key_override_enabled`                   | info          | HTTP API callers can override `sessionKey`                                           | `gateway.http.allowSessionKeyOverride`                                                               | no       |
-| `gateway.tools_invoke_http.dangerous_allow`                   | warn/critical | Re-enables dangerous tools over HTTP API                                             | `gateway.tools.allow`                                                                                | no       |
+| `gateway.tools_invoke_http.dangerous_allow`                   | warn/critical | Re-enables dangerous tools over HTTP API for owner/admin callers                     | `gateway.tools.allow`                                                                                | no       |
 | `gateway.nodes.allow_commands_dangerous`                      | warn/critical | Enables high-impact node commands (camera/screen/contacts/calendar/SMS)              | `gateway.nodes.allowCommands`                                                                        | no       |
 | `gateway.nodes.deny_commands_ineffective`                     | warn          | Pattern-like deny entries do not match shell text or groups                          | `gateway.nodes.denyCommands`                                                                         | no       |
 | `gateway.tailscale_funnel`                                    | critical      | Public internet exposure                                                             | `gateway.tailscale.mode`                                                                             | no       |
--- a/docs/gateway/tools-invoke-http-api.md
+++ b/docs/gateway/tools-invoke-http-api.md
@@ -128,13 +128,19 @@ You can customize this deny list via `gateway.tools`:
    tools: {
      // Additional tools to block over HTTP /tools/invoke
      deny: ["browser"],
-      // Remove tools from the default deny list
+      // Remove tools from the default deny list for owner/admin callers
      allow: ["gateway"],
    },
  },
 }
 ```

+`gateway.tools.allow` is an exposure override, not a scope upgrade. In
+identity-bearing HTTP modes, `cron`, `gateway`, and `nodes` remain unavailable
+to callers that do not have owner/admin identity (`operator.admin`) even when
+they are listed in `gateway.tools.allow`. Shared-secret bearer auth still follows
+the full trusted-operator rule above.
+
 To help group policies resolve context, you can optionally set:

 - `x-openclaw-message-channel: <channel>` (example: `slack`, `telegram`)
--- a/docs/maturity-scorecard/LTS.md
+++ b/docs/maturity-scorecard/LTS.md
@@ -0,0 +1,376 @@
+---
+title: LTS category proposal
+version: 1
+---
+
+# LTS category proposal
+
+This proposal identifies a minimal set of maturity-scorecard categories that
+should be eligible for the first enterprise-oriented LTS support promise.
+
+Scores are shown as `Coverage/Quality` from the current
+`inventory/<surface>/scores.yaml` files. They are useful context, but LTS
+eligibility here is a human product-support decision and does not require the
+current mechanical threshold of `coverage > 90` and `quality > 80`.
+Coverage and Quality numbers are Codex-generated and still need human
+verification before they are treated as authoritative.
+Completeness is intentionally omitted until that score is ready for use.
+Category names link to the corresponding per-category evidence note.
+
+Legend:
+
+- `Surface`: a top-level product or operating area in the taxonomy, such as `Gateway runtime`, `CLI`, `Slack`, or `Linux Gateway host`.
+- `Category`: a scored capability area within one surface, used as the unit for maturity and LTS inclusion decisions.
+- `✅`: category is included in the proposed initial LTS slice.
+- `➡️`: category is deferred from the proposed initial LTS slice.
+
+## Proposed initial LTS surfaces
+
+### Gateway runtime (12/13)
+
+| Status | Category                                                                                              | Score (Coverage/Quality) |
+| ------ | ----------------------------------------------------------------------------------------------------- | ------------------------ |
+| ✅     | [Gateway Lifecycle](inventory/gateway-runtime/runtime-lifecycle-and-supervision.md)                   | `86/82`                  |
+| ✅     | [WebSocket Connection](inventory/gateway-runtime/websocket-handshake-and-session-establishment.md)    | `84/76`                  |
+| ✅     | [Device Auth and Pairing](inventory/gateway-runtime/device-identity-auth-and-pairing.md)              | `88/72`                  |
+| ✅     | [Security Controls](inventory/gateway-runtime/security-and-hardening-posture.md)                      | `84/74`                  |
+| ✅     | [Approvals and Remote Execution](inventory/gateway-runtime/approval-and-execution-safety.md)          | `88/72`                  |
+| ✅     | [Roles and Permissions](inventory/gateway-runtime/roles-scopes-and-operator-policy.md)                | `85/62`                  |
+| ✅     | [Health, Diagnostics, and Repair](inventory/gateway-runtime/observability-health-and-repair.md)       | `68/62`                  |
+| ✅     | [HTTP APIs](inventory/gateway-runtime/http-apis.md)                                                   | `88/74`                  |
+| ✅     | [Hosted Web Surface](inventory/gateway-runtime/hosted-web-surface.md)                                 | `88/74`                  |
+| ✅     | [Gateway RPC APIs and Events](inventory/gateway-runtime/core-rpc-coverage.md)                         | `68/57`                  |
+| ✅     | [Network Access and Discovery](inventory/gateway-runtime/network-exposure-and-transport-selection.md) | `68/62`                  |
+| ➡️     | [Nodes and Remote Capabilities](inventory/gateway-runtime/node-transport-and-capability-relay.md)     | `84/63`                  |
+| ✅     | [Protocol Compatibility](inventory/gateway-runtime/protocol-typing-and-compatibility.md)              | `72/70`                  |
+
+### Security, auth, pairing, and secrets (5/6)
+
+| Status | Category                                                                                                                            | Score (Coverage/Quality) |
+| ------ | ----------------------------------------------------------------------------------------------------------------------------------- | ------------------------ |
+| ✅     | [Approval Policy and Tool Safeguards](inventory/security-auth-pairing-and-secrets/approval-policy-and-dangerous-tool-safeguards.md) | `86/72`                  |
+| ✅     | [Gateway Auth and Remote Access](inventory/security-auth-pairing-and-secrets/gateway-auth-and-network-exposure.md)                  | `82/68`                  |
+| ✅     | [Device and Node Pairing](inventory/security-auth-pairing-and-secrets/device-identity-and-operator-pairing.md)                      | `83/66`                  |
+| ✅     | [Credential and Secret Hygiene](inventory/security-auth-pairing-and-secrets/secrets-storage-redaction-and-configuration-hygiene.md) | `78/62`                  |
+| ✅     | [Channel Access Control](inventory/security-auth-pairing-and-secrets/channel-identity-allowlists-and-sender-pairing.md)             | `78/66`                  |
+| ➡️     | [Plugin Trust](inventory/security-auth-pairing-and-secrets/plugin-installation-trust-and-security-boundaries.md)                    | `76/70`                  |
+
+### Agent Runtime (6/9)
+
+| Status | Category                                                                                                                             | Score (Coverage/Quality) |
+| ------ | ------------------------------------------------------------------------------------------------------------------------------------ | ------------------------ |
+| ✅     | [Agent Turn Execution](inventory/agent-runtime-and-provider-execution/agent-turn-orchestration-and-runtime-lifecycle.md)             | `82/74`                  |
+| ✅     | [Model and Runtime Selection](inventory/agent-runtime-and-provider-execution/model-selection-provider-routing-and-runtime-policy.md) | `84/72`                  |
+| ✅     | [Hosted Provider Execution](inventory/agent-runtime-and-provider-execution/hosted-provider-adapters-and-payload-compatibility.md)    | `76/70`                  |
+| ✅     | [Tool Execution Controls](inventory/agent-runtime-and-provider-execution/tool-execution-approvals-and-sandbox-policy.md)             | `86/74`                  |
+| ✅     | [Provider Auth](inventory/agent-runtime-and-provider-execution/provider-auth-profiles-and-credential-health.md)                      | `80/66`                  |
+| ➡️     | [External Runtimes and Subagents](inventory/agent-runtime-and-provider-execution/cli-harnesses-external-runtimes-and-subagents.md)   | `78/66`                  |
+| ➡️     | [Local and Self-hosted Providers](inventory/agent-runtime-and-provider-execution/local-and-self-hosted-provider-execution.md)        | `70/60`                  |
+| ➡️     | [Streaming and Progress](inventory/agent-runtime-and-provider-execution/streaming-progress-and-preview-visibility.md)                | `84/70`                  |
+| ✅     | [Tool Calls and Response Handling](inventory/agent-runtime-and-provider-execution/streaming-tool-call-and-response-normalization.md) | `80/66`                  |
+
+### Session, memory, and context engine (6/9)
+
+| Status | Category                                                                                                                          | Score (Coverage/Quality) |
+| ------ | --------------------------------------------------------------------------------------------------------------------------------- | ------------------------ |
+| ✅     | [Session Routing](inventory/session-memory-and-context-engine/session-routing-and-conversation-binding.md)                        | `82/74`                  |
+| ✅     | [CLI Session and Transcript Management](inventory/session-memory-and-context-engine/cli-session-and-transcript-management.md)     | `74/68`                  |
+| ✅     | [Context Engine](inventory/session-memory-and-context-engine/context-engine-and-runtime-assembly.md)                              | `72/80`                  |
+| ✅     | [Transcript Persistence](inventory/session-memory-and-context-engine/transcript-persistence-and-durability.md)                    | `78/58`                  |
+| ✅     | [Token Management](inventory/session-memory-and-context-engine/compaction-pruning-and-token-pressure.md)                          | `78/60`                  |
+| ➡️     | [Cross-client History and Session Parity](inventory/session-memory-and-context-engine/cross-client-history-and-session-parity.md) | `76/62`                  |
+| ➡️     | [Diagnostics, Maintenance, and Recovery](inventory/session-memory-and-context-engine/diagnostics-maintenance-and-recovery.md)     | `72/68`                  |
+| ✅     | [Core Prompts and Context](inventory/session-memory-and-context-engine/instruction-profile-and-context-visibility.md)             | `68/70`                  |
+| ➡️     | [Memory](inventory/session-memory-and-context-engine/memory-files-tools-and-active-memory.md)                                     | `66/58`                  |
+
+### CLI (6/7)
+
+| Status | Category                                                                                                              | Score (Coverage/Quality) |
+| ------ | --------------------------------------------------------------------------------------------------------------------- | ------------------------ |
+| ✅     | [CLI Setup](inventory/cli-install-update-onboard-doctor/package-install-and-cli-entrypoints.md)                       | `78/75`                  |
+| ✅     | [Onboarding and Auth Setup](inventory/cli-install-update-onboard-doctor/first-run-onboarding-and-auth-selection.md)   | `86/78`                  |
+| ✅     | [Gateway Service Management](inventory/cli-install-update-onboard-doctor/gateway-service-install-and-lifecycle.md)    | `88/66`                  |
+| ✅     | [CLI Observability](inventory/cli-install-update-onboard-doctor/status-health-logs-and-diagnostics-support-path.md)   | `84/74`                  |
+| ✅     | [Doctor](inventory/cli-install-update-onboard-doctor/doctor-config-auth-plugin-and-lint.md)                           | `80/68`                  |
+| ✅     | [Updates and Upgrades](inventory/cli-install-update-onboard-doctor/update-channel-and-core-upgrade-flow.md)           | `82/68`                  |
+| ➡️     | [Plugin and Channel Setup](inventory/cli-install-update-onboard-doctor/plugin-and-channel-setup-during-onboarding.md) | `82/72`                  |
+
+### Linux Gateway host (4/5)
+
+| Status | Category                                                                                                              | Score (Coverage/Quality) |
+| ------ | --------------------------------------------------------------------------------------------------------------------- | ------------------------ |
+| ✅     | [Host Setup and Updates](inventory/linux-gateway-host/linux-cli-install-and-update-path.md)                           | `82/78`                  |
+| ✅     | [Gateway Runtime and Service Control](inventory/linux-gateway-host/foreground-gateway-runtime-and-process-control.md) | `83/78`                  |
+| ✅     | [Remote Access and Security](inventory/linux-gateway-host/remote-network-exposure-tls-and-tailscale.md)               | `78/74`                  |
+| ✅     | [Diagnostics and Repair](inventory/linux-gateway-host/diagnostics-logs-doctor-and-repair.md)                          | `82/78`                  |
+| ➡️     | [Deployment Targets](inventory/linux-gateway-host/vps-container-and-cloud-deployment-guidance.md)                     | `76/72`                  |
+
+### Windows via WSL2 (5/6)
+
+| Status | Category                                                                                          | Score (Coverage/Quality) |
+| ------ | ------------------------------------------------------------------------------------------------- | ------------------------ |
+| ✅     | [WSL Setup](inventory/windows-via-wsl2/wsl2-install-and-runtime-prerequisites.md)                 | `76/70`                  |
+| ✅     | [CLI](inventory/windows-via-wsl2/wsl2-cli.md)                                                     | `76/70`                  |
+| ✅     | [Gateway Service Lifecycle](inventory/windows-via-wsl2/systemd-gateway-service-lifecycle.md)      | `64/66`                  |
+| ✅     | [Gateway Access and Exposure](inventory/windows-via-wsl2/auth-secrets-and-exposure-posture.md)    | `70/65`                  |
+| ✅     | [Diagnostics and Repair](inventory/windows-via-wsl2/diagnostics-doctor-logs-and-repair.md)        | `74/72`                  |
+| ➡️     | [Browser and Control UI](inventory/windows-via-wsl2/split-host-browser-and-control-ui-interop.md) | `72/70`                  |
+
+### Native Windows (1/4)
+
+| Status | Category                                                                                                                | Score (Coverage/Quality) |
+| ------ | ----------------------------------------------------------------------------------------------------------------------- | ------------------------ |
+| ✅     | [CLI](inventory/native-windows-cli-and-gateway/native-powershell-install-and-cli-entrypoints.md)                        | `72/66`                  |
+| ➡️     | [Gateway Management](inventory/native-windows-cli-and-gateway/native-gateway-foreground-runtime-and-process-control.md) | `68/62`                  |
+| ➡️     | [Networking](inventory/native-windows-cli-and-gateway/windows-host-networking-portproxy-and-remote-access.md)           | `58/56`                  |
+| ➡️     | [Updates](inventory/native-windows-cli-and-gateway/windows-update-restart-handoff-and-package-locks.md)                 | `74/68`                  |
+
+### Observability (3/5)
+
+| Status | Category                                                                                                           | Score (Coverage/Quality) |
+| ------ | ------------------------------------------------------------------------------------------------------------------ | ------------------------ |
+| ✅     | [Health and Repair](inventory/telemetry-diagnostics-and-observability/health-status-probes.md)                     | `80/76`                  |
+| ✅     | [Logging](inventory/telemetry-diagnostics-and-observability/logging-log-tail-and-redaction.md)                     | `82/84`                  |
+| ✅     | [Session Diagnostics](inventory/telemetry-diagnostics-and-observability/session-run-and-usage-diagnostics.md)      | `82/78`                  |
+| ➡️     | [Diagnostic Collection](inventory/telemetry-diagnostics-and-observability/diagnostics-export-support-bundles.md)   | `76/74`                  |
+| ➡️     | [Telemetry Export](inventory/telemetry-diagnostics-and-observability/diagnostic-events-hooks-and-trace-context.md) | `78/78`                  |
+
+### Channel framework (5/8)
+
+| Status | Category                                                                                                          | Score (Coverage/Quality) |
+| ------ | ----------------------------------------------------------------------------------------------------------------- | ------------------------ |
+| ✅     | [Channel Setup](inventory/channel-framework/channel-setup.md)                                                     | `84/78`                  |
+| ✅     | [Inbound Access and Identity Gates](inventory/channel-framework/inbound-access-and-identity-gates.md)             | `80/76`                  |
+| ✅     | [Conversation Routing and Delivery](inventory/channel-framework/conversation-routing-and-delivery.md)             | `77/71`                  |
+| ✅     | [Outbound Delivery and Reply Pipeline](inventory/channel-framework/outbound-delivery-and-reply-pipeline.md)       | `82/75`                  |
+| ✅     | [Status Health and Operator Controls](inventory/channel-framework/status-health-and-operator-controls.md)         | `82/78`                  |
+| ➡️     | [Channel Actions Commands and Approvals](inventory/channel-framework/channel-actions-commands-and-approvals.md)   | `68/72`                  |
+| ➡️     | [Group Thread and Ambient Room Behavior](inventory/channel-framework/group-thread-and-ambient-room-behavior.md)   | `76/68`                  |
+| ➡️     | [Media Attachments and Rich Channel Data](inventory/channel-framework/media-attachments-and-rich-channel-data.md) | `68/70`                  |
+
+### Slack (5/5)
+
+| Status | Category                                                                                             | Score (Coverage/Quality) |
+| ------ | ---------------------------------------------------------------------------------------------------- | ------------------------ |
+| ✅     | [Channel Setup and Operations](inventory/slack/app-install-auth-manifest-and-scopes.md)              | `74/68`                  |
+| ✅     | [Access and Identity](inventory/slack/dm-pairing-and-sender-authorization.md)                        | `74/70`                  |
+| ✅     | [Conversation Routing and Delivery](inventory/slack/channel-thread-routing-and-session-isolation.md) | `64/66`                  |
+| ✅     | [Media and Rich Content](inventory/slack/media-attachments-files-and-vision.md)                      | `64/66`                  |
+| ✅     | [Native Controls and Approvals](inventory/slack/slash-commands-and-native-command-routing.md)        | `72/70`                  |
+
+### Discord (4/6)
+
+| Status | Category                                                                                                         | Score (Coverage/Quality) |
+| ------ | ---------------------------------------------------------------------------------------------------------------- | ------------------------ |
+| ✅     | [Channel Setup and Operations](inventory/discord/bot-setup-and-account-configuration.md)                         | `74/71`                  |
+| ✅     | [Access and Identity](inventory/discord/dm-pairing-and-sender-authorization.md)                                  | `74/72`                  |
+| ✅     | [Conversation Routing and Delivery](inventory/discord/guild-channel-routing-and-session-isolation.md)            | `74/72`                  |
+| ✅     | [Media and Rich Content](inventory/discord/media-attachments-and-voice-message-handling.md)                      | `74/72`                  |
+| ➡️     | [Native Controls and Approvals](inventory/discord/native-slash-commands-components-and-interactive-callbacks.md) | `58/72`                  |
+| ➡️     | [Realtime Voice and Calls](inventory/discord/realtime-discord-voice-channels.md)                                 | `74/66`                  |
+
+### Telegram (5/5)
+
+| Status | Category                                                                                         | Score (Coverage/Quality) |
+| ------ | ------------------------------------------------------------------------------------------------ | ------------------------ |
+| ✅     | [Channel Setup and Operations](inventory/telegram/bot-setup-and-account-configuration.md)        | `76/70`                  |
+| ✅     | [Access and Identity](inventory/telegram/dm-pairing-and-sender-authorization.md)                 | `76/68`                  |
+| ✅     | [Conversation Routing and Delivery](inventory/telegram/group-forum-topic-and-session-routing.md) | `74/68`                  |
+| ✅     | [Media and Rich Content](inventory/telegram/media-location-polls-and-rich-inputs.md)             | `74/72`                  |
+| ✅     | [Native Controls and Approvals](inventory/telegram/inline-buttons-approvals-and-actions.md)      | `74/72`                  |
+
+### OpenAI / Codex provider path (3/5)
+
+| Status | Category                                                                                                                        | Score (Coverage/Quality) |
+| ------ | ------------------------------------------------------------------------------------------------------------------------------- | ------------------------ |
+| ✅     | [Model and Auth](inventory/openai-codex-provider-path/canonical-openai-model-routing-and-catalog.md)                            | `78/66`                  |
+| ✅     | [Responses and Tool Compatibility](inventory/openai-codex-provider-path/codex-responses-transport-and-payload-compatibility.md) | `76/70`                  |
+| ✅     | [Native Codex Harness](inventory/openai-codex-provider-path/native-codex-app-server-harness-and-thread-lifecycle.md)            | `82/72`                  |
+| ➡️     | [Image and Multimodal Input](inventory/openai-codex-provider-path/image-generation-editing-and-multimodal-input.md)             | `80/72`                  |
+| ➡️     | [Voice and Realtime Audio](inventory/openai-codex-provider-path/realtime-voice-transcription-and-speech.md)                     | `72/68`                  |
+
+### Browser automation and exec/sandbox tools (2/3)
+
+| Status | Category                                                                                                                   | Score (Coverage/Quality) |
+| ------ | -------------------------------------------------------------------------------------------------------------------------- | ------------------------ |
+| ✅     | [Tool Invocation and Execution](inventory/browser-automation-and-exec-sandbox-tools/exec-routing-and-process-lifecycle.md) | `82/79`                  |
+| ✅     | [Sandbox and Tool Policy](inventory/browser-automation-and-exec-sandbox-tools/sandbox-backends-and-workspace-isolation.md) | `76/72`                  |
+| ➡️     | [Browser Automation](inventory/browser-automation-and-exec-sandbox-tools/browser-actions-snapshots-and-artifacts.md)       | `78/74`                  |
+
+### Plugins (7/9)
+
+| Status | Category                                                                                                                | Score (Coverage/Quality) |
+| ------ | ----------------------------------------------------------------------------------------------------------------------- | ------------------------ |
+| ✅     | [Installing and running plugins](inventory/plugin-sdk-and-bundled-plugin-architecture/runtime-loading-and-lifecycle.md) | `86/84`                  |
+| ✅     | [Bundled plugins](inventory/plugin-sdk-and-bundled-plugin-architecture/bundled-plugin-discovery-and-inventory.md)       | `86/84`                  |
+| ➡️     | [Canvas plugin](inventory/plugin-sdk-and-bundled-plugin-architecture/canvas-plugin.md)                                  | `76/66`                  |
+| ✅     | [Plugin approvals](inventory/plugin-sdk-and-bundled-plugin-architecture/approval-and-security-boundaries.md)            | `84/86`                  |
+| ✅     | [Provider and tool plugins](inventory/plugin-sdk-and-bundled-plugin-architecture/provider-tool-plugin-architecture.md)  | `84/82`                  |
+| ✅     | [Channel plugins](inventory/plugin-sdk-and-bundled-plugin-architecture/channel-plugin-architecture.md)                  | `82/78`                  |
+| ✅     | [Authoring and Packaging plugins](inventory/plugin-sdk-and-bundled-plugin-architecture/public-sdk-api-and-subpaths.md)  | `77/74`                  |
+| ✅     | [Publishing plugins](inventory/plugin-sdk-and-bundled-plugin-architecture/distribution-release-and-compatibility.md)    | `79/82`                  |
+| ➡️     | [Testing plugins](inventory/plugin-sdk-and-bundled-plugin-architecture/developer-testing-and-fixtures.md)               | `84/81`                  |
+
+## Prioritized non-LTS candidates
+
+This section ranks the currently non-LTS surface/category pairs that should be
+prioritized for future LTS eligibility. It is based on the current taxonomy,
+`inventory/**/scores.yaml`, and sentiment from local `discrawl` and `gitcrawl`
+archives.
+
+Current scan basis:
+
+- Initial LTS slice: `68` categories.
+- Total taxonomy: `279` categories.
+- Non-LTS scan scope: `211` categories.
+- `gitcrawl` freshness: synced through 2026-05-28.
+- `discrawl` freshness: synced through 2026-05-29.
+
+### First wave
+
+#### Docker / Podman hosting
+
+- [Container Setup](inventory/docker-podman-hosting/docker-install-compose-and-first-run-setup.md): `74/76`
+- [Container Operations](inventory/docker-podman-hosting/runtime-configuration-state-volumes-and-secrets.md): `76/70`
+- [Image Release and Validation](inventory/docker-podman-hosting/image-build-release-packaging-and-attestations.md): `84/78`
+- [Agent Sandbox and Tooling](inventory/docker-podman-hosting/containerized-agents-sandbox-and-tooling-support.md): `75/68`
+
+Why: this is the strongest enterprise deployment gap outside the initial LTS
+slice. Discord support sentiment repeatedly clusters around VPS, Docker, WSL,
+volume persistence, secrets, update, and rollback confusion. GitHub also has a
+current Docker gateway restart-loop issue, `#86612`.
+
+#### Microsoft Teams
+
+- [Channel Setup and Operations](inventory/microsoft-teams/setup-app-registration-credentials-admin-install.md): `58/64`
+- [Access and Identity](inventory/microsoft-teams/dm-pairing-sender-authorization-config-writes.md): `60/62`
+- [Conversation Routing and Delivery](inventory/microsoft-teams/team-channel-routing-mention-gates-sessions-thread-context.md): `68/66`
+- [Media and Rich Content](inventory/microsoft-teams/media-attachments-file-consent-graph-file-flows.md): `62/58`
+- [Native Controls and Approvals](inventory/microsoft-teams/actions-reactions-polls-approvals-group-management.md): `64/66`
+
+Why: Teams has low current scores, but it is the obvious second enterprise
+workplace channel after Slack. GitHub has strong concrete signal for channel
+session behavior, multiple-bot support, attachment handling, managed identity,
+and setup/admin complexity: `#81084`, `#71058`, `#65329`, `#67177`, and
+`#85149`.
+
+#### Cross-provider auth
+
+- Anthropic provider path / [Provider Auth and Recovery](inventory/anthropic-provider-path/auth-onboarding-and-credential-profile-health.md): `78/70`
+- Google provider path / [Provider Setup and Credentials](inventory/google-provider-path/provider-auth-credentials-and-operator-setup.md): `72/60`
+
+Why: provider auth is one of the highest recurring Discord support themes.
+Users get stuck on missing auth, fallback routing, cooldowns, stale profiles,
+plaintext secrets, provider mismatch, and unclear recovery commands. These
+categories are prerequisites for making any multi-provider enterprise harness
+reliable.
+
+#### Gateway Web App
+
+- [Browser Access and Trust](inventory/browser-control-ui-and-webchat/gateway-connection-auth-device-pairing-and-origins.md): `84/68`
+- [Configuration](inventory/browser-control-ui-and-webchat/config-schema-editing-and-safe-writes.md): `82/78`
+- [Browser UI](inventory/browser-control-ui-and-webchat/control-ui-static-shell-routing-and-pwa.md): `74/72`
+- [WebChat Conversations](inventory/browser-control-ui-and-webchat/chat-composer-session-model-controls-and-rendering.md): `78/66`
+- [Operator Console](inventory/browser-control-ui-and-webchat/diagnostics-logs-update-and-activity.md): `78/74`
+
+Why: this is the operator and admin surface for an enterprise deployment.
+GitHub has open UX and runtime issues around auth gates, transcript loss,
+uploads, CJK input and streaming, and partial reloads: `#85750`, `#72500`,
+`#83344`, `#81606`, `#86035`, `#60247`, and `#86435`.
+
+#### Automation: cron, hooks, tasks, polling
+
+- [Cron Jobs](inventory/automation-cron-hooks-tasks-polling/cron-job-lifecycle.md): `82/73`
+- [Background Tasks and Flows](inventory/automation-cron-hooks-tasks-polling/background-task-ledger.md): `73/68`
+- [Event Ingress](inventory/automation-cron-hooks-tasks-polling/channel-polling-webhooks.md): `65/58`
+- [Automation Hooks](inventory/automation-cron-hooks-tasks-polling/internal-hooks.md): `78/72`
+- [Heartbeat](inventory/automation-cron-hooks-tasks-polling/heartbeat-commitments.md): `82/72`
+
+Why: enterprise agents need durable scheduled work, alerting, and recovery.
+GitHub has current signal for startup races, duplicate names, silent data loss,
+status visibility, elevated scoping, and owner-tool stripping: `#75889`,
+`#76160`, `#83538`, `#51184`, `#41484`, and `#72954`.
+
+#### TUI
+
+- [Runtime Modes](inventory/tui-and-terminal-ux/launch-modes-and-cli-entrypoints.md): `78/72`
+- [Input and Commands](inventory/tui-and-terminal-ux/composer-keybindings-and-input-editing.md): `76/70`
+- [Session Management](inventory/tui-and-terminal-ux/session-lifecycle-history-and-resume.md): `80/68`
+- [Local Shell Execution](inventory/tui-and-terminal-ux/local-shell-execution-and-approval-boundary.md): `70/76`
+- [Rendering and Output Safety](inventory/tui-and-terminal-ux/streaming-message-rendering-and-tool-cards.md): `76/70`
+
+Why: TUI is a real operator-facing surface with broad docs and decent baseline
+coverage, but it is still less proven as a primary supported workflow than the
+CLI and Gateway host paths in the initial slice. Promote it when launch modes,
+command/input behavior, session resume, local shell boundaries, and streaming
+rendering are treated as one terminal-native support promise.
+
+### Second wave
+
+#### macOS Gateway host
+
+- [Gateway Service Lifecycle](inventory/macos-gateway-host/launchagent-service-lifecycle.md): `82/76`
+- [Local Gateway Integration](inventory/macos-gateway-host/local-gateway-mode-host-configuration.md): `76/82`
+- [Diagnostics and Observability](inventory/macos-gateway-host/diagnostics-logs-operator-observability.md): `80/83`
+- [CLI Setup](inventory/macos-gateway-host/cli-install-runtime-prerequisites.md): `82/76`
+- [Remote Gateway Mode](inventory/macos-gateway-host/remote-gateway-mode-transport.md): `72/82`
+
+Why: Linux is the cleaner first LTS host, but macOS has heavy real-world support
+volume and strong desktop-gateway relevance. Current issues include LaunchAgent
+reporting, bind behavior, cert and update drift, external-volume failures,
+Homebrew/runtime drift, unrecoverable upgrades, restart loops, and install
+failures: `#81751`, `#65619`, `#86579`, `#87199`, `#75250`, `#85027`,
+`#73673`, and `#60398`.
+
+#### Browser automation and exec/sandbox tools
+
+- [Browser Automation](inventory/browser-automation-and-exec-sandbox-tools/browser-actions-snapshots-and-artifacts.md): `78/74`
+
+Why: the initial LTS slice already includes core tool invocation and sandbox
+policy, but browser execution is part of a practical enterprise agent harness.
+Open issues include sandbox/runtime mismatch, non-Docker backend support,
+noVNC/CJK behavior, upload access, timeouts, and Control UI responsiveness.
+
+#### Web search tools
+
+- [Network Safety](inventory/web-search-tools/network-safety-ssrf-redirects-and-untrusted-content.md): `84/84`
+- [Tool Availability and Fetch](inventory/web-search-tools/tool-exposure-policy-and-runtime-tool-wiring.md): `82/80`
+- [Search Providers](inventory/web-search-tools/bundled-structured-search-providers.md): `76/72`
+- [Setup and Diagnostics](inventory/web-search-tools/operator-setup-provider-selection-and-credential-repair.md): `74/70`
+
+Why: web fetch and structured search are useful for enterprise research
+workflows, but they are outside the minimal first support promise. Promote this
+surface when network safety, runtime tool wiring, provider selection, timeout
+behavior, and operator repair are accepted together. GitHub has signal for
+search timeouts, provider-native tools, tool drops, and provider options:
+`#87505`, `#23353`, `#77826`, and `#84872`.
+
+#### Gateway runtime
+
+- [Nodes and Remote Capabilities](inventory/gateway-runtime/node-transport-and-capability-relay.md): `84/63`
+
+Why: node pairing and remote node capability relay still harden the perimeter
+around the existing Gateway LTS promise and need separate operational proof.
+
+### Lower priority for LTS
+
+Observability should add Diagnostic Collection and
+Telemetry Export hardening after the runtime and channel priorities above.
+Plugin SDK should add Testing plugins, Packaging plugins, then Publishing
+plugins; this matters for ecosystem durability, but has weaker direct
+enterprise sentiment than Docker, Teams, Slack, and provider auth.
+
+Continue to defer mobile apps, voice, media generation, regional channels,
+iMessage, Matrix, WhatsApp, and long-tail providers unless a specific customer
+commitment changes the support boundary.
+
+## Interpretation
+
+This LTS slice is intentionally conservative. It promises enough for an
+enterprise to run a usable agent harness with Gateway, auth and policy,
+session/runtime execution, operational diagnostics, Linux hosting, Slack,
+Discord, Telegram, the OpenAI/Codex provider path, and tool execution controls.
+
+Categories outside this slice can keep shipping, but should not be part of the
+initial LTS guarantee until their owner, support boundary, upgrade behavior,
+and enterprise failure modes are explicitly accepted.
--- a/docs/maturity-scorecard/README.md
+++ b/docs/maturity-scorecard/README.md
@@ -0,0 +1,214 @@
+---
+title: Maturity scorecard process
+version: 3
+---
+
+# Maturity scorecard process
+
+This directory is an artifact root maintained by the local `claw-score` skill
+defined in the external `claw-score` `SKILL.md`.
+
+The skill owns scoring policy, scoring workflow, validation, artifact shape,
+and renderer expectations. This README is the human-facing directory contract
+and process overview.
+
+The top-level scorecard layout is owned by the skill template
+`.agents/skills/claw-score/references/maturity-scorecard-template.md`, then
+rendered into [maturity-scorecard.md](maturity-scorecard.md).
+
+Operationally, the skill separates three workflows: taxonomy maintenance, score
+computation, and skill self-maintenance. The detailed agent instructions for
+those live in the skill reference files, not in this README.
+
+## Source files
+
+- `taxonomy.yaml` is the source of truth for surfaces, maturity levels,
+  surface ids, category definitions, category `human_lts_override` values,
+  category `docs` reading lists, surface `completeness_instructions`, and
+  `last_score_run` provenance for the active in-repo surfaces.
+- `/Users/kevinlin/tmp/maturity/taxonomy.yaml` stores the archived taxonomy for
+  the other surfaces that are temporarily out of the active in-repo scope.
+- `<artifact-root>/<surface>/scores.yaml` is the per-surface score source for
+  Coverage, Quality, Completeness, and row identity (`name` and
+  `category_note`). The renderer joins taxonomy-owned category metadata from
+  `taxonomy.yaml`. Active artifact paths are derived by naming convention from
+  the taxonomy surface id: `inventory/<surface-id>/report.md`,
+  `inventory/<surface-id>/scores.yaml`, and `inventory/<surface-id>/<category-note>`.
+  Historical archived surfaces live at `/Users/kevinlin/tmp/maturity` and are
+  intentionally skipped by the normal `claw-score` render and sync workflows.
+- [maturity-scorecard.md](maturity-scorecard.md), [taxonomy.md](taxonomy.md),
+  [taxonomy-outline.md](taxonomy-outline.md), and
+  `<artifact-root>/<surface>/report.md` are rendered Markdown artifacts. Do not
+  hand-edit their generated tables.
+
+## Directory layout
+
+```text
+docs/maturity-scorecard/
+├── README.md
+├── taxonomy.md
+├── taxonomy-outline.md
+├── maturity-scorecard.md
+└── inventory/
+    ├── gateway-runtime/
+    │   ├── report.md
+    │   ├── <category>.md
+    │   └── scores.yaml
+    └── plugin-sdk-and-bundled-plugin-architecture/
+        ├── report.md
+        ├── <category>.md
+        └── scores.yaml
+```
+
+Interpret these files as follows:
+
+- `README.md`: human-facing process overview and artifact contract.
+- `taxonomy.md`: rendered taxonomy reference generated from the skill-owned
+  taxonomy YAML.
+- `taxonomy-outline.md`: rendered surface outline grouped by family, generated
+  from the skill-owned taxonomy YAML.
+- [maturity-scorecard.md](maturity-scorecard.md): rendered top-level scorecard generated from the
+  skill-owned taxonomy.
+- `inventory/`: canonical artifact root for active maturity-scorecard work.
+- `/Users/kevinlin/tmp/maturity`: archive location for historical artifact
+  trees and the archived taxonomy file. Treat it as out of scope unless
+  explicitly restoring archived work.
+- `<artifact-root>/<surface>/scores.yaml`: per-surface score source generated or
+  refreshed by the skill.
+- `<artifact-root>/<surface>/report.md`: rendered surface report.
+- `<artifact-root>/<surface>/<category>.md`: per-category evidence note.
+
+## Concepts
+
+- `taxonomy`: the skill-owned YAML file that defines the top-level maturity
+  model, surface inventory, per-surface category metadata, and `last_score_run`
+  state.
+- `scorecard`: the rendered top-level Markdown overview generated from the
+  taxonomy. Its generated table includes per-surface Coverage, Quality,
+  Completeness, and LTS status columns derived from `scores.yaml` plus
+  taxonomy `human_lts_override` metadata.
+- `taxonomy doc`: the rendered Markdown reference view of the taxonomy,
+  including the surface inventory and per-surface categories.
+- `taxonomy outline`: the rendered Markdown outline of active surfaces grouped
+  by family.
+- `surface`: one scored product or platform area from the taxonomy.
+- `surface slug`: the stable filesystem-friendly identifier used for a
+  surface's inventory directory and filenames.
+- `artifact root`: the per-surface parent directory, derived by naming convention
+  from the taxonomy surface id. Active work currently uses `inventory/<surface-id>/`;
+  archived surfaces are marked in taxonomy with `archived: true`.
+- `category`: a significant user-facing or operator-facing part of a surface
+  that gets its own evidence note and row in the per-surface score YAML. A
+  category should represent a capability area a user can actually utilize, not
+  an internal implementation bucket.
+- `category note`: the per-category Markdown evidence artifact
+  `<artifact-root>/<surface>/<category>.md`. Notes include a taxonomy-derived
+  `## Features` section that mirrors the category feature list from
+  `taxonomy.yaml`.
+- `scores.yaml`: the canonical per-surface score source
+  `<artifact-root>/<surface>/scores.yaml`; it stores Coverage, Quality,
+  Completeness, and row identity, while taxonomy owns features, docs, search
+  anchors, `human_lts_override`, and surface-level
+  `completeness_instructions`.
+- `LTS.md`: hand-curated initial LTS slice. Its status rows must stay
+  synchronized with taxonomy `human_lts_override` values and the rendered
+  per-surface report matrix LTS cells; verify this by running
+  `.agents/skills/claw-score/scripts/validate_lts_sync.py`.
+- `completeness_instructions`: taxonomy-owned surface metadata pointing to a
+  skill-relative rubric file under `.agents/skills/claw-score/` that explains
+  how to score Completeness for that surface.
+- `features`: taxonomy-owned category metadata stored as objects with `name`
+  and `description`. Keep `name` short and scannable; put the fuller
+  explanation in `description`. A feature should be a user-invokable
+  capability for that surface/category, not a handshake step or other
+  implementation-only detail.
+- `docs`: taxonomy-owned category metadata listing repo-relative doc URLs that
+  best cover the category. Keep this as a short primary-reading list, not a
+  full evidence dump. During taxonomy maintenance, this list should be chosen
+  by scanning the OpenClaw docs corpus for the category and selecting the
+  canonical pages a reviewer should open first.
+- `surface report`: the rendered per-surface Markdown report
+  `<artifact-root>/<surface>/report.md`.
+
+Category display names should be short, operator-facing capability names.
+Prefer fewer coarser categories, merge related concepts that share docs and
+operator workflows, and keep old or implementation-heavy terminology in
+`search_anchors`, feature descriptions, or evidence rather than in the display
+name.
+
+## Versioning
+
+Markdown scorecard artifacts use frontmatter `version` for the scoring process
+that produced that document.
+
+During a real rescore, the surface report and category notes should have
+frontmatter `version` equal to the active `scores.yaml process_version`.
+
+YAML sources use:
+
+- `version`: schema version for the file shape. This starts at `1`.
+- `process_version`: scoring process version. Current scoring runs use `3`.
+
+Do not bulk-update existing per-surface `last_score_run.process_version` or
+`scores.yaml process_version` for render-only, taxonomy-only, or mechanical doc
+changes. Update a surface's scoring provenance when that surface is actually
+rescored with refreshed evidence.
+
+## LTS
+
+LTS is generated, not scored by category agents.
+
+The renderer marks a category as LTS when either condition is true:
+
+- `quality > 80 and coverage > 90`
+- the matching taxonomy category sets `human_lts_override: true`
+
+Keep `human_lts_override` in `taxonomy.yaml`. Do not write it into
+`scores.yaml`.
+
+## Regeneration
+
+Use the skill scripts from the repository root:
+
+```bash
+python3 .agents/skills/claw-score/scripts/sync_taxonomy_categories.py \
+  --taxonomy .agents/skills/claw-score/taxonomy.yaml \
+  --scorecard-root docs/maturity-scorecard
+
+python3 .agents/skills/claw-score/scripts/sync_scores_yaml.py \
+  --taxonomy .agents/skills/claw-score/taxonomy.yaml \
+  --scorecard-root docs/maturity-scorecard
+
+python3 .agents/skills/claw-score/scripts/render_taxonomy_from_taxonomy.py \
+  --taxonomy .agents/skills/claw-score/taxonomy.yaml \
+  --taxonomy-doc docs/maturity-scorecard/taxonomy.md \
+  --taxonomy-outline-doc docs/maturity-scorecard/taxonomy-outline.md
+
+python3 .agents/skills/claw-score/scripts/render_scorecard_from_taxonomy.py \
+  --taxonomy .agents/skills/claw-score/taxonomy.yaml \
+  --scorecard docs/maturity-scorecard/maturity-scorecard.md
+```
+
+Use each command's `--check` mode before handoff when verifying artifacts.
+
+If the skill's renderers, sync scripts, or templates change, rerun the relevant
+commands above and update this README in the same change when the artifact
+contract or regeneration guidance changes.
+
+## Editing rules
+
+- For scoring, rescoring, audits, taxonomy changes, report regeneration, or
+  output-shape changes, use `claw-score`.
+- When updating the `claw-score` skill itself, update the relevant source
+  files under `.agents/skills/claw-score/` and keep this README aligned with
+  any artifact-contract, terminology, or regeneration changes.
+- Do not hand-edit generated tables or inventories in `taxonomy.md` or
+  `taxonomy-outline.md`; rerender them through the skill scripts.
+- Do not hand-edit generated score tables in `maturity-scorecard.md` or
+  `<artifact-root>/<surface>/report.md`; rerender them through the skill
+  scripts. That includes the report's feature lists, which are rendered from
+  taxonomy.
+- Do not hand-edit taxonomy-derived `## Features` sections in category notes;
+  update `taxonomy.yaml` and rerender the owning surface report instead.
+- Keep agent instructions in the external `claw-score` `SKILL.md`, not in this
+  directory.
--- a/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/agent-turn-orchestration-and-runtime-lifecycle.md
+++ b/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/agent-turn-orchestration-and-runtime-lifecycle.md
@@ -0,0 +1,89 @@
+---
+title: "Agent Runtime - Agent Turn Execution Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Agent Runtime - Agent Turn Execution Maturity Note
+
+## Summary
+
+Agent turns have a first-class runtime lifecycle: docs explain gateway/embedded starts, queueing, session locks, event streams, timeouts, and early termination; source centralizes turn execution in `runAgentTurnWithFallback`; tests exercise fallback orchestration, aborts, lifecycle backstops, event delivery, and runtime telemetry. Quality is Beta because archive evidence still shows recent empty/failed replies and timeout edge cases around long-running or restarted embedded turns.
+
+## Category Scope
+
+This category covers user/operator-visible turn execution: starting an agent turn, choosing gateway versus embedded runtime, establishing session/run ids, applying queue locks, bridging events, honoring aborts, timing provider/model work, and emitting terminal outcomes.
+
+## Features
+
+- Turn startup and runtime choice: Starting an agent turn and choosing gateway versus embedded runtime execution.
+- Session and run coordination: Establishing session and run ids, queue locks, and related execution coordination.
+- Abort and terminal outcomes: Honoring aborts, timing provider/model work, and emitting terminal outcomes.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Stable (82%)`
+
+Coverage is broad across concepts, CLI docs, source, and tests. The remaining coverage gap is direct scenario proof for every runtime restart/timeout path per provider release.
+
+## Quality Score
+
+- Score: `Beta (74%)`
+
+The lifecycle has strong guardrails and diagnostics, but recent operational reports still show terminal-empty replies, restart recovery cases, and timeout-sensitive local/embedded runs that need clearer operator recovery behavior.
+
+## Completeness Score
+
+- Score: `Stable (82%)`
+- Surface instructions: evaluated against `references/completeness/agent-runtime-and-provider-execution.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Turn startup and runtime choice, Session and run coordination, and Abort and terminal outcomes.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- Some runtime failure modes are documented through tests and archived issues rather than a single operator-facing troubleshooting guide.
+- Long-running local or external runtime turns still appear sensitive to timeout configuration.
+- Archive searches found little direct GitHub issue coverage for the narrow `agent.wait` and embedded fallback terms, so Discord evidence carries more of the field-signal burden.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/concepts/agent-loop.md` documents the agent RPC shape, `agentCommand`, `runEmbeddedAgent`, event bridge, `agent.wait`, queueing/session locks, streaming/tool/final payload behavior, event streams, timeouts, and early termination reasons.
+- `/Users/kevinlin/code/openclaw/docs/cli/agent.md` documents running an agent turn via Gateway, model/thinking/local/deliver/timeout options, local preload behavior, gateway timeout fallback session/run ids, and SIGTERM/SIGINT `chat.abort`.
+- `/Users/kevinlin/code/openclaw/docs/concepts/agent-runtimes.md` explains runtimes versus providers/model/channel, embedded harnesses, CLI backends, Codex surfaces, runtime ownership, runtime selection, and fail-closed explicit runtimes.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.ts` imports `runEmbeddedAgent`, `runWithModelFallback`, runtime provider resolution, and outcome planning; it implements turn timing, context window resolution, `runAgentTurnWithFallback`, fallback candidate auth/profile setup, live model switches, run diagnostics, reply media, and compaction notices.
+- `/Users/kevinlin/code/openclaw/src/agents/cli-runner.ts` finalizes CLI context engine turns, persists approved CLI transcripts, and runs CLI agent turns through the same hook path.
+- `/Users/kevinlin/code/openclaw/packages/agent-core/src/agent-loop.test.ts` anchors EventStream failure handling in the lower-level agent loop package.
+
+### Integration tests
+
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` covers `runAgentTurnWithFallback`, abort signal propagation, queued fallback rechecks, fallback auth availability, CLI assistant event previews, lifecycle terminal backstops, gateway restart copy, external error formatting, live model switch restart/retry caps, and auth profile state on retries.
+- `/Users/kevinlin/code/openclaw/src/agents/openclaw-tools.subagents.sessions-spawn.lifecycle.test.ts` covers spawned-session lifecycle cleanup, enough gateway request time, MCP cleanup, delete via `agent.wait`, timeout handling, account routing, and announce behavior.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/packages/agent-core/src/agent-loop.test.ts` covers EventStream error paths.
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` includes focused unit coverage for fallback orchestration, terminal result classification, empty result handling, and turn-level diagnostics.
+
+### Gitcrawl queries
+
+- `gitcrawl --json search issues -R openclaw/openclaw "agent loop gateway_timeout chat.abort embedded fallback"` returned no matching issues, suggesting the exact lifecycle query is not where field reports are clustered.
+- `gitcrawl --json search issues -R openclaw/openclaw "runAgentTurnWithFallback agent runner timeout"` returned no matching issues.
+- `gitcrawl --json search issues -R openclaw/openclaw "local model provider context timeout Ollama"` returned issues including #87642 on exposing `waitForRun` timeout for slow local LLMs, #86599 on local provider calls blocking the gateway event loop on Windows, and #74204 on memory embed timeout for local GGUF.
+
+### Discrawl queries
+
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "runEmbeddedAgent agent.wait"` returned no matches.
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "agent.wait gateway_timeout embedded fallback"` returned no matches.
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "model fallback decision"` returned recent discussions around openai-codex timeouts, fallback decisions, No API key fallback decisions, OpenRouter timeout decisions, missing bearer logs, and repeated fallback errors in session repair loops.
--- a/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/cli-harnesses-external-runtimes-and-subagents.md
+++ b/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/cli-harnesses-external-runtimes-and-subagents.md
@@ -0,0 +1,94 @@
+---
+title: "Agent Runtime - External Runtimes and Subagents Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Agent Runtime - External Runtimes and Subagents Maturity Note
+
+## Summary
+
+OpenClaw treats external runtimes as a first-class execution mode: docs separate providers from runtimes, explain Codex/OpenClaw/ACP/external harness ownership, document Claude CLI and Gemini CLI aliases, and describe subagent auth, delivery, cleanup, and recovery. Source bridges CLI transcripts and events through `runCliAgent`, and tests cover CLI previews, subagent sessions, lifecycle cleanup, and runtime override boundaries. Quality is Alpha because archives show recurring issues around `claude-cli`, ACP/subagent delivery, unsupported backend settings, trajectory artifacts, and auth propagation from main sessions.
+
+## Category Scope
+
+This category covers operator-visible execution outside the default embedded
+provider path: choosing external harnesses, using CLI runtime aliases, running
+subagent turns, and recovering from cleanup, timeout, or liveness issues in
+those external runtimes.
+
+## Features
+
+- External harness selection: Choosing Codex app-server, ACP, and other external runtime harnesses.
+- CLI runtime aliases: Runtime aliases and CLI-based execution paths such as Claude CLI and Gemini CLI.
+- Subagent turns: Spawning, delivering, and announcing subagent work outside the default embedded path.
+- Runtime recovery: Cleanup, timeout, and liveness behavior for external runtimes and subagents.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Beta (78%)`
+
+Coverage is good for Codex/CLI/subagent workflows, but external harnesses and ACP have less uniform proof than the embedded runtime.
+
+## Quality Score
+
+- Score: `Alpha (66%)`
+
+CLI/subagent execution is functional but operationally fragile where backend-specific auth, tool permission boundaries, unsupported settings, and result delivery vary by runtime.
+
+## Completeness Score
+
+- Score: `Beta (78%)`
+- Surface instructions: evaluated against `references/completeness/agent-runtime-and-provider-execution.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for External harness selection, CLI runtime aliases, Subagent turns, and Runtime recovery.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- External runtime behavior shifts with upstream CLI tools and needs more release-by-release proof.
+- Subagent UX and lifecycle semantics are well tested but still produce field reports around delivery, account routing, and parity.
+- Some CLI artifacts and diagnostics, such as pure `claude-cli` trajectories, have active gap reports.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/concepts/agent-runtimes.md` documents runtimes versus providers/model/channel, embedded harnesses versus CLI backends, Codex surfaces, runtime decision tree, ownership split, runtime selection, fail-closed explicit runtimes, CLI backend aliases, Claude CLI, OpenAI default to Codex harness, and compatibility contract.
+- `/Users/kevinlin/code/openclaw/docs/providers/anthropic.md` documents Claude CLI setup, same-host requirement, canonical Anthropic refs with `agentRuntime.id: "claude-cli"`, legacy refs, and thinking defaults.
+- `/Users/kevinlin/code/openclaw/docs/providers/google.md` documents Gemini CLI OAuth setup, plugin capabilities, legacy aliases, and capability expectations.
+- `/Users/kevinlin/code/openclaw/docs/tools/subagents.md` documents subagent auth, announce behavior, delivery routing, sessions_history sanitation, subagent tool policy, concurrency, liveness, and recovery.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/src/agents/cli-runner.ts` persists approved CLI user turn transcripts, finalizes CLI context engine turns, invokes `before_agent_reply`, and runs CLI agent turns.
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.ts` resolves CLI runtime execution providers, handles runtime overrides, bridges CLI assistant events into previews, forwards runtime plan/approval/command/patch events, and enforces live switch retry caps.
+- `/Users/kevinlin/code/openclaw/src/agents/agent-tools.ts` applies subagent/inherited tool policy and configures tool execution boundaries used by spawned runtime sessions.
+
+### Integration tests
+
+- `/Users/kevinlin/code/openclaw/src/agents/openclaw-tools.subagents.sessions-spawn.lifecycle.test.ts` covers `sessions_spawn` lifecycle behavior, cleanup, enough gateway request time, MCP cleanup, delete via `agent.wait`, timeout handling, account routing, and announce behavior.
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` covers forwarding static extra prompts to CLI backends, prepared CLI user turns at the persistence boundary, no CLI session reuse for room-event turns, CLI assistant event previews, reasoning previews, CLI runtime override boundaries, and Codex app-server telemetry.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` includes focused tests for runtime override resolution, CLI preview bridging, external error formatting, gateway restart recovery copy, and live model switch retry caps.
+- `/Users/kevinlin/code/openclaw/src/agents/agent-tools.message-provider-policy.test.ts` covers provider-policy behavior that affects external runtime tool surfaces.
+
+### Gitcrawl queries
+
+- `gitcrawl --json search issues -R openclaw/openclaw "claude-cli codex cli harness subagent sessions_spawn"` returned #73097 on PI harness ignoring `cliBackends` configuration and splitting subagent execution from chat path.
+- `gitcrawl --json search issues -R openclaw/openclaw "openai-codex Anthropic Google provider tool call"` returned #80667 on `trajectory.jsonl` never being written for pure `claude-cli` sessions and #78196 on extension plugin loader behavior.
+- `gitcrawl --json search issues -R openclaw/openclaw "local model provider context timeout Ollama"` returned #81214 on an OpenClaw subagent regression and #87642 on exposing subagent-control `waitForRun` timeout for slow local LLMs.
+
+### Discrawl queries
+
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "sessions_spawn claude-cli"` returned Apr-May 2026 discussions about ACP runtime failures with Claude Opus settings, Claude CLI tool availability, tool permission boundaries/sandboxing, ACP/sub-agent relay UX, and subagent/ACP result delivery regressions.
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "tool call streaming"` returned Claude CLI/WebChat tool visibility concerns and app-server watchdog discussions that affect external runtime delivery.
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "No API key found provider openai-codex"` returned related Codex OAuth profile propagation and rebuild-recognition reports that affect external runtime/subagent sessions.
--- a/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/hosted-provider-adapters-and-payload-compatibility.md
+++ b/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/hosted-provider-adapters-and-payload-compatibility.md
@@ -0,0 +1,98 @@
+---
+title: "Agent Runtime - Hosted Provider Execution Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Agent Runtime - Hosted Provider Execution Maturity Note
+
+## Summary
+
+Hosted provider adapter coverage is solid for OpenAI/Codex, Anthropic, Google, and OpenAI-compatible routes. Docs explain provider-specific setup, thinking controls, OAuth/API-key distinctions, CLI runtime alternatives, and capability expectations. Source includes provider-specific message/tool/thinking conversion, timeout handling, websocket/SSE behavior, prompt-cache affinity, and tool-call normalization. Quality is Beta because hosted provider payload semantics still change quickly, especially for Codex OAuth routing, Anthropic streaming JSON, Google tool-call ids, and OpenAI-compatible tool behavior.
+
+## Category Scope
+
+This category covers operator-visible hosted provider execution: running turns
+against hosted providers, using provider-specific model options, exercising
+hosted tool use, applying reasoning or cache controls, and receiving streamed
+or final replies despite provider payload differences.
+
+## Features
+
+- Hosted provider turns: Running agent turns against hosted providers such as OpenAI, Anthropic, and Google.
+- Provider-specific model options: Provider-specific model parameters and runtime request settings exposed to users or operators.
+- Hosted tool use: Tool use behavior when the active runtime is a hosted provider.
+- Reasoning and cache controls: Provider-specific reasoning, thinking, and cache-related controls during hosted execution.
+- Hosted streaming and replies: Operator-visible streaming and reply behavior while hosted adapters normalize payload differences.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Beta (76%)`
+
+Coverage is good for the major providers, but OpenAI-compatible and fast-moving hosted provider variants still rely on scattered tests, docs, and archive evidence rather than a uniform compatibility table.
+
+## Quality Score
+
+- Score: `Beta (70%)`
+
+Adapters include many compatibility guards, but provider payload drift and streaming/tool-call quirks remain visible in archived issues and Discord reports.
+
+## Completeness Score
+
+- Score: `Beta (76%)`
+- Surface instructions: evaluated against `references/completeness/agent-runtime-and-provider-execution.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Hosted provider turns, Provider-specific model options, Hosted tool use, Reasoning and cache controls, and Hosted streaming and replies.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- Provider-specific tool-call and thinking semantics still need recurring live proof.
+- OpenAI-compatible hosted providers and route aliases have less systematic evidence than first-party routes.
+- Some adapter failures surface as generic fallback or missing-key errors, making operator diagnosis harder.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/providers/openai.md` documents OpenAI/Codex route distinctions, naming maps, capability tables, GPT-5.5/Codex app-server notes, Codex OAuth setup, and default agent routes.
+- `/Users/kevinlin/code/openclaw/docs/providers/anthropic.md` documents Anthropic API key versus Claude CLI, canonical refs, legacy refs, thinking defaults, and prompt caching.
+- `/Users/kevinlin/code/openclaw/docs/providers/google.md` documents Google plugin capabilities, Gemini CLI OAuth, model refs, capabilities, and thinking/reasoning controls.
+- `/Users/kevinlin/code/openclaw/docs/concepts/models.md` documents model ref/runtime separation and fallback selection that provider adapters consume.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/src/llm/providers/openai-codex-responses.ts` implements Codex Responses transport setup, account id handling, body/header construction, timeout signals, websocket path, retryable error classification, and prompt-cache affinity.
+- `/Users/kevinlin/code/openclaw/src/llm/providers/anthropic.ts` implements Anthropic stream setup, request parameter construction, event handling, OAuth system prompt handling, thinking modes, tool id normalization, message transforms, fine-grained tool streaming beta, and tool conversion.
+- `/Users/kevinlin/code/openclaw/src/llm/providers/google-shared.ts` implements Google thinking-part semantics, thought signature retention, tool-call id requirements, assistant text/thinking/tool-call conversion, and tool-result conversion.
+- `/Users/kevinlin/code/openclaw/src/agents/transport-stream-shared.ts` provides cross-provider transport stream sanitization, tool-call argument coercion, metadata merge, finalization, and error stream handling.
+
+### Integration tests
+
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` covers OpenAI session runtime overrides, Codex app-server telemetry, external error formatting, missing custom tool output guidance, and Bedrock tool mismatch reset hints.
+- `/Users/kevinlin/code/openclaw/src/commands/models.list.e2e.test.ts` covers provider catalog rows, auth/local/provider behavior, and catalog responsiveness.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/src/llm/providers/openai-codex-responses.test.ts` covers account id decoding, transport timeouts, websocket/SSE behavior, timeout behavior, and prompt-cache affinity.
+- `/Users/kevinlin/code/openclaw/src/llm/providers/anthropic.test.ts` covers Anthropic provider auth and signed thinking replay.
+- `/Users/kevinlin/code/openclaw/src/llm/providers/google-shared.test.ts` covers projecting text, thinking, tool calls, response ids, and usage.
+- `/Users/kevinlin/code/openclaw/src/agents/transport-stream-shared.test.ts` covers sanitization, non-empty tool payload text, headers, success streams, and failure cleanup.
+
+### Gitcrawl queries
+
+- `gitcrawl --json search issues -R openclaw/openclaw "openai-codex Anthropic Google provider tool call"` returned #80667 on pure `claude-cli` sessions missing `trajectory.jsonl` and #78196 on extension plugin loader behavior.
+- `gitcrawl --json search issues -R openclaw/openclaw "tool call streaming truncated tool_call provider"` returned #60593 on recurring Anthropic streaming JSON parse errors, #70033 on tool calls emitting empty `{}` arguments for large content, and #87711 on empty assistant delivery.
+- `gitcrawl --json search prs -R openclaw/openclaw "provider error descriptors fallback rate limit"` returned PR #86642 adding structured provider error descriptors.
+
+### Discrawl queries
+
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "tool call streaming"` returned discussions about native progress callbacks, provider streaming/tool-call wrapping, visible tool-call blocks, app-server idle watchdog behavior, Claude CLI/WebChat tool visibility, and Telegram progress modes.
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "openai-codex provider routing"` returned reports about OpenAI OAuth/Codex routing, direct OpenAI Responses path drift, stale persisted route state, and old config/runtime pins.
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "model fallback decision"` returned hosted-provider timeout and fallback discussions, including OpenRouter timeouts and missing bearer failures.
--- a/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/local-and-self-hosted-provider-execution.md
+++ b/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/local-and-self-hosted-provider-execution.md
@@ -0,0 +1,91 @@
+---
+title: "Agent Runtime - Local and Self-hosted Providers Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Agent Runtime - Local and Self-hosted Providers Maturity Note
+
+## Summary
+
+Local and self-hosted provider execution is documented and implemented, with especially detailed Ollama guidance for native `/api/chat`, OpenAI-compatible `/v1`, local markers, auth profile format, tool-support flags, lean profiles, context windows, timeouts, and live smoke commands. Coverage is Beta because it is concentrated in Ollama/local-model docs and command behavior. Quality is Alpha because archive evidence shows local models still struggle with tool calling, cold-start timeouts, raw JSON/tool text, and event-loop blocking.
+
+## Category Scope
+
+This category covers local and self-hosted execution paths visible to users/operators: Ollama, OpenAI-compatible local servers, local model profile configuration, tool-capability flags, timeouts, context windows, local image/model smoke checks, and local provider failure handling.
+
+## Features
+
+- Local provider profiles: Local model profile configuration for Ollama and OpenAI-compatible local servers.
+- Tool-capability flags: Local provider capability flags and behavior for tool use.
+- Timeouts and context windows: Local provider timeout and context-window configuration.
+- Local smoke checks: Local image and model smoke checks visible to operators.
+- Local failure handling: Operator-facing failure handling for local and self-hosted providers.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Beta (70%)`
+
+Coverage is useful and operator-facing, but it is uneven across local backends and less uniformly tested than hosted providers.
+
+## Quality Score
+
+- Score: `Alpha (60%)`
+
+Local execution is workable but still fragile in practice: model tool-calling quality, cold starts, context limits, local server blocking, and OpenAI-compatible mode quirks remain recurring issues.
+
+## Completeness Score
+
+- Score: `Beta (70%)`
+- Surface instructions: evaluated against `references/completeness/agent-runtime-and-provider-execution.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Local provider profiles, Tool-capability flags, Timeouts and context windows, Local smoke checks, and Local failure handling.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- Local provider evidence is strongly Ollama-centered; other local/self-hosted runtimes need the same level of scenario proof.
+- Tool-calling behavior depends heavily on model capability and provider mode.
+- Timeout guidance exists, but default timeout settings still generate operator reports for slow local LLMs.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/providers/ollama.md` documents native `/api/chat`, `/v1` warnings, raw tool JSON behavior, local auth rules, provider IDs, `models list`, exact `/model ollama` failure behavior, endpoint preflight, live test command, custom base URLs, `compat.supportsTools: false`, `localModelLean`, `timeoutSeconds`, OpenAI-compatible mode tool/streaming reliability warnings, context windows, streaming/tool-calling/thinking support, garbled output handling, and cold local model timeouts.
+- `/Users/kevinlin/code/openclaw/docs/concepts/models.md` documents local/GGUF refs, model allowlists, and runtime-independent model refs.
+- `/Users/kevinlin/code/openclaw/docs/cli/agent.md` documents `--local`, timeout options, and embedded fallback behavior for local agent runs.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/src/agents/agent-tools.ts` applies model-provider tool policy and suppresses tools such as web search for local lean profiles.
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/model-selection.ts` resolves model allowlists, local refs, thinking/reasoning settings, and context token limits used by local providers.
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.ts` handles context-window hints, local/embedded runtime fallback behavior, provider timeout copy, and local model retry/fallback interactions.
+
+### Integration tests
+
+- `/Users/kevinlin/code/openclaw/src/commands/models.list.e2e.test.ts` covers local/provider behavior in model catalog/status output.
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` covers context-aware reserve token floors, overflow recovery text, local/runtime fallback interactions, model capacity copy, and timeout/fallback diagnostics relevant to local providers.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/src/agents/agent-tools.message-provider-policy.test.ts` covers provider-based tool policy behavior.
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` includes unit-style coverage for context windows, provider fallback state, and timeout/failure copy.
+
+### Gitcrawl queries
+
+- `gitcrawl --json search issues -R openclaw/openclaw "Ollama vLLM SGLang LM Studio tool calling"` returned no matches for the exact backend set.
+- `gitcrawl --json search issues -R openclaw/openclaw "local model provider context timeout Ollama"` returned #87642 on exposing `waitForRun` timeout for slow local LLMs, #86599 on local model provider calls blocking the gateway event loop on Windows, #74204 on memory embed timeout for local GGUF, #81214 on subagent regression, and #65502 on resilient model fallback with retry and safe mode.
+- `gitcrawl --json search prs -R openclaw/openclaw "Ollama native tool calling streaming"` returned no matching PRs.
+
+### Discrawl queries
+
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "Ollama tool calling OpenClaw"` returned guidance that some local models are poor at tool calling, user questions about local model limitations and tool use, maintainer guidance that raw tools printed as text indicate model/tool-calling compatibility problems, and comments closing issues around local backend support and Ollama `/v1` misconfiguration.
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "models list provider routing fallback"` returned user-helping-user guidance on Ollama provider versus session/tool pressure and local/custom provider handling.
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "usage limit fallback openai-codex"` included adjacent operator discussions about fallback configuration, useful as contrast but not primary local-provider evidence.
--- a/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/model-selection-provider-routing-and-runtime-policy.md
+++ b/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/model-selection-provider-routing-and-runtime-policy.md
@@ -0,0 +1,92 @@
+---
+title: "Agent Runtime - Model and Runtime Selection Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Agent Runtime - Model and Runtime Selection Maturity Note
+
+## Summary
+
+Model selection and provider routing are among the most mature parts of this surface. Docs explain model refs, configured defaults, user-selected strict refs, provider fallbacks, auth-profile fallbacks, `/model`, runtime overrides, and thinking/context policy. Source centralizes most state in `createModelSelectionState`, and tests cover model list/set behavior plus fallback/retry routing. Quality is Beta because archive evidence shows recent drift around Codex OAuth routes, stale `openai-codex` refs, and per-session/provider fallback state.
+
+## Category Scope
+
+This category covers selecting a model/provider/runtime for an agent turn, honoring user and config choices, resolving thinking/context settings, handling runtime provider overrides, and preserving or clearing invalid route state.
+
+## Features
+
+- Model reference selection: Selecting the model reference for an agent turn from user or configured defaults.
+- Provider and runtime overrides: Handling provider selection and runtime overrides for a turn.
+- Thinking and context settings: Resolving thinking and context settings as part of model selection.
+- Invalid route recovery: Preserving or clearing invalid route state when selections drift or fail.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Stable (84%)`
+
+Coverage is strong across model docs, CLI command docs, provider docs, source, and e2e tests for model list/set and fallback normalization.
+
+## Quality Score
+
+- Score: `Beta (72%)`
+
+Routing behavior is explicit and defensive, but quality is pulled down by recent operator-visible route repair, stale auth/provider refs, and fallback stickiness reports.
+
+## Completeness Score
+
+- Score: `Stable (84%)`
+- Surface instructions: evaluated against `references/completeness/agent-runtime-and-provider-execution.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Model reference selection, Provider and runtime overrides, Thinking and context settings, and Invalid route recovery.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- Operator recovery for stale provider/runtime refs is spread across doctor behavior, provider docs, and error copy.
+- Runtime policy is strong for common providers but field reports show route drift when Codex OAuth, custom provider IDs, and fallback profiles interact.
+- The system needs recurring release-level scenario proof for route repair and fallback reset behavior.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/concepts/models.md` documents model refs versus runtime, selection order, provider/auth fallbacks, configured defaults, auto fallback selections, strict user session selections, model allowlists, local/GGUF refs, `/model` switching, live switching, and strict selected refs.
+- `/Users/kevinlin/code/openclaw/docs/cli/models.md` documents `models list`, `models set`, status/probe options, catalog/auth columns, provider catalog responsiveness, ref parsing/fallback, auth profiles, login, paste-api-key, and OpenAI API versus ChatGPT/OAuth routing.
+- `/Users/kevinlin/code/openclaw/docs/providers/openai.md` documents OpenAI/Codex route distinctions, naming map, GPT-5.5/Codex app-server repair notes, capability tables, and the default OpenAI agent route summary.
+- `/Users/kevinlin/code/openclaw/docs/concepts/agent-runtimes.md` documents runtime selection, fail-closed explicit runtimes, CLI backend aliases, and OpenAI defaulting to the Codex harness.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/model-selection.ts` implements provider/model initialization, allowlists, catalog visibility policy, direct stored override handling, stale legacy `openai-codex` override clearing, auth profile override validation, thinking/reasoning resolution, and context token resolution.
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.ts` applies fallback candidate auth profiles, live model switches, runtime config, runtime provider resolution, and retry state.
+- `/Users/kevinlin/code/openclaw/src/agents/configured-provider-fallback.ts` defines configured fallback behavior for provider selection.
+
+### Integration tests
+
+- `/Users/kevinlin/code/openclaw/src/commands/models.set.e2e.test.ts` covers model setting and fallback normalization.
+- `/Users/kevinlin/code/openclaw/src/commands/models.list.e2e.test.ts` covers model list/status, catalog/auth/local/provider behavior, and provider visibility.
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` covers fallback rechecks, stale queued probe dropping after user model switches, preserving and re-persisting fallback origins, CLI runtime override boundaries, model capacity errors, live model switch restarts, and retry-loop caps.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` includes focused tests for provider/model fallback retry state, auth profile preservation, dropping `authProfileId` when fallback switches providers, and same-provider auth profile fallback.
+- `/Users/kevinlin/code/openclaw/src/commands/models.auth.provider-resolution.test.ts` covers auth-provider resolution behavior for model commands.
+
+### Gitcrawl queries
+
+- `gitcrawl --json search issues -R openclaw/openclaw "models list model selection fallback auth profile provider"` returned #59168 on using `provider/name` as the internal model key, #83954 on Pro-plan paths for `gpt-5.5-pro` and retired Spark via Codex CLI/app-server, and #70055 on disabling external CLI sync for auth profiles via config.
+- `gitcrawl --json search issues -R openclaw/openclaw "No API key found provider openai-codex auth profile"` returned stale route and Codex OAuth issues including #86470 on doctor rewriting `openai-codex/*` to `openai/*`, #83223 on migrated routes still looking up `openai-codex` auth before fallback, and #86820 on compaction falling back to direct OpenAI API.
+- `gitcrawl --json search issues -R openclaw/openclaw "rate limit fallback usage limit openai-codex"` returned #85103 on provider-wide quota fallback not triggering, #87467 on auto rate-limit fallback staying pinned after primary recovery, #79604 on rotating auth profiles before provider fallback, and #79611 on active-memory plugin failover.
+
+### Discrawl queries
+
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "models list provider routing fallback"` returned a May 16 beta announcement emphasizing Codex app-server reliability, progress timeouts, compaction handling, tool policy enforcement, OAuth fallback, local/custom providers, and guidance on Ollama provider pressure, vision routing, CLI crashes, and per-turn model routing with `before_model_resolve`.
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "openai-codex provider routing"` returned maintainer notes around OpenAI OAuth/Codex routing, `openai-codex` being load-bearing in auth profile resolution, compaction routing, context config, auth order, and stale persisted route state.
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "model fallback decision"` returned recent discussions about openai-codex timeouts, fallback decisions, No API key fallback decisions, OpenRouter timeouts, and session repair loops.
--- a/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/provider-auth-profiles-and-credential-health.md
+++ b/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/provider-auth-profiles-and-credential-health.md
@@ -0,0 +1,113 @@
+---
+title: "Agent Runtime - Provider Auth Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Agent Runtime - Provider Auth Maturity Note
+
+## Summary
+
+Provider auth is broad enough to cover setup, selection, health checks, fallback, and operator-facing diagnostics in one category. Docs explain API keys, OAuth, provider/auth-profile fallbacks, status and probe output, stale-route repair, and restart guidance. Source validates provider/profile compatibility, carries fallback candidate state, classifies structured provider failures, and formats missing-key, OAuth-refresh, capacity, and restart recovery guidance. Quality remains Alpha because archive evidence still shows repeated operator failures around Codex OAuth route repair, profile propagation, quota fallback semantics, sticky fallback state, and provider key discovery.
+
+## Category Scope
+
+This category covers provider credentials, auth profile health, and operator-visible provider recovery behavior: login and paste-key flows, provider auth profile selection, doctor and status repair, auth failover, provider fallback chains, quota and capacity recovery, missing-key and OAuth guidance, restart and stale-route hints, structured diagnostics, subagent credential propagation, and credential-related runtime errors.
+
+## Features
+
+- Login and API-key setup: Login, OAuth, and paste-key flows for provider access.
+- Auth profile selection: Selecting and validating provider auth profiles.
+- Credential health checks: Doctor, status, and related credential health checks and repair signals.
+- Auth failover: Same-provider and cross-profile auth fallback behavior.
+- Provider fallback recovery: Provider and auth-profile fallback behavior when execution fails.
+- Rate-limit and capacity recovery: Recovery paths for quota, capacity, and rate-limit failures.
+- Missing-key and OAuth guidance: Operator guidance for missing keys, expired OAuth state, and related auth failures.
+- Restart and stale-route recovery: Recovery from stale route state, restart requirements, and related provider drift.
+- Structured provider diagnostics: Structured provider errors and diagnostics delivered into logs or agent replies.
+- Subagent credential propagation: Propagating provider credentials into subagent and delegated runtime flows.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Stable (80%)`
+
+Coverage is strong for OpenAI/Codex, Anthropic, Google, model commands, fallback state, and operator-facing recovery copy, but provider auth and diagnostics still span many flows and are not yet represented by a single end-to-end operator proof matrix.
+
+## Quality Score
+
+- Score: `Alpha (66%)`
+
+Auth/profile behavior remains a frequent operational pain point, especially where Codex OAuth, direct OpenAI API routes, compaction, subagents, doctor repair, and quota fallback behavior overlap.
+
+## Completeness Score
+
+- Score: `Stable (80%)`
+- Surface instructions: evaluated against `references/completeness/agent-runtime-and-provider-execution.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Login and API-key setup, Auth profile selection, Credential health checks, Auth failover, Provider fallback recovery, Rate-limit and capacity recovery, Missing-key and OAuth guidance, Restart and stale-route recovery, Structured provider diagnostics, and Subagent credential propagation.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- Codex OAuth route repair still produces recent open GitHub and Discord reports.
+- Subagent and compaction flows can lose or reinterpret auth profile state.
+- Quota-wide and account-specific provider failures need clearer fallback semantics.
+- Recovery from stale `openai-codex` route state still depends on doctor repair and explicit guidance.
+- Some missing-key and fallback diagnostics are strong in tests but still too hard for operators to map to a root cause.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/cli/models.md` documents model status/auth overview, Codex OAuth troubleshooting, auth profile listing, login, paste-api-key, OpenAI API versus ChatGPT/OAuth, and Claude CLI notes.
+- `/Users/kevinlin/code/openclaw/docs/concepts/models.md` documents primary model selection, fallbacks, provider auth failover, auto fallback selections, strict user selections, and live model switching.
+- `/Users/kevinlin/code/openclaw/docs/cli/agent.md` documents gateway fallback behavior, embedded fallback metadata, gateway timeout fallback session/run id, and SIGTERM/SIGINT `chat.abort`.
+- `/Users/kevinlin/code/openclaw/docs/providers/openai.md` documents `openai`, `openai-codex`, Codex plugin and `agentRuntime` naming, OpenAI/Codex route selection, Codex OAuth setup, and doctor repair behavior.
+- `/Users/kevinlin/code/openclaw/docs/providers/anthropic.md` documents API key versus Claude CLI authentication and canonical Anthropic refs with `agentRuntime.id: "claude-cli"`.
+- `/Users/kevinlin/code/openclaw/docs/providers/google.md` documents Google plugin auth, Gemini CLI OAuth setup, and warning/alias behavior.
+- `/Users/kevinlin/code/openclaw/docs/tools/subagents.md` documents subagent auth resolution by agent id and fallback to main profiles.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/model-selection.ts` validates auth profile overrides against accepted auth providers, clears invalid overrides, and handles stale legacy `openai-codex` state.
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.ts` carries fallback candidate auth profile state, applies live model switch auth changes, preserves same-provider auth fallback, and drops auth profile ids when switching providers.
+- `/Users/kevinlin/code/openclaw/src/llm/providers/openai-codex-responses.ts` classifies retryable errors and configures timeout/retry behavior for Codex Responses transport.
+- `/Users/kevinlin/code/openclaw/src/agents/transport-stream-shared.ts` builds structured failure streams with error details.
+- `/Users/kevinlin/code/openclaw/src/commands/auth-choice.apply.api-providers.test.ts` maps API key/token provider choices for auth flows.
+
+### Integration tests
+
+- `/Users/kevinlin/code/openclaw/src/commands/models.list.e2e.test.ts` covers catalog auth/status presentation for providers.
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` covers OAuth refresh failure guidance, missing API key guidance, stale `openai-codex` missing-key failures pointing at doctor repair, auth profile state on retries, provider-switch auth profile dropping, and same-provider auth fallback.
+- `/Users/kevinlin/code/openclaw/src/commands/models.set.e2e.test.ts` covers fallback normalization in model command behavior.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/src/commands/auth-choice.apply.api-providers.test.ts` covers auth choice mapping for API key/token providers.
+- `/Users/kevinlin/code/openclaw/src/commands/models.auth.provider-resolution.test.ts` covers provider auth resolution for model commands.
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` includes focused auth-profile regression coverage.
+- `/Users/kevinlin/code/openclaw/src/llm/providers/openai-codex-responses.test.ts` covers transport timeouts and websocket/SSE behavior feeding retry decisions.
+- `/Users/kevinlin/code/openclaw/src/agents/transport-stream-shared.test.ts` covers failure cleanup and non-empty failure streams.
+
+### Gitcrawl queries
+
+- `gitcrawl --json search issues -R openclaw/openclaw "No API key found provider openai-codex auth profile"` returned many active issues, including #84252 on doctor/status leaving `openai-codex` OAuth sidecar auth partially repaired, #87677 on memory embeddings through Codex OAuth runtime, #86470 on doctor rewriting `openai-codex/*` to `openai/*`, #85797 on image generation requiring an API key despite OAuth, #86820 on compaction falling back to direct OpenAI API, #87051 on OAuth profile not propagating to subagent sessions, #83223 on migrated routes still looking up `openai-codex` auth before fallback, and #80171 on runtime parity QA.
+- `gitcrawl --json search issues -R openclaw/openclaw "openai-codex Anthropic Google provider tool call"` returned #80667 on missing `trajectory.jsonl` for pure `claude-cli` sessions and #78196 on extension plugin loader behavior.
+- `gitcrawl --json search issues -R openclaw/openclaw "provider error guidance reauth fallback"` returned no direct matches.
+- `gitcrawl --json search issues -R openclaw/openclaw "rate limit fallback usage limit openai-codex"` returned #85103 on model fallback chain not triggering for provider-wide quota exhaustion, #87467 on auto rate-limit fallback staying pinned to fallback after primary recovery, #79604 on rotating auth profiles within a candidate before next provider, and #79611 on active-memory plugin provider failover and timeout.
+- `gitcrawl --json search prs -R openclaw/openclaw "provider error descriptors fallback rate limit"` returned #86642 adding structured provider error descriptors.
+- `gitcrawl --json search prs -R openclaw/openclaw "agent runner fallback model switch"` returned PRs including #85235 on message-tool-only diagnostics, #80482 on cooldown inline API key billing failures, #62682 on terminal abort versus retryable failures, and #86089 on restart recovery replies.
+
+### Discrawl queries
+
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "No API key found provider openai-codex"` returned May 2026 reports around OpenAI OAuth/Codex routing, plugin errors with `No API key found for provider "openai-codex"`, existing Codex auth no longer recognized after rebuild, direct API routing failures, and users seeing missing OpenAI keys despite Codex OAuth.
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "reauth provider auth profile"` returned Codex auth refresh/persistence reports, scope issues, stale auth order, token rotation failures, and older reauth command confusion.
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "openai-codex provider routing"` returned maintainer/user notes about auth profile resolution, compaction routing, context config, auth order, stale route state, and doctor repair guidance.
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "usage limit fallback openai-codex"` returned discussions about Claude CLI usage/billing fallback losing context, multi-account Codex OAuth failover, OpenAI rate limit auth/provider guidance, Codex backend challenge/limit paths, rate-limit/account-id errors, fallback configs, and model failover being blocked or sticky.
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "model fallback decision"` returned recent fallback decision logs for openai-codex timeouts, No API key cases, OpenRouter timeouts, missing bearer errors, Anthropic empty responses, and session repair loops.
--- a/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/report.md
+++ b/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/report.md
@@ -0,0 +1,328 @@
+---
+title: "Agent Runtime Maturity Report"
+version: 3
+last_refreshed: 2026-05-31
+last_refreshed_by: codex
+---
+
+# Agent Runtime Maturity Report
+
+## Top-level scores
+
+These rollups are simple arithmetic means over the category-note numeric
+scores in
+`scores.yaml`. Percentages are rounded to the nearest whole number.
+
+- Coverage: `Stable (80%)`
+- Quality: `Alpha (69%)`
+- Completeness: `Stable (80%)`
+- LTS Features: `6/9`
+
+## Summary
+
+This report promotes the archived `agent-runtime-and-provider-execution` maturity evidence from `/Users/kevinlin/tmp/maturity/agent-runtime-and-provider-execution` into the current process-version-3 inventory contract.
+
+The category Coverage and Quality scores come from the archived evidence-backed score rows. Completeness is initialized from the same archived evidence breadth and known-gap record, then joined with the surface-specific completeness rubric referenced by taxonomy.
+
+## Matrix
+
+| Category                                                                              | LTS | Coverage       | Quality       | Completeness   | Features to evaluate                                                                                                                                                                                                                                                                       |
+| ------------------------------------------------------------------------------------- | --- | -------------- | ------------- | -------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| [Agent Turn Execution](agent-turn-orchestration-and-runtime-lifecycle.md)             | ✅  | `Stable (82%)` | `Beta (74%)`  | `Stable (82%)` | Turn startup and runtime choice, Session and run coordination, Abort and terminal outcomes                                                                                                                                                                                                 |
+| [External Runtimes and Subagents](cli-harnesses-external-runtimes-and-subagents.md)   | ❌  | `Beta (78%)`   | `Alpha (66%)` | `Beta (78%)`   | External harness selection, CLI runtime aliases, Subagent turns, Runtime recovery                                                                                                                                                                                                          |
+| [Hosted Provider Execution](hosted-provider-adapters-and-payload-compatibility.md)    | ✅  | `Beta (76%)`   | `Beta (70%)`  | `Beta (76%)`   | Hosted provider turns, Provider-specific model options, Hosted tool use, Reasoning and cache controls, Hosted streaming and replies                                                                                                                                                        |
+| [Local and Self-hosted Providers](local-and-self-hosted-provider-execution.md)        | ❌  | `Beta (70%)`   | `Alpha (60%)` | `Beta (70%)`   | Local provider profiles, Tool-capability flags, Timeouts and context windows, Local smoke checks, Local failure handling                                                                                                                                                                   |
+| [Model and Runtime Selection](model-selection-provider-routing-and-runtime-policy.md) | ✅  | `Stable (84%)` | `Beta (72%)`  | `Stable (84%)` | Model reference selection, Provider and runtime overrides, Thinking and context settings, Invalid route recovery                                                                                                                                                                           |
+| [Provider Auth](provider-auth-profiles-and-credential-health.md)                      | ✅  | `Stable (80%)` | `Alpha (66%)` | `Stable (80%)` | Login and API-key setup, Auth profile selection, Credential health checks, Auth failover, Provider fallback recovery, Rate-limit and capacity recovery, Missing-key and OAuth guidance, Restart and stale-route recovery, Structured provider diagnostics, Subagent credential propagation |
+| [Streaming and Progress](streaming-progress-and-preview-visibility.md)                | ❌  | `Stable (84%)` | `Beta (70%)`  | `Stable (84%)` | Streaming replies, Progress visibility                                                                                                                                                                                                                                                     |
+| [Tool Calls and Response Handling](streaming-tool-call-and-response-normalization.md) | ✅  | `Stable (80%)` | `Alpha (66%)` | `Stable (80%)` | Tool-call handling, Usage and response reporting, Failure recovery                                                                                                                                                                                                                         |
+| [Tool Execution Controls](tool-execution-approvals-and-sandbox-policy.md)             | ✅  | `Stable (86%)` | `Beta (74%)`  | `Stable (86%)` | Tool availability rules, Sandboxed exec behavior, Approval flow, Elevated execution, Tool safety controls, Delegated tool access                                                                                                                                                           |
+
+## Scoring rubric
+
+- Coverage:
+  maturity-label rating for integration, e2e, live, or server/runtime flow
+  evidence across the category. Unit tests can provide supporting context but cannot, on their
+  own, make a feature count as covered.
+- Quality:
+  maturity-label rating for implementation and operational robustness. Unit,
+  integration, e2e, live, and real runtime-flow test coverage are Coverage
+  inputs only; they do not raise or lower Quality.
+- Completeness:
+  maturity-label rating for how fully the category delivers the intended
+  surface-specific capability set. Use the taxonomy-linked completeness
+  instructions for this surface.
+- LTS:
+  calculated as `quality > 80 and coverage > 90`, or when the matching
+  taxonomy category sets `human_lts_override`.
+- Shared score bands:
+  `Lovable = 95-100`, `Stable = 80-95`, `Beta = 70-80`,
+  `Alpha = 50-70`, and `Experimental = 0-50`. At shared boundaries, choose the
+  higher maturity label.
+- Major quality/completeness gaps:
+  evidence text only, tracked in the detailed feature inventory rather than as a
+  separate scored dimension.
+
+## Detailed feature inventory
+
+### 1. Agent Turn Execution
+
+Search anchors: agent RPC shape and event stream, runAgentTurnWithFallback, agent.wait timeout and terminal outcomes.
+
+Category note: [Agent Turn Execution](agent-turn-orchestration-and-runtime-lifecycle.md)
+
+Score decisions:
+
+- Coverage: `Stable (82%)`
+- Quality: `Beta (74%)`
+- Completeness: `Stable (82%)`
+- LTS: ✅
+
+Features:
+
+- Turn startup and runtime choice: Starting an agent turn and choosing gateway versus embedded runtime execution.
+- Session and run coordination: Establishing session and run ids, queue locks, and related execution coordination.
+- Abort and terminal outcomes: Honoring aborts, timing provider/model work, and emitting terminal outcomes.
+
+Primary docs:
+
+- `docs/concepts/agent-loop.md`
+- `docs/cli/agent.md`
+- `docs/concepts/agent-runtimes.md`
+
+### 2. External Runtimes and Subagents
+
+Search anchors: agent runtimes, subagent turns, CLI runtime aliases.
+
+Category note: [External Runtimes and Subagents](cli-harnesses-external-runtimes-and-subagents.md)
+
+Score decisions:
+
+- Coverage: `Beta (78%)`
+- Quality: `Alpha (66%)`
+- Completeness: `Beta (78%)`
+- LTS: ❌
+
+Features:
+
+- External harness selection: Choosing Codex app-server, ACP, and other external runtime harnesses.
+- CLI runtime aliases: Runtime aliases and CLI-based execution paths such as Claude CLI and Gemini CLI.
+- Subagent turns: Spawning, delivering, and announcing subagent work outside the default embedded path.
+- Runtime recovery: Cleanup, timeout, and liveness behavior for external runtimes and subagents.
+
+Primary docs:
+
+- `docs/concepts/agent-runtimes.md`
+- `docs/providers/anthropic.md`
+- `docs/providers/google.md`
+- `docs/tools/subagents.md`
+
+### 3. Hosted Provider Execution
+
+Search anchors: hosted provider turns, provider-specific model options, streaming reply normalization.
+
+Category note: [Hosted Provider Execution](hosted-provider-adapters-and-payload-compatibility.md)
+
+Score decisions:
+
+- Coverage: `Beta (76%)`
+- Quality: `Beta (70%)`
+- Completeness: `Beta (76%)`
+- LTS: ✅
+
+Features:
+
+- Hosted provider turns: Running agent turns against hosted providers such as OpenAI, Anthropic, and Google.
+- Provider-specific model options: Provider-specific model parameters and runtime request settings exposed to users or operators.
+- Hosted tool use: Tool use behavior when the active runtime is a hosted provider.
+- Reasoning and cache controls: Provider-specific reasoning, thinking, and cache-related controls during hosted execution.
+- Hosted streaming and replies: Operator-visible streaming and reply behavior while hosted adapters normalize payload differences.
+
+Primary docs:
+
+- `docs/providers/openai.md`
+- `docs/providers/anthropic.md`
+- `docs/providers/google.md`
+- `docs/concepts/models.md`
+
+### 4. Local and Self-hosted Providers
+
+Search anchors: Ollama local provider profiles, OpenAI-compatible local servers, local smoke checks.
+
+Category note: [Local and Self-hosted Providers](local-and-self-hosted-provider-execution.md)
+
+Score decisions:
+
+- Coverage: `Beta (70%)`
+- Quality: `Alpha (60%)`
+- Completeness: `Beta (70%)`
+- LTS: ❌
+
+Features:
+
+- Local provider profiles: Local model profile configuration for Ollama and OpenAI-compatible local servers.
+- Tool-capability flags: Local provider capability flags and behavior for tool use.
+- Timeouts and context windows: Local provider timeout and context-window configuration.
+- Local smoke checks: Local image and model smoke checks visible to operators.
+- Local failure handling: Operator-facing failure handling for local and self-hosted providers.
+
+Primary docs:
+
+- `docs/providers/ollama.md`
+- `docs/concepts/models.md`
+- `docs/cli/agent.md`
+
+### 5. Model and Runtime Selection
+
+Search anchors: model reference selection, runtime overrides, thinking and context settings.
+
+Category note: [Model and Runtime Selection](model-selection-provider-routing-and-runtime-policy.md)
+
+Score decisions:
+
+- Coverage: `Stable (84%)`
+- Quality: `Beta (72%)`
+- Completeness: `Stable (84%)`
+- LTS: ✅
+
+Features:
+
+- Model reference selection: Selecting the model reference for an agent turn from user or configured defaults.
+- Provider and runtime overrides: Handling provider selection and runtime overrides for a turn.
+- Thinking and context settings: Resolving thinking and context settings as part of model selection.
+- Invalid route recovery: Preserving or clearing invalid route state when selections drift or fail.
+
+Primary docs:
+
+- `docs/concepts/models.md`
+- `docs/cli/models.md`
+- `docs/providers/openai.md`
+- `docs/concepts/agent-runtimes.md`
+
+### 6. Provider Auth
+
+Search anchors: login and API-key setup, auth profile selection, provider fallback recovery.
+
+Category note: [Provider Auth](provider-auth-profiles-and-credential-health.md)
+
+Score decisions:
+
+- Coverage: `Stable (80%)`
+- Quality: `Alpha (66%)`
+- Completeness: `Stable (80%)`
+- LTS: ✅
+
+Features:
+
+- Login and API-key setup: Login, OAuth, and paste-key flows for provider access.
+- Auth profile selection: Selecting and validating provider auth profiles.
+- Credential health checks: Doctor, status, and related credential health checks and repair signals.
+- Auth failover: Same-provider and cross-profile auth fallback behavior.
+- Provider fallback recovery: Provider and auth-profile fallback behavior when execution fails.
+- Rate-limit and capacity recovery: Recovery paths for quota, capacity, and rate-limit failures.
+- Missing-key and OAuth guidance: Operator guidance for missing keys, expired OAuth state, and related auth failures.
+- Restart and stale-route recovery: Recovery from stale route state, restart requirements, and related provider drift.
+- Structured provider diagnostics: Structured provider errors and diagnostics delivered into logs or agent replies.
+- Subagent credential propagation: Propagating provider credentials into subagent and delegated runtime flows.
+
+Primary docs:
+
+- `docs/concepts/models.md`
+- `docs/cli/agent.md`
+- `docs/cli/models.md`
+- `docs/providers/openai.md`
+- `docs/providers/anthropic.md`
+- `docs/providers/google.md`
+- `docs/tools/subagents.md`
+
+### 7. Streaming and Progress
+
+Search anchors: streaming replies, progress visibility, event delivery.
+
+Category note: [Streaming and Progress](streaming-progress-and-preview-visibility.md)
+
+Score decisions:
+
+- Coverage: `Stable (84%)`
+- Quality: `Beta (70%)`
+- Completeness: `Stable (84%)`
+- LTS: ❌
+
+Features:
+
+- Streaming replies: Streaming block updates and partial assistant output before final delivery.
+- Progress visibility: Progress preview events and item lifecycle updates surfaced during execution.
+
+Primary docs:
+
+- `docs/concepts/streaming.md`
+- `docs/concepts/agent-loop.md`
+
+### 8. Tool Calls and Response Handling
+
+Search anchors: tool-call handling, usage reporting, failure recovery.
+
+Category note: [Tool Calls and Response Handling](streaming-tool-call-and-response-normalization.md)
+
+Score decisions:
+
+- Coverage: `Stable (80%)`
+- Quality: `Alpha (66%)`
+- Completeness: `Stable (80%)`
+- LTS: ✅
+
+Features:
+
+- Tool-call handling: Reliable tool-call behavior across providers, including malformed or provider-specific payload differences.
+- Usage and response reporting: Response ids and usage accounting normalized into operator-visible runtime behavior.
+- Failure recovery: Failure-stream finalization and cleanup when provider output is malformed or incomplete.
+
+Primary docs:
+
+- `docs/concepts/agent-loop.md`
+- `docs/providers/ollama.md`
+
+### 9. Tool Execution Controls
+
+Search anchors: tool availability rules, sandboxed exec behavior, approval flow.
+
+Category note: [Tool Execution Controls](tool-execution-approvals-and-sandbox-policy.md)
+
+Score decisions:
+
+- Coverage: `Stable (86%)`
+- Quality: `Beta (74%)`
+- Completeness: `Stable (86%)`
+- LTS: ✅
+
+Features:
+
+- Tool availability rules: Which tools are available during a turn after policy resolution and provider-based suppression.
+- Sandboxed exec behavior: Exec behavior, sandbox roots, and workspace constraints visible to operators.
+- Approval flow: Operator approval gates for tool execution.
+- Elevated execution: Elevated host execution rules and related controls.
+- Tool safety controls: Before-tool-call hooks and related guardrails that shape operator-visible tool behavior.
+- Delegated tool access: Inherited or narrowed tool policy for subagents and delegated execution.
+
+Primary docs:
+
+- `docs/gateway/sandbox-vs-tool-policy-vs-elevated.md`
+- `docs/concepts/agent-loop.md`
+- `docs/tools/subagents.md`
+
+## Recommended scorecard interpretation
+
+Use this migrated score as the current inventory baseline. Refresh individual categories with live category-agent research before treating a high score as an LTS promotion gate.
+
+## Out of scope for this surface
+
+- Redefining taxonomy category boundaries; taxonomy remains the source of truth for category identity, features, docs, and search anchors.
+
+## Audit provenance
+
+- Score source:
+  `docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/scores.yaml`.
+- Taxonomy metadata source:
+  `.agents/skills/claw-score/taxonomy.yaml`.
+- Archived evidence source:
+  `/Users/kevinlin/tmp/maturity/agent-runtime-and-provider-execution`.
--- a/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/scores.yaml
+++ b/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/scores.yaml
@@ -0,0 +1,48 @@
+version: 1
+process_version: 3
+data:
+  - name: Agent Turn Execution
+    category_note: agent-turn-orchestration-and-runtime-lifecycle.md
+    coverage: 82
+    quality: 74
+    completeness: 82
+  - name: External Runtimes and Subagents
+    category_note: cli-harnesses-external-runtimes-and-subagents.md
+    coverage: 78
+    quality: 66
+    completeness: 78
+  - name: Hosted Provider Execution
+    category_note: hosted-provider-adapters-and-payload-compatibility.md
+    coverage: 76
+    quality: 70
+    completeness: 76
+  - name: Local and Self-hosted Providers
+    category_note: local-and-self-hosted-provider-execution.md
+    coverage: 70
+    quality: 60
+    completeness: 70
+  - name: Model and Runtime Selection
+    category_note: model-selection-provider-routing-and-runtime-policy.md
+    coverage: 84
+    quality: 72
+    completeness: 84
+  - name: Provider Auth
+    category_note: provider-auth-profiles-and-credential-health.md
+    coverage: 80
+    quality: 66
+    completeness: 80
+  - name: Streaming and Progress
+    category_note: streaming-progress-and-preview-visibility.md
+    coverage: 84
+    quality: 70
+    completeness: 84
+  - name: Tool Calls and Response Handling
+    category_note: streaming-tool-call-and-response-normalization.md
+    coverage: 80
+    quality: 66
+    completeness: 80
+  - name: Tool Execution Controls
+    category_note: tool-execution-approvals-and-sandbox-policy.md
+    coverage: 86
+    quality: 74
+    completeness: 86
--- a/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/streaming-progress-and-preview-visibility.md
+++ b/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/streaming-progress-and-preview-visibility.md
@@ -0,0 +1,84 @@
+---
+title: "Agent Runtime - Streaming and Progress Maturity Note"
+version: 3
+last_refreshed: 2026-05-31
+last_refreshed_by: codex
+---
+
+# Agent Runtime - Streaming and Progress Maturity Note
+
+## Summary
+
+Streaming and progress visibility are well covered in docs and tests: OpenClaw separates provider/runtime streaming from channel delivery, documents block and preview streaming modes, and surfaces tool-progress and item lifecycle updates before final delivery. Coverage is Stable. Quality is Beta because runtime and channel differences still produce missing progress updates, suppressed previews, and occasional terminal-update confusion.
+
+## Category Scope
+
+This category covers operator-visible streaming and progress behavior before
+final delivery: streaming replies, preview and block streaming modes, and
+progress visibility through tool-progress or item lifecycle updates.
+
+## Features
+
+- Streaming replies: Streaming block updates and partial assistant output before final delivery.
+- Progress visibility: Progress preview events and item lifecycle updates surfaced during execution.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Stable (84%)`
+
+Coverage is strong across streaming docs, agent-loop docs, event plumbing, and focused tests for preview updates, item lifecycle events, duplicate progress suppression, and terminal delivery behavior.
+
+## Quality Score
+
+- Score: `Beta (70%)`
+
+Streaming and progress behavior is broadly solid, but field reports still show runtime- and channel-specific differences around progress callbacks, terminal updates, and preview behavior.
+
+## Completeness Score
+
+- Score: `Stable (84%)`
+- Surface instructions: evaluated against `references/completeness/agent-runtime-and-provider-execution.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Streaming replies and Progress visibility.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived combined category.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- Some progress visibility behavior still differs by runtime and channel.
+- Native progress callbacks can still be suppressed or delayed in some flows.
+- Terminal updates after long-running tool activity still need more consistent operator-facing behavior.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/concepts/streaming.md` documents the two streaming layers, block streaming, preview modes, channel mapping, runtime behavior, and tool-progress preview updates.
+- `/Users/kevinlin/code/openclaw/docs/concepts/agent-loop.md` documents assistant and tool event streams, block streaming behavior, event-stream shapes, and session timeout semantics.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.ts` bridges assistant and tool events into previews, tracks item lifecycle events, suppresses duplicate progress, and handles terminal streaming text before final delivery.
+- `/Users/kevinlin/code/openclaw/src/llm/providers/anthropic.ts` emits streaming assistant and tool events, including fine-grained tool streaming behavior.
+- `/Users/kevinlin/code/openclaw/src/llm/providers/google-shared.ts` emits assistant, thinking, and tool-call stream events that feed operator-visible progress and streaming behavior.
+
+### Integration tests
+
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` covers CLI assistant event previews, item lifecycle events, duplicate progress skipping, raw tool progress details, tool-start progress before slow typing, and Codex app-server telemetry.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/src/llm/providers/google-shared.test.ts` covers streamed text, thinking, and tool event projection.
+- `/Users/kevinlin/code/openclaw/src/llm/providers/anthropic.test.ts` covers streaming behavior that affects signed thinking replay and provider event handling.
+
+### Gitcrawl queries
+
+- `gitcrawl --json search issues -R openclaw/openclaw "tool call streaming truncated tool_call provider"` returned #60593 on recurring Anthropic streaming JSON parse errors, #70033 on tool calls emitting empty `{}` arguments for large content, and #87711 on empty assistant delivery.
+
+### Discrawl queries
+
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "tool call streaming"` returned May 2026 discussions about `/verbose off` suppressing native progress callbacks, missing terminal updates, provider streaming/tool-call wrapping, and tool-call visibility modes.
--- a/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/streaming-tool-call-and-response-normalization.md
+++ b/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/streaming-tool-call-and-response-normalization.md
@@ -0,0 +1,93 @@
+---
+title: "Agent Runtime - Tool Calls and Response Handling Maturity Note"
+version: 3
+last_refreshed: 2026-05-31
+last_refreshed_by: codex
+---
+
+# Agent Runtime - Tool Calls and Response Handling Maturity Note
+
+## Summary
+
+Tool-call handling and response normalization are well covered in docs and tests: OpenClaw normalizes malformed tool-call arguments, provider-specific payload differences, usage accounting, and terminal failure streams across adapters and shared transport code. Coverage is Stable. Quality is Alpha because recent archive evidence still shows empty tool arguments, raw tool JSON or visible tool-call blocks, and terminal-empty assistant delivery, all of which leak surprising states to users.
+
+## Category Scope
+
+This category covers operator-visible tool-call and response-handling behavior:
+reliable tool-call payload handling across providers, usage and response
+reporting, and recovery when provider output is malformed, empty, or
+incomplete.
+
+## Features
+
+- Tool-call handling: Reliable tool-call behavior across providers, including malformed or provider-specific payload differences.
+- Usage and response reporting: Response ids and usage accounting normalized into operator-visible runtime behavior.
+- Failure recovery: Failure-stream finalization and cleanup when provider output is malformed or incomplete.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Stable (80%)`
+
+Coverage is strong across provider adapters, shared transport code, and focused tests for tool payload coercion, response ids and usage, malformed provider output, and failure finalization.
+
+## Quality Score
+
+- Score: `Alpha (66%)`
+
+Normalization is robust in source, but field reports show provider tool-call output and malformed responses still leak surprising states to users.
+
+## Completeness Score
+
+- Score: `Stable (80%)`
+- Surface instructions: evaluated against `references/completeness/agent-runtime-and-provider-execution.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Tool-call handling, Usage and response reporting, and Failure recovery.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- Empty or malformed tool-call arguments still emerge in provider-specific edge cases.
+- Raw tool JSON or visible tool-call blocks still appear in some local or compatibility modes.
+- Terminal-empty and tool-only responses still need more consistent operator-facing explanation.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/concepts/agent-loop.md` documents tool-event streams, final payload behavior, and timeout behavior around the agent loop.
+- `/Users/kevinlin/code/openclaw/docs/providers/ollama.md` documents native tool-calling, OpenAI-compatible mode reliability warnings, raw tool JSON as text, garbled output handling, and tool-calling compatibility caveats.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/src/agents/transport-stream-shared.ts` coerces transport tool-call arguments, merges headers and metadata, finalizes failure streams, and normalizes transport error details.
+- `/Users/kevinlin/code/openclaw/src/llm/providers/anthropic.ts` normalizes streamed tool ids and partial JSON tool arguments for Anthropic responses.
+- `/Users/kevinlin/code/openclaw/src/llm/providers/google-shared.ts` normalizes tool call ids, tool arguments, response ids, and usage accounting for Google-family providers.
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.ts` formats terminal empty/tool-only outcomes and bridges normalized tool results into operator-visible replies.
+
+### Integration tests
+
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` covers media-only tool results, plan-only terminal result fallback, terminal-empty result classification, stripping glued leading `NO_REPLY` tokens, streamed tool results delivery, and tool-only outcome handling.
+- `/Users/kevinlin/code/openclaw/src/agents/agent-tools.before-tool-call.integration.e2e.test.ts` covers hook-driven tool parameter modification, blocking, deduplication, and context around tool-call execution.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/src/agents/transport-stream-shared.test.ts` covers surrogate sanitization, non-empty tool payload text, header propagation, successful stream finalization, and failure cleanup.
+- `/Users/kevinlin/code/openclaw/src/llm/providers/google-shared.test.ts` covers tool call projection, response ids, and usage.
+- `/Users/kevinlin/code/openclaw/src/llm/providers/anthropic.test.ts` covers signed thinking replay and provider behavior that affects tool payload normalization.
+
+### Gitcrawl queries
+
+- `gitcrawl --json search issues -R openclaw/openclaw "tool call streaming truncated tool_call provider"` returned #60593 on recurring Anthropic streaming JSON parse errors, #70033 on tool calls emitting empty `{}` arguments for large content, and #87711 on empty assistant delivery.
+- `gitcrawl --json search issues -R openclaw/openclaw "openai-codex Anthropic Google provider tool call"` returned adapter-adjacent issues for `claude-cli` session artifacts and extension plugin loading.
+- `gitcrawl --json search prs -R openclaw/openclaw "provider error descriptors fallback rate limit"` returned #86642, which improves structured provider error descriptors feeding normalized runtime errors.
+
+### Discrawl queries
+
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "tool call streaming"` returned May 2026 discussions about provider streaming/tool-call wrapping, visible tool-call blocks, Claude CLI/WebChat tool visibility, and missing terminal updates.
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "Ollama tool calling OpenClaw"` returned reports and guidance on raw tools printed as text and model/tool-calling compatibility.
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "model fallback decision"` returned fallback logs where missing or empty provider output contributed to operator-visible failure paths.
--- a/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/tool-execution-approvals-and-sandbox-policy.md
+++ b/docs/maturity-scorecard/inventory/agent-runtime-and-provider-execution/tool-execution-approvals-and-sandbox-policy.md
@@ -0,0 +1,94 @@
+---
+title: "Agent Runtime - Tool Execution Controls Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Agent Runtime - Tool Execution Controls Maturity Note
+
+## Summary
+
+Tool execution policy is the strongest component in this surface. Docs distinguish sandboxing, tool policy, and elevated approvals; source centralizes tool registration, workspace/sandbox roots, inherited/subagent policy, exec config, schema normalization, and before-tool-call hooks; tests cover approval gates, policy hooks, subagent tool restrictions, and progress behavior. Quality is Beta because archive evidence still shows edge cases around exec approval forwarding, per-agent deny rules, sandbox backend expectations, and plugin/service boundary assumptions.
+
+## Category Scope
+
+This category covers operator-visible control over tools during agent turns:
+tool availability rules, sandboxed exec behavior, approval flow, elevated
+execution, tool safety controls, and delegated tool access for subagents.
+
+## Features
+
+- Tool availability rules: Which tools are available during a turn after policy resolution and provider-based suppression.
+- Sandboxed exec behavior: Exec behavior, sandbox roots, and workspace constraints visible to operators.
+- Approval flow: Operator approval gates for tool execution.
+- Elevated execution: Elevated host execution rules and related controls.
+- Tool safety controls: Before-tool-call hooks and related guardrails that shape operator-visible tool behavior.
+- Delegated tool access: Inherited or narrowed tool policy for subagents and delegated execution.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Stable (86%)`
+
+Coverage is strong across docs, source, e2e tests, unit tests, and archive evidence for policy and approval behavior.
+
+## Quality Score
+
+- Score: `Beta (74%)`
+
+The design is mature, but policy semantics remain subtle for users and operators when CLI backends, subagents, plugin services, and elevated execution overlap.
+
+## Completeness Score
+
+- Score: `Stable (86%)`
+- Surface instructions: evaluated against `references/completeness/agent-runtime-and-provider-execution.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Tool availability rules, Sandboxed exec behavior, Approval flow, Elevated execution, Tool safety controls, and Delegated tool access.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- Operator docs explain the policy layers, but field reports still show confusion about what sandboxing does and does not constrain.
+- CLI backend approval forwarding is not as settled as the main embedded runtime path.
+- Per-agent and inherited policy behavior needs continued regression proof as subagents expand.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/gateway/sandbox-vs-tool-policy-vs-elevated.md` distinguishes sandbox, tool policy, and elevated exec; documents tool policy layers/rules, tool groups, sandboxed MCP server allow gates, elevated exec-only gates, and sandbox jail fixes.
+- `/Users/kevinlin/code/openclaw/docs/concepts/agent-loop.md` documents plugin hooks including `before_tool_call`, tool-call handling, and runtime event streams.
+- `/Users/kevinlin/code/openclaw/docs/tools/subagents.md` documents subagent tool policy, tool restriction, auth resolution, announce behavior, delivery routing, concurrency, liveness, and recovery.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/src/agents/agent-tools.ts` implements model-provider tool policy, local model tool suppression, exec config merging, tool policy setup, group/sender/sandbox/subagent/inherited policy, workspace/sandbox roots, `apply_patch` restrictions, exec tool setup, the tool policy pipeline, schema normalization, and `before_tool_call` hook wrapping.
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.ts` forwards plan, approval, command output, and patch events through runtime delivery.
+- `/Users/kevinlin/code/openclaw/src/agents/cli-runner.ts` persists approved CLI user turn transcripts and runs CLI hooks around backend execution.
+
+### Integration tests
+
+- `/Users/kevinlin/code/openclaw/src/agents/bash-tools.exec-gateway-approval.e2e.test.ts` covers gateway-hosted exec approvals on separate connections.
+- `/Users/kevinlin/code/openclaw/src/agents/agent-tools.before-tool-call.integration.e2e.test.ts` covers normal `before_tool_call` behavior, parameter modification, blocking, deduplication, and context.
+- `/Users/kevinlin/code/openclaw/src/agents/openclaw-tools.subagents.sessions-spawn.lifecycle.test.ts` covers subagent lifecycle, cleanup, timeout handling, account routing, announce behavior, and policy-adjacent session behavior.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/src/agents/agent-tools.message-provider-policy.test.ts` covers provider-based message/tool policy behavior.
+- `/Users/kevinlin/code/openclaw/src/auto-reply/reply/agent-runner-execution.test.ts` covers approval, command output, patch event forwarding, tool progress details, and streamed tool result delivery.
+- `/Users/kevinlin/code/openclaw/src/agents/transport-stream-shared.test.ts` covers safe transport behavior for tool payloads.
+
+### Gitcrawl queries
+
+- `gitcrawl --json search issues -R openclaw/openclaw "exec approvals tool policy sandbox agent tool"` returned #44253 on per-agent `tools.selfDeny`, #69512 on forwarding `exec-approvals.json` allowlists to `claude-cli` backend sessions, #78965 on local user sandbox backend, #48532 on security by intent, #67440 on optional TOTP for exec approvals, #48503 on enriching `before_tool_call` events with action classification/input provenance, and #82548 on safety/quality observability events.
+- `gitcrawl --json search issues -R openclaw/openclaw "claude-cli codex cli harness subagent sessions_spawn"` returned #73097 on PI harness ignoring `cliBackends` configuration and splitting subagent execution from chat path.
+
+### Discrawl queries
+
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "exec approvals tool policy"` returned May 2026 release testing notes covering auth/profile, sandbox policy, and exec approvals; discussions of node file fetch policy; explanations that denying `exec` at agent tool policy level does not sandbox plugins/services; comments that sandbox/tool-policy/exec-approval controls are useful but not solved defaults; and issue-closing comments for related controls.
+- `/Users/kevinlin/.local/bin/discrawl search --limit 10 "sessions_spawn claude-cli"` returned Claude CLI and ACP runtime discussions that affect tool permissions, sandbox boundaries, and subagent UX.
--- a/docs/maturity-scorecard/inventory/android-app/background-service-reconnect-presence.md
+++ b/docs/maturity-scorecard/inventory/android-app/background-service-reconnect-presence.md
@@ -0,0 +1,109 @@
+---
+title: "Android app - Background Service, Reconnect, and Presence Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Android app - Background Service, Reconnect, and Presence Maturity Note
+
+## Summary
+
+Android background operation is implemented around a foreground service, persistent notification, reconnecting Gateway sessions, presence-alive beacons, notification-listener state, and foreground-service microphone type switching. Coverage is Alpha because docs and source cover the intended behavior but no live backgrounding scorecard was found. Quality is the weakest Android component: archive evidence includes a foreground-service crash issue and an active PR to avoid persistent `dataSync` foreground service use.
+
+## Category Scope
+
+- `NodeForegroundService`, persistent notification, background reconnect, node presence beacons, notification listener state, Gateway session reconnect, and reconnect after app backgrounding.
+- Out of scope: individual node command handlers except where foreground/background state changes command availability.
+
+## Features
+
+- Background reconnect and presence: Foreground-service presence, reconnect, and node presence behavior.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Alpha (62%)`
+- Positive signals: Docs explicitly say Android keeps the Gateway connection alive through a foreground service, auto-reconnects after first pairing, and sends `node.presence.alive` when backgrounded while connected. Source implements foreground notification state, reconnect loop, presence beacon payload/skip logic, and reconnect tests.
+- Negative signals: No live Android backgrounding scenario was found that proves app background, foreground-service notification, presence beacon handling, Gateway restart, network loss, and app relaunch together.
+- Integration gaps: Need a real-device background/reconnect scorecard across Android 14/15 service restrictions, battery saver, network changes, Gateway restart, and Talk Mode microphone service promotion/demotion.
+
+## Quality Score
+
+- Score: `Alpha (55%)`
+- Gitcrawl reports: `ForegroundServiceStartNotAllowedException Android` found issue #64903 for Android app crashes on `NodeForegroundService startForeground` with `ForegroundServiceStartNotAllowedException` and PR #80082 `fix(android): avoid dataSync FGS for persistent node`.
+- Discrawl reports: `Android foreground service reconnect presence` returned no direct hits. Broader support context from node capability searches notes that a backgrounded mobile node causes Canvas/camera/screen command failures.
+- Good qualities: The service uses a low-importance persistent notification, updates title/text from runtime state, adds a Disconnect action, switches service type for Talk Mode, and presence beacon responses require `handled: true` before counting durable last-seen updates.
+- Bad qualities: Android foreground-service policy is a live crash risk, persistent node operation touches OS service quotas, and docs do not provide a current operator recipe for battery optimization, service denial, or reconnect triage.
+- Excluded from quality: Test coverage and runtime-flow proof were not used to raise or lower Quality.
+
+## Completeness Score
+
+- Score: `Alpha (62%)`
+- Surface instructions: evaluated against `references/completeness/android-app.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Background reconnect and presence.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- Prove background reconnect and presence on current Android OS versions with real devices.
+- Add operator guidance for foreground-service denial, battery saver, OEM background restrictions, and notification permission states.
+- Clarify which commands intentionally fail while the app is backgrounded and how to recover.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/platforms/android.md` documents foreground service connection keepalive, auto-reconnect on launch, and presence alive beacons after authenticated node session connect and backgrounding.
+- `/Users/kevinlin/code/openclaw/apps/android/README.md` lists authenticated background presence beacons and push notifications in the rebuild checklist.
+- `/Users/kevinlin/code/openclaw/docs/nodes/troubleshooting.md` is linked as related Android node troubleshooting.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/NodeForegroundService.kt` starts a foreground service, maintains the persistent notification, exposes Disconnect, and promotes service types for Talk Mode.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/node/ConnectionManager.kt` builds node connect options, user agent, advertised capabilities, and TLS policy used on reconnect.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/node/NodePresenceAliveBeacon.kt` builds and decodes `node.presence.alive`, throttles recent successes, and sanitizes failure reasons.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/gateway/GatewaySession.kt` owns connect, disconnect, reconnect, pause-after-auth-failure, and current connection closure.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/node/DeviceNotificationListenerService.kt` tracks notification-listener connection state and emits `notifications.changed` events.
+
+### Integration tests
+
+- No live background/reconnect Android scenario was found.
+- `/Users/kevinlin/code/openclaw/src/gateway/android-node.capabilities.live.test.ts` requires the app to stay unlocked and foregrounded for capability execution, which highlights the missing background scenario.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/NodeForegroundServiceTest.kt` covers foreground-service notification/type behavior.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/gateway/GatewaySessionReconnectTest.kt` covers replacing active connections and reconnect pause behavior.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/node/NodePresenceAliveBeaconTest.kt` covers presence beacon payload/response helper behavior.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/node/ConnectionManagerTest.kt` covers connection manager behavior.
+
+### Gitcrawl queries
+
+Query:
+
+`gitcrawl search openclaw/openclaw --query "ForegroundServiceStartNotAllowedException Android" --json`
+
+Results:
+
+- Issue #64903 `Android app crashes on NodeForegroundService startForeground with ForegroundServiceStartNotAllowedException`.
+- PR #80082 `fix(android): avoid dataSync FGS for persistent node`.
+
+### Discrawl queries
+
+Query:
+
+`/Users/kevinlin/.local/bin/discrawl search --mode fts --limit 5 "Android foreground service reconnect presence"`
+
+Results:
+
+- No direct hits.
+
+Additional query context:
+
+- `/Users/kevinlin/.local/bin/discrawl search --mode fts --limit 5 "Android node capabilities gateway commands"` found support guidance that mobile node Canvas/camera/screen commands fail when the app is backgrounded.
--- a/docs/maturity-scorecard/inventory/android-app/camera-media-capture.md
+++ b/docs/maturity-scorecard/inventory/android-app/camera-media-capture.md
@@ -0,0 +1,111 @@
+---
+title: "Android app - Media Capture Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Android app - Media Capture Maturity Note
+
+## Summary
+
+Android media capture includes CameraX photo and clip capture, image payload resizing, camera HUD feedback, WebView Canvas/A2UI, photo-library access in the third-party flavor, and live capability checks for camera/canvas commands. Coverage is Alpha because the implementation is real but foreground-only and the Android camera docs are not fully aligned with the source command set. Quality is Alpha because permissions, payload limits, WebView readiness, Play flavor restrictions, and foreground state make the operator path fragile.
+
+## Category Scope
+
+Included in this category:
+
+- Camera and media capture: Camera listing, capture, photo, screen, and media capture behavior.
+
+## Features
+
+- Camera and media capture: Camera listing, capture, photo, screen, and media capture behavior.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Alpha (66%)`
+- Positive signals: Android docs cover Canvas and camera foreground commands; source implements CameraX photo/clip capture, permission prompting, payload guards, A2UI/WebView actions, and photo-library access where the flavor permits it. The live capability suite profiles `camera.list`, `camera.snap`, `camera.clip`, `canvas.*`, and `canvas.a2ui.*`.
+- Negative signals: Live capability tests are preconditioned on a paired, foregrounded, unlocked app and require the Screen tab for Canvas/A2UI. Docs for shared camera commands list only `camera.list` in the Android section even though source and platform docs expose `camera.snap` and `camera.clip`.
+- Integration gaps: Need a real-device Android media scorecard that keeps the app foregrounded, grants camera/mic/photo permissions, invokes front/back photo, short clip with/without audio, Canvas navigate/eval/snapshot, and records background failure behavior.
+
+## Quality Score
+
+- Score: `Alpha (62%)`
+- Gitcrawl reports: `camera.snap Android` found issue #87058 where Android node connected but advertised zero commands; the snippet notes `camera.snap`, `camera.clip`, and `canvas.*` are high-risk gated commands. `photos.latest Android` returned no direct hits.
+- Discrawl reports: Search found January support messages describing Android node camera snap/clip, Canvas, voice wake, and screen recording as supported node abilities, and warning that nodes are often offline or foreground dependent.
+- Good qualities: Camera commands request runtime permissions, clamp clip duration, cap payload size, recompress JPEGs under API limits, show camera HUD state, and separate Play flavor from third-party photo access.
+- Bad qualities: Media commands are foreground-only, WebView/A2UI depends on Screen tab readiness and Gateway canvas host reachability, and Play flavor removes photo-library permissions. Source/docs alignment is imperfect for Android camera command details.
+- Excluded from quality: Test coverage and runtime-flow proof were not used to raise or lower Quality.
+
+## Completeness Score
+
+- Score: `Alpha (66%)`
+- Surface instructions: evaluated against `references/completeness/android-app.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Camera and media capture.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- Align `/nodes/camera` Android command docs with source and platform docs for `camera.snap` and `camera.clip`.
+- Add a foreground/background media failure-mode runbook with exact operator messages.
+- Decide whether `photos.latest` is supported only for third-party builds or should have a Play-safe replacement.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/platforms/android.md` documents Canvas host, `canvas.eval`, `canvas.snapshot`, `canvas.navigate`, A2UI commands, and foreground-only camera commands `camera.snap` and `camera.clip`.
+- `/Users/kevinlin/code/openclaw/docs/nodes/camera.md` documents Android camera settings, permissions, foreground requirement, and `camera.list`; its Android command list is narrower than the source and platform page.
+- `/Users/kevinlin/code/openclaw/apps/android/README.md` documents Screen tab requirements for A2UI integration tests and says Play builds remove photo-library access.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/node/CameraCaptureManager.kt` implements CameraX device listing, photo capture, clip recording, permission requests, EXIF rotation, JPEG scaling, and payload limits.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/node/CameraHandler.kt` handles `camera.list`, `camera.snap`, `camera.clip`, HUD state, debug logging, clip size limits, and base64 payloads.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/node/PhotosHandler.kt` implements `photos.latest` with permission checks, latest image query, resizing, and base64 budget caps.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/ui/CanvasScreen.kt` implements the WebView Canvas, safe browsing settings, WebMessage A2UI bridge, visibility lifecycle, and render-process handling.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/node/InvokeDispatcher.kt` enforces foreground requirement for camera and canvas commands.
+
+### Integration tests
+
+- `/Users/kevinlin/code/openclaw/src/gateway/android-node.capabilities.live.test.ts` includes profiles for `camera.list`, `camera.snap`, `camera.clip`, `canvas.present`, `canvas.navigate`, `canvas.eval`, `canvas.snapshot`, and A2UI push/reset commands.
+- `/Users/kevinlin/code/openclaw/apps/android/scripts/perf-online-benchmark.sh` checks Screen tab WebView availability before running the screen benchmark.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/node/CameraHandlerTest.kt`, `JpegSizeLimiterTest.kt`, `PhotosHandlerTest.kt`, `CanvasControllerSnapshotParamsTest.kt`, `CanvasActionTrustTest.kt`, and `CanvasA2UIActionBridgeTest.kt` cover media and canvas helpers.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/node/InvokeDispatcherTest.kt` covers command dispatch behavior.
+
+### Gitcrawl queries
+
+Query:
+
+`gitcrawl search openclaw/openclaw --query "camera.snap Android" --json`
+
+Results:
+
+- Issue #87058 `Android node connects but advertises zero commands ...`; snippet notes `camera.snap`, `camera.clip`, and `canvas.*` as correctly gated high-risk commands.
+
+Query:
+
+`gitcrawl search openclaw/openclaw --query "photos.latest Android" --json`
+
+Results:
+
+- No direct hits.
+
+### Discrawl queries
+
+Query:
+
+`/Users/kevinlin/.local/bin/discrawl search --mode fts --limit 5 "Android camera snap screen canvas"`
+
+Results:
+
+- 2026-01-03 support messages describe Android nodes as paired companion devices that can expose camera snap/clip, Canvas, screen recording, and audio/TTS surfaces, while noting real availability depends on node connectivity.
--- a/docs/maturity-scorecard/inventory/android-app/chat-sessions-ui.md
+++ b/docs/maturity-scorecard/inventory/android-app/chat-sessions-ui.md
@@ -0,0 +1,103 @@
+---
+title: "Android app - Mobile Chat Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Android app - Mobile Chat Maturity Note
+
+## Summary
+
+The Android app has a substantial mobile chat surface: session selection, history, optimistic sends, streaming assistant text, pending tool-call display, image attachments, thinking controls, markdown rendering, and online chat benchmark support. Coverage reaches Beta because the implementation spans Gateway chat RPCs and online UI proof, though no full Play-installed chat scenario was found. Quality stays Alpha because active archive evidence includes chat copy/reply review issues and the current source still depends on fast-moving mobile UI paths.
+
+## Category Scope
+
+Included in this category:
+
+- Chat tab: Chat tab, session list/filtering, composer, image attachments, message parsing/rendering, model/provider status adjacent to chat, and Gateway chat RPC integration.
+
+## Features
+
+- Chat tab: Chat tab, session list/filtering, composer, image attachments, message parsing/rendering, model/provider status adjacent to chat, and Gateway chat RPC integration.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Beta (70%)`
+- Positive signals: Docs describe `chat.history`, `chat.send`, `chat.subscribe`, session selection, display normalization, and best-effort push updates. Source implements history bootstrap, session switching, optimistic messages, streaming assistant text, pending tool calls, abort/refresh, attachments, and markdown rendering. The online benchmark explicitly checks connected state and live chat composer.
+- Negative signals: Evidence is stronger for source/unit/UI benchmark slices than for an end-to-end Play-installed chat path through install, pair, send, stream, background, reconnect, and resume.
+- Integration gaps: Need recurring mobile chat QA that sends text and images, changes sessions, streams a tool-using answer, backgrounds/reopens the app, and verifies history parity with another client.
+
+## Quality Score
+
+- Score: `Alpha (66%)`
+- Gitcrawl reports: `Android message copy text selection chat screen` found issue #57754 and PR #59603 for chat copy/text selection. The PR review record flagged reply quoting, attachment-only reply context, empty text actions, and reply send semantics.
+- Discrawl reports: Search found GitHub mirror review comments on PR #59603 that identify user-visible reply/copy problems around multiline quoting, image-only messages, blank copy/share payloads, and local-only reply UI state.
+- Good qualities: Chat logic tracks pending run IDs, normalizes sessions, strips noisy model-control/tool-call text from history, supports image attachments with size handling, and separates visible health/errors from composer send enablement.
+- Bad qualities: Reply/copy behavior has had several subtle user-facing regressions, mobile chat UI is still being actively rebuilt, and session continuity across network changes/backgrounding lacks a published Android-specific runbook result.
+- Excluded from quality: Test coverage and runtime-flow proof were not used to raise or lower Quality.
+
+## Completeness Score
+
+- Score: `Beta (70%)`
+- Surface instructions: evaluated against `references/completeness/android-app.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Chat tab.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- Add a mobile chat parity smoke against WebChat/TUI for history, streaming, sessions, attachments, and abort.
+- Confirm reply/copy/text-selection behavior after the PR #59603 review findings.
+- Make reconnect and session-resume state explicit in Android chat diagnostics.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/platforms/android.md` documents Chat tab history via `chat.history`, send via `chat.send`, best-effort `chat.subscribe`, session selection, and display normalization behavior.
+- `/Users/kevinlin/code/openclaw/apps/android/README.md` says the rebuild includes restyled Chat UI, streaming support, and push notifications for gateway/chat status updates.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/chat/ChatController.kt` implements session loading/switching, health, optimistic sends, pending runs, streaming assistant text, tool-call state, history, and Gateway `chat.send`.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/ui/chat/ChatScreen.kt` composes chat header, notices, message list, attachments, voice shortcut, thinking level, refresh/abort, and send path.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/ui/chat/ChatComposer.kt` implements composer controls, thinking selector, attachment strip, refresh, abort, and send enablement.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/ui/chat/ChatMarkdown.kt` renders markdown blocks, code, tables, task lists, links, images, and selection containers.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/ui/SessionsScreen.kt` renders recent/live session filters, sorting, and active-session rows.
+
+### Integration tests
+
+- `/Users/kevinlin/code/openclaw/apps/android/scripts/perf-online-benchmark.sh` verifies the app reaches a visible connected state and that the live chat composer is present, then runs chat session-switch or scroll benchmarks.
+- No full Android chat e2e through a real Gateway answer and another client history parity check was found.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/chat/ChatControllerMessageIdentityTest.kt`, `ChatControllerSessionPolicyTest.kt`, and `ChatMessageContentParsingTest.kt` cover chat model/controller behavior.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/ui/chat/ChatComposerDraftTest.kt`, `ChatImageCodecTest.kt`, `ChatMarkdownTest.kt`, `ChatSheetContentTest.kt`, and `SessionFiltersTest.kt` cover chat UI helpers.
+
+### Gitcrawl queries
+
+Query:
+
+`gitcrawl search openclaw/openclaw --query "Android message copy text selection chat screen" --json`
+
+Results:
+
+- Issue #57754 `Android: Add message copy and text selection to chat screen`.
+- PR #59603 `feat(android): Add message copy and text selection to chat screen`.
+
+### Discrawl queries
+
+Query:
+
+`/Users/kevinlin/.local/bin/discrawl search --mode fts --limit 5 "Android chat screen message copy"`
+
+Results:
+
+- 2026-04-03 GitHub mirror review comments on PR #59603 flagged multiline quote formatting, attachment-only reply context, empty copy/share actions for image-only messages, and missing reply target in outgoing send path.
--- a/docs/maturity-scorecard/inventory/android-app/gateway-pairing-security.md
+++ b/docs/maturity-scorecard/inventory/android-app/gateway-pairing-security.md
@@ -0,0 +1,108 @@
+---
+title: "Android app - Connection Setup Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Android app - Connection Setup Maturity Note
+
+## Summary
+
+Android pairing and Gateway security have substantial implementation depth: setup-code/manual flows, mDNS and wide-area DNS-SD discovery, secure endpoint validation, device-token persistence, TLS fingerprint handling, node and operator roles, and reconnect policy. Coverage is high Alpha, just short of Beta: source and unit tests are strong, but the live Android pairing proof assumes an already installed, paired, and approved device rather than running turnkey from a fresh install. Quality remains Alpha because archive evidence shows repeated operator confusion around auth, LAN addressing, protocol/version skew, and manual `ws://` parsing.
+
+## Category Scope
+
+Included in this category:
+
+- Gateway discovery: Gateway discovery, setup-code and manual endpoint parsing, WS/WSS connection setup, TLS trust decisions, device identity, stored device tokens, node/operator auth, and connection error handling
+
+## Features
+
+- Gateway discovery: Gateway discovery, setup-code and manual endpoint parsing, WS/WSS connection setup, TLS trust decisions, device identity, stored device tokens, node/operator auth, and connection error handling
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Alpha (68%)`
+- Positive signals: Docs describe mDNS, setup-code/manual connection, secure remote endpoint rules, Tailscale Serve guidance, pairing approval commands, auto-reconnect, and node status checks. Unit tests cover endpoint parsing, auth payloads, token storage, TLS probe cleanup, reconnect, and setup flow logic.
+- Negative signals: The main live Android capability suite assumes the app is already installed, reachable, paired, approved, and foregrounded. No clean first-run Android connect-to-approval live scenario was found.
+- Integration gaps: Need a single live scenario that starts a fresh Gateway, connects Android by setup code and manual URL, exercises TLS trust/cleartext policy, approves pairing, verifies node/operator sessions, and records reconnection after auth failure.
+
+## Quality Score
+
+- Score: `Alpha (64%)`
+- Gitcrawl reports: `Android pairing websocket TLS manual LAN setup protocol mismatch` found issue #87216 for manual LAN setup parsing `ws://` as host `ws`. Broader `Android app` search also surfaced #85966 for silent WebSocket close after node pairing and #78807 for private LAN pairing auth.
+- Discrawl reports: Search found a March 7 GitHub mirror comment on #16638 where Android pairing with `gateway.auth.token` still hit `device signature invalid`, plus a February support thread walking a user through LAN IP, reachability, and auth/pairing diagnosis.
+- Good qualities: The endpoint parser blocks insecure remote `ws://` while permitting loopback, emulator bridge, and private LAN hosts; stored device tokens are scoped by device and role; discovery TXT hints are not treated as authoritative TLS pins; node and operator roles are separated.
+- Bad qualities: The user-facing failure surface is still easy to hit: wrong LAN host, token auth, stale protocol, TLS trust, and pairing retry states can all look like generic connection failure.
+- Excluded from quality: Test coverage and runtime-flow proof were not used to raise or lower Quality.
+
+## Completeness Score
+
+- Score: `Alpha (68%)`
+- Surface instructions: evaluated against `references/completeness/android-app.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Gateway discovery.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- Add a scripted first-run pairing smoke for setup code, manual LAN, and remote WSS.
+- Improve operator copy around pairing/auth versus wrong address versus TLS trust.
+- Keep Play Store version skew visible in pairing errors so old clients fail with an actionable upgrade message.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/platforms/android.md` documents the Android to Gateway WebSocket path, device pairing role, secure endpoint rules, setup-code/manual modes, pairing approval, auto-reconnect, and status verification.
+- `/Users/kevinlin/code/openclaw/apps/android/README.md` documents USB-only gateway testing with `adb reverse` and Connect/Pair steps.
+- `/Users/kevinlin/code/openclaw/docs/gateway/bonjour.md` is linked from the Android runbook for discovery debugging.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/gateway/GatewayDiscovery.kt` implements local NSD/mDNS plus optional wide-area DNS-SD discovery.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/ui/GatewayConfigResolver.kt` decodes setup codes, parses manual endpoints, and enforces secure remote URL rules.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/gateway/GatewaySession.kt` manages WebSocket connect, node/operator auth sources, RPCs, reconnect, and invoke handling.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/gateway/DeviceAuthStore.kt` persists device tokens and scopes by normalized device and role.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/node/ConnectionManager.kt` builds node/operator connect options and resolves TLS parameters.
+
+### Integration tests
+
+- `/Users/kevinlin/code/openclaw/src/gateway/android-node.capabilities.live.test.ts` connects a Gateway client, selects an Android node, verifies paired/connected state, and uses remote config for remote runs, but requires manual setup first.
+- No clean Android first-run pairing e2e was found.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/gateway/GatewaySessionReconnectTest.kt` covers reconnect and pairing-required pause/retry behavior.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/gateway/GatewaySessionInvokeTest.kt`, `GatewaySessionInvokeTimeoutTest.kt`, `DeviceAuthPayloadTest.kt`, `DeviceAuthStoreTest.kt`, `GatewayBootstrapAuthTest.kt`, `GatewayConfigResolverTest.kt`, and `OnboardingFlowLogicTest.kt` cover core auth, invoke, parsing, and onboarding logic.
+
+### Gitcrawl queries
+
+Query:
+
+`gitcrawl search openclaw/openclaw --query "Android pairing websocket TLS manual LAN setup protocol mismatch" --json`
+
+Results:
+
+- Issue #87216 `Android manual LAN setup parses ws:// as host ws and resolves http://ws:<port>`.
+
+Additional query context:
+
+- `gitcrawl search openclaw/openclaw --query "Android app" --json` found #85966 `Android UI/operator WebSocket closes silently ... after successful node pair` and #78807 `fix(mobile): allow private LAN pairing auth`.
+
+### Discrawl queries
+
+Query:
+
+`/Users/kevinlin/.local/bin/discrawl search --mode fts --limit 5 "Android manual LAN ws host pairing"`
+
+Results:
+
+- 2026-03-07 GitHub mirror comment on #16638 reports Android node cannot pair when `gateway.auth.token` is configured and still hits `device signature invalid`.
+- 2026-02-06 support thread explains LAN IP, reachability, firewall/client isolation, and auth/pairing as likely Android connection blockers.
--- a/docs/maturity-scorecard/inventory/android-app/install-release-distribution.md
+++ b/docs/maturity-scorecard/inventory/android-app/install-release-distribution.md
@@ -0,0 +1,117 @@
+---
+title: "Android app - Distribution Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Android app - Distribution Maturity Note
+
+## Summary
+
+The Android app has a public Google Play install path, source build/run docs, Play and third-party product flavors, signed AAB release automation, version-code auto-bumping, and startup/performance scripts. Coverage remains Alpha because the app README still marks the rebuild as extremely alpha and leaves full end-to-end QA and release hardening unchecked. Quality is also Alpha: the Play policy split is a strong design choice, but archive evidence includes an outdated Play Store protocol mismatch and an open request for prebuilt APK release artifacts.
+
+## Category Scope
+
+Included in this category:
+
+- Public Google Play install path: Public Google Play install path and source build/run entrypoints
+- Manual install path: Manual install path and Google Play distribution behavior.
+- Release smoke and startup performance: Release smoke and startup performance checks for Android app distribution.
+
+## Features
+
+- Public Google Play install path: Public Google Play install path and source build/run entrypoints
+- Manual install path: Manual install path and Google Play distribution behavior.
+- Release smoke and startup performance: Release smoke and startup performance checks for Android app distribution.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Alpha (60%)`
+- Positive signals: Public install is documented, source build/run commands exist, package scripts expose assemble/install/test/release tasks, release automation builds signed Play and third-party AABs, and Android benchmark scripts cover startup and online UI paths.
+- Negative signals: The app README still labels the rebuild extremely alpha and leaves full end-to-end QA and release hardening incomplete. The release path is mostly documented and scripted, but no recurring public release smoke record was found.
+- Integration gaps: Need a repeatable release checklist that installs the Play artifact, pairs it to a current Gateway, runs chat/voice/camera/background scenarios, verifies version compatibility, and records Play Console policy status.
+
+## Quality Score
+
+- Score: `Alpha (62%)`
+- Gitcrawl reports: `Play Store Android app protocol mismatch` found issue #85971 for Play Store Android app v2026.4.5 protocol mismatch against Gateway >= v2026.5.12 and issue #87216, a related manual LAN setup report in which `ws://` is parsed as host `ws`. `Android APK releases` found issue #9443 requesting prebuilt Android APK releases.
+- Discrawl reports: Search found a May 19 support message saying the Play Store app was outdated and had a protocol mismatch; the user built a newer app locally and then hit a connected/operator-offline state.
+- Good qualities: Play and third-party flavors separate Google Play restricted permissions from sideload-only SMS, call-log, and photo surfaces. Release signing properties are kept local-only, release bundles are copied to a predictable output directory, and release AAB signatures are verified.
+- Bad qualities: Distribution is still fragile for ordinary users because Play can lag Gateway protocol changes, APK artifacts are not fully productized, and docs explicitly say release hardening is unfinished.
+- Excluded from quality: Test coverage and runtime-flow proof were not used to raise or lower Quality.
+
+## Completeness Score
+
+- Score: `Alpha (60%)`
+- Surface instructions: evaluated against `references/completeness/android-app.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for "Public Google Play install path", "Manual install path", and "Release smoke and startup performance".
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- Publish and record a current Play smoke path against the current Gateway protocol.
+- Decide whether GitHub release APKs or third-party AAB/APK artifacts are part of the supported distribution promise.
+- Add release hardening evidence for Play policy declarations, app signing, version skew, rollback, and fresh-install pairing.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/platforms/android.md` links the official Google Play app, describes Android as a companion node, and points to source under `apps/android`.
+- `/Users/kevinlin/code/openclaw/apps/android/README.md` marks the rebuild as extremely alpha, lists rebuild checklist items, documents Play and third-party builds, and calls out full end-to-end QA and release hardening as unchecked.
+- `/Users/kevinlin/code/openclaw/README.md` lists Android as an optional node with Connect, Chat, Voice, Canvas, Camera, Screen, and device command families.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/package.json` defines `android:assemble`, `android:install`, `android:bundle:release`, `android:test`, `android:test:integration`, lint, and third-party variants.
+- `/Users/kevinlin/code/openclaw/apps/android/app/build.gradle.kts` sets `applicationId = "ai.openclaw.app"`, `minSdk = 31`, `targetSdk = 36`, Play and third-party flavors, release signing checks, R8/resource shrinking, lint warnings as errors, and version `2026.5.28`.
+- `/Users/kevinlin/code/openclaw/apps/android/scripts/build-release-aab.ts` auto-bumps version name/code, builds Play and third-party release bundles, verifies signatures with `jarsigner`, and prints SHA-256 hashes.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/play/AndroidManifest.xml` removes restricted media permissions from the Play flavor; `/Users/kevinlin/code/openclaw/apps/android/app/src/thirdParty/AndroidManifest.xml` adds SMS and call-log permissions for the third-party flavor.
+
+### Integration tests
+
+- `/Users/kevinlin/code/openclaw/apps/android/benchmark/src/main/java/ai/openclaw/app/benchmark/StartupMacrobenchmark.kt` and `/Users/kevinlin/code/openclaw/apps/android/scripts/perf-startup-benchmark.sh` cover startup measurement.
+- `/Users/kevinlin/code/openclaw/apps/android/scripts/perf-online-benchmark.sh` measures launch-to-connected, Screen tab, and Chat tab paths on a connected device.
+- No current release-smoke artifact was found that covers the path from a Play Store install through to a paired Gateway.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/ui/OnboardingFlowLogicTest.kt` covers onboarding flow logic adjacent to install.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/SecurePrefsTest.kt` and `SecurePrefsNotificationForwardingTest.kt` cover stored app state used after install.
+- Release AAB automation itself does not appear to have a dedicated unit test.
+
+### Gitcrawl queries
+
+Query:
+
+`gitcrawl search openclaw/openclaw --query "Play Store Android app protocol mismatch" --json`
+
+Results:
+
+- Issue #85971 `[Bug] Play Store Android app v2026.4.5 protocol mismatch against Gateway >= v2026.5.12 - clawx user report`.
+- Issue #87216 `Android manual LAN setup parses ws:// as host ws and resolves http://ws:<port>`.
+
+Query:
+
+`gitcrawl search openclaw/openclaw --query "Android APK releases" --json`
+
+Results:
+
+- Issue #9443 `Request: Prebuilt Android APK releases`.
+
+### Discrawl queries
+
+Query:
+
+`/Users/kevinlin/.local/bin/discrawl search --mode fts --limit 5 "Android Play Store protocol mismatch"`
+
+Results:
+
+- 2026-05-19 support message: a user built the Android app locally because the Play Store app was outdated and had a protocol mismatch; the newer local build connected by Tailscale but reported operator offline.
--- a/docs/maturity-scorecard/inventory/android-app/node-device-capabilities.md
+++ b/docs/maturity-scorecard/inventory/android-app/node-device-capabilities.md
@@ -0,0 +1,124 @@
+---
+title: "Android app - Device Runtime Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Android app - Device Runtime Maturity Note
+
+## Summary
+
+Android node capabilities are broad: device status/info/permissions/health, notifications, system notify, contacts, calendar, location, motion, camera, Canvas/A2UI, Talk PTT, and flavor-gated SMS/call-log/photos. Coverage reaches Beta because the Gateway live capability test executes the advertised non-interactive command surface against a paired Android node. Quality remains Alpha because archive evidence includes zero-command advertisement failures, notification forwarding cross-session risk, and multiple open requests for additional native Android capability families.
+
+## Category Scope
+
+Included in this category:
+
+- Background reconnect and presence: Foreground-service presence, reconnect, and node presence behavior.
+- Device command availability: Android device command availability and capability advertisement.
+
+## Features
+
+- Background reconnect and presence: Foreground-service presence, reconnect, and node presence behavior.
+- Device command availability: Android device command availability and capability advertisement.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Beta (70%)`
+- Positive signals: Docs enumerate Android command families and flavor-dependent availability. Source has a central registry for advertised capabilities and commands, dispatcher gates for foreground and permission-sensitive commands, and handlers across the major device surfaces. The live capability test reads `node.describe`, applies the effective Gateway allowlist, invokes advertised non-interactive commands, and fails on unmapped command profiles.
+- Negative signals: The live suite is preconditioned and excludes interactive screen-recording consent. It validates command contracts but does not prove every user-facing permission grant or long-lived device-state workflow.
+- Integration gaps: Need a full Android node command scorecard that records command availability across Play and third-party builds, denied/granted permissions, foreground/background state, and Gateway allowlist/denylist policy.
+
+## Quality Score
+
+- Score: `Alpha (63%)`
+- Gitcrawl reports: `notifications.list Android node` found issue #48516 for notification forwarding causing cross-session replies and issue #87058 for Android node connecting but advertising zero commands. `Android Health Connect read-only node commands` found #78611, and `Google Home API bridge Android app native smart-home` found #78476 as future capability requests.
+- Discrawl reports: Search found a GitHub mirror review note that the live Android suite now filters declared commands by effective policy allowlist, a review note asking to add `callLog.search` to live capability profiles, and support guidance that Canvas/camera/screen commands fail when a mobile node is backgrounded or does not advertise the capability.
+- Good qualities: Capability advertisement is data-driven, sensitive surfaces are gated by build flavor and runtime availability, command dispatch returns structured errors, and Gateway policy is applied before live command execution.
+- Bad qualities: Capability shape is large and permission-dependent; notification events can affect chat/session routing; Play flavor removes several high-value device commands; and future native Android capability asks are already accumulating.
+- Excluded from quality: Test coverage and runtime-flow proof were not used to raise or lower Quality.
+
+## Completeness Score
+
+- Score: `Beta (70%)`
+- Surface instructions: evaluated against `references/completeness/android-app.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for both "Background reconnect and presence" and "Device command availability".
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- Record command availability matrices for Play versus third-party flavor.
+- Add release-smoke evidence for notification forwarding session routing and policy filters.
+- Keep live capability profiles in lockstep with every newly advertised Android command.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/platforms/android.md` lists command families: device status/info/permissions/health, notifications, photos, contacts, calendar, call log, SMS, motion, camera, Canvas, and Talk.
+- `/Users/kevinlin/code/openclaw/apps/android/README.md` documents Google Play restricted permissions and the Play versus third-party flavor split.
+- `/Users/kevinlin/code/openclaw/docs/plugins/sdk-runtime.md` documents paired node invocation from Gateway-loaded plugins and CLI commands.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/NodeRuntime.kt` wires handlers, capability flags, node/operator sessions, sensitive feature config, and command dispatch.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/node/InvokeCommandRegistry.kt` defines advertised capabilities and commands plus availability gates.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/node/InvokeDispatcher.kt` routes commands and enforces foreground, debug, permission, flavor, and availability errors.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/node/DeviceHandler.kt`, `NotificationsHandler.kt`, `ContactsHandler.kt`, `CalendarHandler.kt`, `LocationHandler.kt`, `MotionHandler.kt`, `SystemHandler.kt`, `SmsHandler.kt`, and `CallLogHandler.kt` implement the command families.
+- `/Users/kevinlin/code/openclaw/src/gateway/android-node.capabilities.live.test.ts` is the Gateway-side live capability harness.
+
+### Integration tests
+
+- `/Users/kevinlin/code/openclaw/src/gateway/android-node.capabilities.live.test.ts` connects to a Gateway, selects a paired Android node, reads `node.describe`, resolves allowlist policy, invokes every mapped advertised non-interactive command, and verifies payload contracts or expected deterministic errors.
+- The suite explicitly skips interactive screen-recording consent.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/node/InvokeCommandRegistryTest.kt`, `InvokeDispatcherTest.kt`, `DeviceHandlerTest.kt`, `NotificationsHandlerTest.kt`, `DeviceNotificationListenerServiceTest.kt`, `ContactsHandlerTest.kt`, `CalendarHandlerTest.kt`, `LocationHandlerTest.kt`, `MotionHandlerTest.kt`, and `SystemHandlerTest.kt` cover core command behavior.
+- `/Users/kevinlin/code/openclaw/src/gateway/android-node.capabilities.policy-config.test.ts` and `android-node.capabilities.policy-source.test.ts` cover live-suite policy config behavior.
+
+### Gitcrawl queries
+
+Query:
+
+`gitcrawl search openclaw/openclaw --query "notifications.list Android node" --json`
+
+Results:
+
+- Issue #48516 `Android node notification forwarding causes cross-session replies (WhatsApp duplicate sends to wrong group)`.
+- Issue #87058 `Android node connects but advertises zero commands ...`.
+
+Query:
+
+`gitcrawl search openclaw/openclaw --query "Android Health Connect read-only node commands" --json`
+
+Results:
+
+- Issue #78611 `[Feature]: Android Health Connect read-only node commands`.
+
+Query:
+
+`gitcrawl search openclaw/openclaw --query "Google Home API bridge Android app native smart-home" --json`
+
+Results:
+
+- Issue #78476 `Feature: Google Home API bridge in Android app for native smart-home control`.
+
+### Discrawl queries
+
+Query:
+
+`/Users/kevinlin/.local/bin/discrawl search --mode fts --limit 5 "Android node capabilities gateway commands"`
+
+Results:
+
+- 2026-03-19 GitHub mirror review note says the live suite now treats policy allowlist as part of runnable preconditions.
+- 2026-03-13 GitHub mirror review note asks to add a `callLog.search` profile to Android live capability checks.
+- 2026-03-13 support thread explains Canvas/camera/screen capability failures when no paired node is connected, the mobile app is backgrounded, or capabilities are not advertised.
--- a/docs/maturity-scorecard/inventory/android-app/report.md
+++ b/docs/maturity-scorecard/inventory/android-app/report.md
@@ -0,0 +1,237 @@
+---
+title: "Android app Maturity Report"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Android app Maturity Report
+
+## Top-level scores
+
+These rollups are simple arithmetic means over the category-note numeric
+scores in
+`scores.yaml`. Percentages are rounded to the nearest whole number.
+
+- Coverage: `Alpha (65%)`
+- Quality: `Alpha (62%)`
+- Completeness: `Alpha (65%)`
+- LTS Features: `0/7`
+
+## Summary
+
+This report promotes the archived `android-app` maturity evidence from `/Users/kevinlin/tmp/maturity/android-app` into the current process-version-3 inventory contract.
+
+The category Coverage and Quality scores come from the archived evidence-backed score rows. Completeness is initialized from the same archived evidence breadth and known-gap record, then joined with the surface-specific completeness rubric referenced by taxonomy.
+
+## Matrix
+
+| Category                                        | LTS | Coverage      | Quality       | Completeness  | Features to evaluate                                                                        |
+| ----------------------------------------------- | --- | ------------- | ------------- | ------------- | ------------------------------------------------------------------------------------------- |
+| [Media Capture](camera-media-capture.md)        | ❌  | `Alpha (66%)` | `Alpha (62%)` | `Alpha (66%)` | Camera and media capture                                                                    |
+| [Mobile Chat](chat-sessions-ui.md)              | ❌  | `Beta (70%)`  | `Alpha (66%)` | `Beta (70%)`  | Chat tab                                                                                    |
+| [Connection Setup](gateway-pairing-security.md) | ❌  | `Alpha (68%)` | `Alpha (64%)` | `Alpha (68%)` | Gateway discovery                                                                           |
+| [Distribution](install-release-distribution.md) | ❌  | `Alpha (60%)` | `Alpha (62%)` | `Alpha (60%)` | Public Google Play install path, Manual install path, Release smoke and startup performance |
+| [Settings](settings-permissions-diagnostics.md) | ❌  | `Alpha (64%)` | `Alpha (66%)` | `Alpha (64%)` | Settings sheet                                                                              |
+| [Voice](voice-talk-wake.md)                     | ❌  | `Alpha (66%)` | `Alpha (60%)` | `Alpha (66%)` | Voice tab                                                                                   |
+| [Device Runtime](node-device-capabilities.md)   | ❌  | `Alpha (62%)` | `Alpha (55%)` | `Alpha (62%)` | Background reconnect and presence, Device command availability                              |
+
+## Scoring rubric
+
+- Coverage:
+  maturity-label rating for integration, e2e, live, or server/runtime flow
+  evidence across the category. Unit tests can provide supporting context but never make a
+  feature covered by themselves.
+- Quality:
+  maturity-label rating for implementation and operational robustness. Unit,
+  integration, e2e, live, and real runtime-flow test coverage are Coverage
+  inputs only; they do not raise or lower Quality.
+- Completeness:
+  maturity-label rating for how fully the category delivers the intended
+  surface-specific capability set. Use the taxonomy-linked completeness
+  instructions for this surface.
+- LTS:
+  calculated as `quality > 80 and coverage > 90`, or when the matching
+  taxonomy category sets `human_lts_override`.
+- Shared score bands:
+  `Lovable = 95-100`, `Stable = 80-95`, `Beta = 70-80`,
+  `Alpha = 50-70`, and `Experimental = 0-50`. At shared boundaries, choose the
+  higher maturity label.
+- Major quality/completeness gaps:
+  evidence text only, tracked in the detailed feature inventory rather than as a
+  separate scored dimension.
+
+## Detailed feature inventory
+
+### 1. Media Capture
+
+Search anchors: camera.list, camera.capture, screen capture.
+
+Category note: [Media Capture](camera-media-capture.md)
+
+Score decisions:
+
+- Coverage: `Alpha (66%)`
+- Quality: `Alpha (62%)`
+- Completeness: `Alpha (66%)`
+- LTS: ❌
+
+Features:
+
+- Camera and media capture: Camera listing, capture, photo, screen, and media capture behavior.
+
+Primary docs:
+
+- `docs/platforms/android.md`
+- `docs/nodes/camera.md`
+
+### 2. Mobile Chat
+
+Search anchors: Chat tab, chat.history, mobile UI.
+
+Category note: [Mobile Chat](chat-sessions-ui.md)
+
+Score decisions:
+
+- Coverage: `Beta (70%)`
+- Quality: `Alpha (66%)`
+- Completeness: `Beta (70%)`
+- LTS: ❌
+
+Features:
+
+- Chat tab: Chat tab, session list/filtering, composer, image attachments, message parsing/rendering, model/provider status adjacent to chat, and Gateway chat RPC integration
+
+Primary docs:
+
+- `docs/platforms/android.md`
+
+### 3. Connection Setup
+
+Search anchors: Setup Code, Manual, Bonjour.
+
+Category note: [Connection Setup](gateway-pairing-security.md)
+
+Score decisions:
+
+- Coverage: `Alpha (68%)`
+- Quality: `Alpha (64%)`
+- Completeness: `Alpha (68%)`
+- LTS: ❌
+
+Features:
+
+- Gateway discovery: Gateway discovery, setup-code and manual endpoint parsing, WS/WSS connection setup, TLS trust decisions, device identity, stored device tokens, node/operator auth, and connection error handling
+
+Primary docs:
+
+- `docs/platforms/android.md`
+- `docs/gateway/bonjour.md`
+- `docs/gateway/pairing.md`
+
+### 4. Distribution
+
+Search anchors: Google Play, Manual, Startup macrobenchmark.
+
+Category note: [Distribution](install-release-distribution.md)
+
+Score decisions:
+
+- Coverage: `Alpha (60%)`
+- Quality: `Alpha (62%)`
+- Completeness: `Alpha (60%)`
+- LTS: ❌
+
+Features:
+
+- Public Google Play install path: Public Google Play install path and source build/run entrypoints
+- Manual install path: Manual install path and Google Play distribution behavior.
+- Release smoke and startup performance: Release smoke and startup performance checks for Android app distribution.
+
+Primary docs:
+
+- `docs/platforms/android.md`
+
+### 5. Settings
+
+Search anchors: Settings sheet, Notification forwarding, diagnostics.
+
+Category note: [Settings](settings-permissions-diagnostics.md)
+
+Score decisions:
+
+- Coverage: `Alpha (64%)`
+- Quality: `Alpha (66%)`
+- Completeness: `Alpha (64%)`
+- LTS: ❌
+
+Features:
+
+- Settings sheet: Settings sheet and settings detail screens, permission request UX, notification forwarding controls, Nodes & Devices status, provider/model diagnostics, secure preferences, and copyable Gateway diagnostic report
+
+Primary docs:
+
+- `docs/platforms/android.md`
+
+### 6. Voice
+
+Search anchors: Talk Mode, Voice tab, wake.
+
+Category note: [Voice](voice-talk-wake.md)
+
+Score decisions:
+
+- Coverage: `Alpha (66%)`
+- Quality: `Alpha (60%)`
+- Completeness: `Alpha (66%)`
+- LTS: ❌
+
+Features:
+
+- Voice tab: Voice tab, manual mic capture, Talk Mode listen/think/speak loop, Gateway Talk config, talk.speak, realtime relay mode, voice capture service type, and voice e2e receiver/script
+
+Primary docs:
+
+- `docs/platforms/android.md`
+- `docs/nodes/talk.md`
+
+### 7. Device Runtime
+
+Search anchors: foreground service, node.presence.alive, background reconnect, Additional Android command families, node capabilities, command handling.
+
+Category note: [Device Runtime](node-device-capabilities.md)
+
+Score decisions:
+
+- Coverage: `Alpha (62%)`
+- Quality: `Alpha (55%)`
+- Completeness: `Alpha (62%)`
+- LTS: ❌
+
+Features:
+
+- Background reconnect and presence: Foreground-service presence, reconnect, and node presence behavior.
+- Device command availability: Android device command availability and capability advertisement.
+
+Primary docs:
+
+- `docs/platforms/android.md`
+- `docs/nodes/troubleshooting.md`
+- `docs/gateway/protocol.md`
+
+## Recommended scorecard interpretation
+
+Use this migrated score as the current inventory baseline. Refresh individual categories with live category-agent research before treating a high score as an LTS promotion gate.
+
+## Out of scope for this surface
+
+- Redefining taxonomy category boundaries; taxonomy remains the source of truth for category identity, features, docs, and search anchors.
+
+## Audit provenance
+
+- Score source:
+  `docs/kevinslin/maturity-scorecard/inventory/android-app/scores.yaml`.
+- Taxonomy metadata source:
+  `.agents/skills/claw-score/taxonomy.yaml`.
+- Archived evidence source:
+  `/Users/kevinlin/tmp/maturity/android-app`.
--- a/docs/maturity-scorecard/inventory/android-app/scores.yaml
+++ b/docs/maturity-scorecard/inventory/android-app/scores.yaml
@@ -0,0 +1,38 @@
+version: 1
+process_version: 3
+data:
+  - name: Media Capture
+    category_note: camera-media-capture.md
+    coverage: 66
+    quality: 62
+    completeness: 66
+  - name: Mobile Chat
+    category_note: chat-sessions-ui.md
+    coverage: 70
+    quality: 66
+    completeness: 70
+  - name: Connection Setup
+    category_note: gateway-pairing-security.md
+    coverage: 68
+    quality: 64
+    completeness: 68
+  - name: Distribution
+    category_note: install-release-distribution.md
+    coverage: 60
+    quality: 62
+    completeness: 60
+  - name: Settings
+    category_note: settings-permissions-diagnostics.md
+    coverage: 64
+    quality: 66
+    completeness: 64
+  - name: Voice
+    category_note: voice-talk-wake.md
+    coverage: 66
+    quality: 60
+    completeness: 66
+  - name: Device Runtime
+    category_note: node-device-capabilities.md
+    coverage: 62
+    quality: 55
+    completeness: 62
--- a/docs/maturity-scorecard/inventory/android-app/settings-permissions-diagnostics.md
+++ b/docs/maturity-scorecard/inventory/android-app/settings-permissions-diagnostics.md
@@ -0,0 +1,112 @@
+---
+title: "Android app - Settings Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Android app - Settings Maturity Note
+
+## Summary
+
+Android settings and diagnostics cover a large operator surface: profile, Gateway settings, camera/location/mic/photos/motion/SMS/call-log/notification permissions, notification forwarding policy, nodes/devices view, provider/model state, and copyable Gateway diagnostics. Coverage is Alpha because the source and unit coverage are broad but no integrated operator recovery scenario was found. Quality is Alpha but stronger than the background service because the app has clear safety controls, policy filters, and copyable diagnostic text.
+
+## Category Scope
+
+Included in this category:
+
+- Settings sheet: Settings sheet and settings detail screens, permission request UX, notification forwarding controls, Nodes & Devices status, provider/model diagnostics, secure preferences, and copyable Gateway diagnostic report
+
+## Features
+
+- Settings sheet: Settings sheet and settings detail screens, permission request UX, notification forwarding controls, Nodes & Devices status, provider/model diagnostics, secure preferences, and copyable Gateway diagnostic report
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Alpha (64%)`
+- Positive signals: Docs describe permission prerequisites and notification forwarding controls. Source implements permission launchers, rationale/settings dialogs, notification allowlist/blocklist/quiet-hours/rate-limit/session controls, node/device status panels, and diagnostic report copy. Unit tests cover several settings and policy helpers.
+- Negative signals: No integrated Android operator recovery flow was found that handles "Gateway offline", "pairing/auth failure", "missing permission", "notification listener disabled", and "node capability unavailable" from a single UI path.
+- Integration gaps: Need a settings/diagnostics scenario that starts from common failures, copies diagnostics, changes permissions/policy, reconnects Gateway, and verifies the corresponding command/capability state changes.
+
+## Quality Score
+
+- Score: `Alpha (66%)`
+- Gitcrawl reports: `Android light mode theme toggle` found issue #87688 requesting a light mode/theme toggle. More capability-specific searches found future Health Connect and Google Home requests, which imply settings will need more capability management as Android expands.
+- Discrawl reports: `Android settings permissions diagnostics notification forwarding` returned no direct hits.
+- Good qualities: Permission prompts are centralized and can show rationale/settings dialogs; notification forwarding has allowlist/blocklist, quiet hours, rate limiting, session key, safer self-package handling, and app-picker UI; Gateway diagnostics text tells users what commands and facts to provide.
+- Bad qualities: Operator recovery is spread across several screens, theme/accessibility customization is incomplete, and there is no recorded live flow tying settings changes to Gateway/node capability changes.
+- Excluded from quality: Test coverage and runtime-flow proof were not used to raise or lower Quality.
+
+## Completeness Score
+
+- Score: `Alpha (64%)`
+- Surface instructions: evaluated against `references/completeness/android-app.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Settings sheet.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- Add a compact Android diagnostics runbook for connection, permissions, notification forwarding, and node command availability.
+- Add live proof that settings toggles update advertised capabilities without stale Gateway state.
+- Decide whether theme/accessibility options are part of the Android app support promise before promotion.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/platforms/android.md` documents Android permissions, notification forwarding controls, connection diagnostics, and related troubleshooting links.
+- `/Users/kevinlin/code/openclaw/apps/android/README.md` documents rebuild items for settings restyle, permission requests in onboarding/settings, push notifications, security hardening, and Play restricted permissions.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/ui/SettingsSheet.kt` implements broad settings and permission controls, notification forwarding UI, assistant role state, camera/location/mic/photos/motion/SMS/call-log availability, and installed-app picker state.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/ui/NodesDevicesSettingsScreen.kt` shows live nodes, paired devices, pending device requests, status badges, and refresh/error states.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/PermissionRequester.kt` centralizes missing-permission requests, rationale dialogs, timeouts, and settings redirects.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/NotificationForwardingPolicy.kt` implements package allow/block filtering, quiet-hours evaluation, and burst limiting.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/ui/GatewayDiagnostics.kt` builds a copyable diagnostic prompt with screen, app version, device, Android SDK, gateway address, and status/error.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/SecurePrefs.kt` persists app, Gateway, notification, and device settings.
+
+### Integration tests
+
+- No integrated Android settings/operator recovery scenario was found.
+- `/Users/kevinlin/code/openclaw/src/gateway/android-node.capabilities.live.test.ts` indirectly depends on settings-controlled command availability and policy allowlists.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/PermissionRequesterTest.kt`, `NotificationForwardingPolicyTest.kt`, `SecurePrefsTest.kt`, and `SecurePrefsNotificationForwardingTest.kt` cover permission and settings helpers.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/ui/SettingsSheetNotificationAppsTest.kt`, `ProviderModelStatusTest.kt`, and `GatewayConfigResolverTest.kt` cover settings UI helpers.
+
+### Gitcrawl queries
+
+Query:
+
+`gitcrawl search openclaw/openclaw --query "Android light mode theme toggle" --json`
+
+Results:
+
+- Issue #87688 `Android app: Add light mode / theme toggle`.
+- Issue #28300 `Theme Customization System - Preset Themes + Custom Theme Studio` as adjacent theme work.
+
+Query:
+
+`gitcrawl search openclaw/openclaw --query "Android app settings permissions diagnostics" --json`
+
+Results:
+
+- No direct hits.
+
+### Discrawl queries
+
+Query:
+
+`/Users/kevinlin/.local/bin/discrawl search --mode fts --limit 5 "Android settings permissions diagnostics notification forwarding"`
+
+Results:
+
+- No direct hits.
--- a/docs/maturity-scorecard/inventory/android-app/voice-talk-wake.md
+++ b/docs/maturity-scorecard/inventory/android-app/voice-talk-wake.md
@@ -0,0 +1,107 @@
+---
+title: "Android app - Voice Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Android app - Voice Maturity Note
+
+## Summary
+
+Android voice has moved beyond a placeholder: the app exposes manual mic dictation and Talk Mode UI, Gateway `talk.speak` synthesis with local fallback, realtime relay plumbing, foreground-service microphone type switching, voice e2e scripts, and focused unit coverage. Coverage is Alpha because the strongest e2e artifact is a debug-script path rather than a repeated user install flow. Quality is Alpha because the archive records voice churn, a prior mic-thrashing loop, and unresolved requests around agent/session switching and per-agent TTS voice.
+
+## Category Scope
+
+Included in this category:
+
+- Voice tab: Voice tab, manual mic capture, Talk Mode listen/think/speak loop, Gateway Talk config, talk.speak, realtime relay mode, voice capture service type, and voice e2e receiver/script
+
+## Features
+
+- Voice tab: Voice tab, manual mic capture, Talk Mode listen/think/speak loop, Gateway Talk config, talk.speak, realtime relay mode, voice capture service type, and voice e2e receiver/script
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Alpha (66%)`
+- Positive signals: Docs describe manual mic and Talk capture modes, Android 14+ foreground-service microphone requirements, Gateway `talk.speak`, local TTS fallback, realtime Gateway relay conditions, and Voice Wake being disabled in UX/runtime. The debug voice e2e script can run normal and realtime voice paths through an installed debug app.
+- Negative signals: Voice proof depends on debug receiver/script setup and synthetic transcripts; no recurring real-device scenario covering audio latency, microphone permission, speech-recognizer failure, provider fallback, or background/foreground transitions was found.
+- Integration gaps: Need a signed-app voice scorecard that grants microphone permission, runs manual mic and Talk Mode, exercises `talk.speak` fallback, verifies realtime relay when configured, backgrounds/reopens the app, and records failure classifications.
+
+## Quality Score
+
+- Score: `Alpha (60%)`
+- Gitcrawl reports: `Android Talk Mode` found issue #56613 requesting Voice/Talk tab agent switching and per-agent TTS voice, plus PR #80082 adjusting Android foreground-service use for Talk Mode. `Android app` search also surfaces the broader app rebuild/release context.
+- Discrawl reports: Search found a landed PR comment for #66179 exposing Talk Mode in UI and foreground microphone permission; a comment closing #47883 after replacing a mic thrashing loop with the new manual mic path; and a support message noting older Android partial voice behavior and voice-wake/talk-mode churn.
+- Good qualities: The current source separates manual mic from Talk Mode, checks microphone permission, handles speech recognizer availability, tracks listening/speaking state, pauses capture during TTS, and falls back from Gateway `talk.speak` when eligible.
+- Bad qualities: Voice behavior is constrained by Android speech recognizer availability, audio focus, foreground-service policy, provider configuration, and session routing. User-facing requests for Talk tab agent/session switching remain open.
+- Excluded from quality: Test coverage and runtime-flow proof were not used to raise or lower Quality.
+
+## Completeness Score
+
+- Score: `Alpha (66%)`
+- Surface instructions: evaluated against `references/completeness/android-app.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Voice tab.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- Add real-device audio and provider failure scorecards for manual mic and Talk Mode.
+- Add Voice tab session/agent selection if Android is expected to match Chat session controls.
+- Keep docs aligned with actual Voice Wake state; current docs correctly say Android Voice Wake remains disabled.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/platforms/android.md` documents manual Mic, continuous Talk Mode, foreground-service microphone behavior, Gateway `talk.speak`, local TTS fallback, realtime relay conditions, and disabled Voice Wake.
+- `/Users/kevinlin/code/openclaw/apps/android/README.md` lists Voice tab full functionality in the rebuild checklist and documents the `voice-e2e.sh` script.
+- `/Users/kevinlin/code/openclaw/docs/nodes/talk.md` is the shared Talk behavior reference.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/ui/VoiceScreen.kt` exposes manual dictation and Talk UI, permission prompts, speaker toggle, status, and transcript rendering.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/voice/MicCaptureManager.kt` implements manual mic transcription, queueing, Gateway send, TTS pause/resume, and pending run timeout.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkModeManager.kt` implements Talk Mode listening, speech recognizer lifecycle, chat finalization, realtime relay, audio playback, and interruption controls.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/voice/TalkSpeakClient.kt` calls Gateway `talk.speak` and classifies local fallback.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/main/java/ai/openclaw/app/NodeForegroundService.kt` promotes Talk Mode to `connectedDevice|microphone`.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/debug/java/ai/openclaw/app/VoiceE2eReceiver.kt` supports debug voice e2e orchestration.
+
+### Integration tests
+
+- `/Users/kevinlin/code/openclaw/apps/android/scripts/voice-e2e.sh` installs the debug app, grants `RECORD_AUDIO`, uses `adb reverse`, drives normal and realtime voice modes through `VoiceE2eReceiver`, captures screenshots, and saves filtered logcat.
+- No repeated signed Play build voice scenario was found.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/voice/MicCaptureManagerTest.kt`, `TalkModeManagerTest.kt`, `TalkSpeakClientTest.kt`, `TalkAudioPlayerTest.kt`, `TalkDirectiveParserTest.kt`, `TalkModeConfigParsingTest.kt`, `VoiceWakeCommandExtractorTest.kt`, `VoiceWakeManagerTest.kt`, and `ChatEventTextTest.kt` cover the main voice helpers.
+- `/Users/kevinlin/code/openclaw/apps/android/app/src/test/java/ai/openclaw/app/NodeForegroundServiceTest.kt` covers foreground-service type behavior.
+
+### Gitcrawl queries
+
+Query:
+
+`gitcrawl search openclaw/openclaw --query "Android Talk Mode" --json`
+
+Results:
+
+- Issue #56613 `[Feature]: Talk/Voice tab - agent/session switching + per-agent TTS voice`.
+- PR #80082 `fix(android): avoid dataSync FGS for persistent node`.
+
+### Discrawl queries
+
+Query:
+
+`/Users/kevinlin/.local/bin/discrawl search --mode fts --limit 5 "Android Talk Mode Voice tab"`
+
+Results:
+
+- 2026-04-25 GitHub mirror comment on #66179 says Android Talk Mode UI and foreground microphone permission landed.
+- 2026-04-25 GitHub mirror comment on #47883 says the prior mic thrashing loop was replaced by manual Voice tab backed by `MicCaptureManager`.
+- 2026-03-28 GitHub mirror issue #56613 requests Voice/Talk tab agent switching and per-agent TTS voice.
--- a/docs/maturity-scorecard/inventory/anthropic-provider-path/auth-onboarding-and-credential-profile-health.md
+++ b/docs/maturity-scorecard/inventory/anthropic-provider-path/auth-onboarding-and-credential-profile-health.md
@@ -0,0 +1,147 @@
+---
+title: "Anthropic provider path - Provider Auth and Recovery Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Anthropic provider path - Provider Auth and Recovery Maturity Note
+
+## Summary
+
+Anthropic auth has first-class docs and source paths for API keys, Claude CLI
+credential reuse, setup-token profiles, auth profile ordering, and doctor hints.
+Coverage is Stable because the direct API-key and Claude CLI paths are present
+in docs, plugin registration, provider auth choices, config defaults, and
+focused tests. Quality is Beta because GitHub and Discord archive evidence still
+shows users hitting orphaned profiles, gateway-host credential mismatch, stale
+setup-token or OAuth behavior, and "No API key found" confusion.
+
+## Category Scope
+
+Included in this category:
+
+- API-key onboarding: Covers API-key onboarding across Anthropic credential surface before a model request is made: onboarding choices, API-key storage, Claude CLI credential migration, setup-token validation, and related credential setup and health behavior.
+- Claude CLI credential reuse: Covers Claude CLI credential reuse across Anthropic credential surface before a model request is made: onboarding choices, API-key storage, Claude CLI credential migration, setup-token validation, and related credential setup and health behavior.
+- Setup-token auth: Covers Setup-token auth across Anthropic credential surface before a model request is made: onboarding choices, API-key storage, Claude CLI credential migration, setup-token validation, and related credential setup and health behavior.
+- Auth profile health: Covers Auth profile health across Anthropic credential surface before a model request is made: onboarding choices, API-key storage, Claude CLI credential migration, setup-token validation, and related credential setup and health behavior.
+- Model status: Covers Model status across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
+- Usage windows: Covers Usage windows across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
+- Cooldown/profile reporting: Covers Cooldown/profile reporting across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
+- Long-context recovery: Covers Long-context recovery across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
+- Fallback guidance: Covers Fallback guidance across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
+
+## Features
+
+- API-key onboarding: Covers API-key onboarding across Anthropic credential surface before a model request is made: onboarding choices, API-key storage, Claude CLI credential migration, setup-token validation, and related credential setup and health behavior.
+- Claude CLI credential reuse: Covers Claude CLI credential reuse across Anthropic credential surface before a model request is made: onboarding choices, API-key storage, Claude CLI credential migration, setup-token validation, and related credential setup and health behavior.
+- Setup-token auth: Covers Setup-token auth across Anthropic credential surface before a model request is made: onboarding choices, API-key storage, Claude CLI credential migration, setup-token validation, and related credential setup and health behavior.
+- Auth profile health: Covers Auth profile health across Anthropic credential surface before a model request is made: onboarding choices, API-key storage, Claude CLI credential migration, setup-token validation, and related credential setup and health behavior.
+- Model status: Covers Model status across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
+- Usage windows: Covers Usage windows across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
+- Cooldown/profile reporting: Covers Cooldown/profile reporting across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
+- Long-context recovery: Covers Long-context recovery across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
+- Fallback guidance: Covers Fallback guidance across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Stable (84%)`
+- Positive signals: Anthropic docs describe API-key, Claude CLI, and setup-token style paths; `extensions/anthropic/openclaw.plugin.json` publishes auth choices and setup env vars; `extensions/anthropic/register.runtime.ts` implements API-key auth, setup-token auth, Claude CLI migration, synthetic auth, and doctor hints.
+- Negative signals: Setup-token live proof is env-gated, and profile health depends on per-agent auth stores and gateway-host runtime state.
+- Integration gaps: The audit found strong focused tests and one live setup-token lane, but not a repeated release artifact proving API-key, setup-token, and Claude CLI migration across fresh hosts every release.
+
+## Quality Score
+
+- Score: `Beta (74%)`
+- Gitcrawl reports: #83268 reports Anthropic API keys being registered under an orphan `claude` provider and silently falling back to OAuth; #72255 reports orphaned per-agent credentials after config declarations are removed; #80514 reports a Claude Pro Max cap warning being classified as billing failure.
+- Discrawl reports: Discord archive results include "No API key found for provider anthropic" cases tied to shell versus daemon runtime mismatch, stale/broken auth stores, setup-token policy failures, and profile order confusion.
+- Good qualities: The provider owns explicit auth methods, validates setup-token shape, writes auth profiles with locks, resolves Claude CLI native auth only for the synthetic `claude-cli` provider, and emits doctor guidance for legacy profile repair.
+- Bad qualities: Users still need to understand several credential planes: gateway token, Anthropic API key, Anthropic setup-token, Claude CLI native auth, per-agent auth store, profile order, cooldown, and daemon environment.
+- Excluded from quality: Unit, integration, e2e, live, and real runtime-flow test presence or absence; those are Coverage inputs only.
+
+## Completeness Score
+
+- Score: `Stable (84%)`
+- Surface instructions: evaluated against `references/completeness/anthropic-provider-path.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for API-key onboarding, Claude CLI credential reuse, Setup-token auth, Auth profile health, Model status, Usage windows, Cooldown/profile reporting, Long-context recovery, and Fallback guidance.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- Setup-token behavior can be upstream-policy dependent and less predictable
+  than API-key auth.
+- Per-agent auth store routing and daemon environment mismatch remain frequent
+  support themes.
+- Anthropic API-key and Claude CLI routes share the provider label but have
+  materially different billing and operational behavior.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/providers/anthropic.md` documents API-key setup, Claude CLI setup, setup-token-style troubleshooting, `openclaw models list --provider anthropic`, and states that API keys are the clearest production path for long-lived gateways.
+- `/Users/kevinlin/code/openclaw/docs/gateway/doctor.md` documents OAuth expiry and stale Anthropic profile repair guidance.
+- `/Users/kevinlin/code/openclaw/docs/gateway/configuration-examples.md` includes Anthropic API-key profile examples and model/fallback config.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/openclaw.plugin.json` declares `providers: ["anthropic"]`, setup env vars `ANTHROPIC_OAUTH_TOKEN` and `ANTHROPIC_API_KEY`, and provider auth choices for Claude CLI, setup-token, and API key.
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/register.runtime.ts` implements `runAnthropicSetupTokenAuth`, `runAnthropicSetupTokenNonInteractive`, `runAnthropicCliMigration`, `runAnthropicCliMigrationNonInteractive`, `resolveClaudeCliSyntheticAuth`, `createProviderApiKeyAuthMethod`, and `buildAnthropicAuthDoctorHint`.
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/config-defaults.ts` resolves Anthropic default auth mode from profile order, API-key profiles, OAuth/token profiles, and env vars before seeding cache/heartbeat/default model behavior.
+- `/Users/kevinlin/code/openclaw/src/commands/doctor-claude-cli.ts` inspects Claude CLI command, credential readability, workspace/project directory health, and selected `claude-cli` runtime agents.
+
+### Integration tests
+
+- `/Users/kevinlin/code/openclaw/src/agents/anthropic.setup-token.live.test.ts` env-gates a live setup-token profile smoke that resolves Anthropic models, extracts the profile API key, and completes a simple prompt.
+- `/Users/kevinlin/code/openclaw/test/scripts/package-acceptance-workflow.test.ts` verifies package-acceptance workflow wiring for Anthropic credentials and live Anthropic profiles.
+- `/Users/kevinlin/code/openclaw/src/commands/models.list.e2e.test.ts` covers provider catalog/auth rows that include configured provider behavior.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/index.test.ts` covers Claude CLI auth profile migration, synthetic OAuth/token auth, API defaulting, Anthropic config defaults, and doctor-profile hooks.
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/provider-policy-api.test.ts` covers provider policy normalization, API-key defaults, Claude CLI config normalization, and thinking profile exposure.
+- `/Users/kevinlin/code/openclaw/src/commands/doctor-claude-cli.test.ts` covers Claude CLI doctor behavior.
+- `/Users/kevinlin/code/openclaw/src/llm/utils/oauth/anthropic.test.ts` covers Anthropic OAuth login and refresh utilities.
+
+### Gitcrawl queries
+
+Query: `gitcrawl --json search issues -R openclaw/openclaw "Anthropic API key auth profile setup-token claude-cli"`
+
+Results:
+
+- #83268 `[Bug]: Anthropic API key pasted via wizard is registered under provider claude (orphan), silently falling back to OAuth`.
+- #72255 `[Bug]: Pi runtime silently uses orphaned credentials from per-agent auth-profiles.json after their declarations are removed from openclaw.json`.
+
+Query: `gitcrawl --json search issues -R openclaw/openclaw "anthropic claude-cli auth login setup token"`
+
+Results:
+
+- #70279 `claude-cli backend silently skipped on systemd-managed root gateway, never spawns subprocess`.
+- #72255 also appeared as an auth-store leak issue.
+
+Query: `gitcrawl --json search issues -R openclaw/openclaw "Anthropic usage status API key Claude"`
+
+Results:
+
+- #83268 repeated the orphaned provider registration issue.
+- #80514 reported Claude Pro Max cap warning classification causing a false billing cooldown.
+
+### Discrawl queries
+
+Query: `discrawl search --limit 10 "Anthropic API key no credentials profile"`
+
+Results:
+
+- Returned support threads for "No API key found for provider anthropic" where shell status and daemon runtime disagreed, setup-token paths were confused with API keys, auth profiles won over env vars, and profile order/cooldown affected fallback behavior.
+
+Query: `discrawl search --limit 10 "Anthropic usage status Claude API key"`
+
+Results:
+
+- Returned discussions where users confused Claude account usage, Anthropic API-key billing, extra-usage errors, and active profile source in `openclaw models status`.
--- a/docs/maturity-scorecard/inventory/anthropic-provider-path/claude-cli-runtime-and-session-bridge.md
+++ b/docs/maturity-scorecard/inventory/anthropic-provider-path/claude-cli-runtime-and-session-bridge.md
@@ -0,0 +1,136 @@
+---
+title: "Anthropic provider path - Claude CLI Backend Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Anthropic provider path - Claude CLI Backend Maturity Note
+
+## Summary
+
+The Claude CLI runtime is a supported bundled backend with docs, plugin
+registration, MCP bridge config, live stdio session defaults, permission-mode
+normalization, session resume, and `/think` effort mapping. Coverage is Stable
+because the main runtime contract is documented and implemented. Quality is
+Alpha because archive evidence shows active user-visible failures around
+backend registration, systemd/root gateway execution, permissions, stream
+buffering, and session resume.
+
+## Category Scope
+
+This category covers OpenClaw's host-local Claude CLI path after auth is
+available: the `claude-cli` backend, its command/args/env defaults, MCP tool
+bridge, native tool mode, live stdio JSONL sessions, permission-mode mapping,
+thinking effort args, session id persistence, transcript validation, and
+fallback prelude behavior.
+
+## Features
+
+- Runtime selection: Covers Runtime selection across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
+- Session continuity: Covers Session continuity across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
+- MCP/tool bridge: Covers MCP/tool bridge across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
+- Permission-mode mapping: Covers Permission-mode mapping across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
+- Fallback prelude: Covers Fallback prelude across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Stable (82%)`
+- Positive signals: Docs cover Claude CLI setup, config, sessions, permissions, thinking effort, and fallback prelude; source registers a full backend with live stdio defaults and MCP bridge; tests cover backend registration and config normalization.
+- Negative signals: Live Claude CLI coverage is largely indirect through package-acceptance workflow definitions and plugin/unit tests rather than a single direct live runtime test in this audit.
+- Integration gaps: Channel-session and daemon/root gateway paths have archived failures that are not obviously covered by the focused backend tests.
+
+## Quality Score
+
+- Score: `Alpha (68%)`
+- Gitcrawl reports: #70279 reports the backend being skipped on a systemd-managed root gateway; #85408 reports hardcoded MCP flags blocking user-scope MCPs; #85601 reports a bundled MCP config tempDir race; #86050 reports Gateway buffering Claude CLI stream events; #78828 reports root gateway permission-mode stalls.
+- Discrawl reports: Discord archive results include `MissingAgentHarnessError: claude-cli is not registered` in Discord group chats while DMs worked, plus guidance showing config path divergence across session routing.
+- Good qualities: The backend has conservative default args, clears inherited Claude/Anthropic env that could steer child processes, serializes runs, validates project transcript resume, and maps OpenClaw exec policy into Claude permission mode.
+- Bad qualities: The path depends on external CLI installation, local login, host PATH, local project transcript files, channel/session runtime lookup, and provider-owned CLI behavior.
+- Excluded from quality: Unit, integration, e2e, live, and real runtime-flow test presence or absence; those are Coverage inputs only.
+
+## Completeness Score
+
+- Score: `Stable (82%)`
+- Surface instructions: evaluated against `references/completeness/anthropic-provider-path.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Runtime selection, Session continuity, MCP/tool bridge, Permission-mode mapping, and Fallback prelude.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- The Claude CLI path is operationally sensitive to host setup and session
+  routing.
+- Group/channel session paths have shown runtime lookup divergence from DM/main
+  session paths.
+- Some fixes appear as active or recent PRs/issues, so the lived support record
+  is still noisy.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/gateway/cli-backends.md` documents the `claude-cli` backend, MCP bridge behavior, session support, native permission mapping, thinking effort mapping, login prerequisites, session resume, and fallback prelude.
+- `/Users/kevinlin/code/openclaw/docs/providers/anthropic.md` documents Claude CLI as the host-local credential reuse path and warns about same-host expectations.
+- `/Users/kevinlin/code/openclaw/docs/gateway/config-agents.md` recommends canonical `anthropic/*` model refs plus model-scoped `agentRuntime.id: "claude-cli"`.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/cli-backend.ts` registers `claude-cli` with `bundleMcp`, Claude config-file bridge, native tool mode, stream-json args, live stdio sessions, workspace-scoped image args, session ids, raw transcript reseed, watchdog defaults, and serialization.
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/cli-shared.ts` clears inherited Anthropic/Claude env vars, normalizes `--setting-sources`, maps OpenClaw exec policy to Claude permission mode, and maps OpenClaw thinking levels to `--effort`.
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/config-defaults.ts` backfills `agentRuntime.id: "claude-cli"` for selected canonical Anthropic refs when Claude CLI auth is selected.
+- `/Users/kevinlin/code/openclaw/src/commands/doctor-claude-cli.ts` checks command resolution, credentials, workspace/project directory health, and active Claude CLI runtime agents.
+
+### Integration tests
+
+- `/Users/kevinlin/code/openclaw/test/scripts/package-acceptance-workflow.test.ts` verifies live Anthropic and Claude CLI workflow wiring, including `OPENCLAW_LIVE_CLI_BACKEND_MODEL=claude-cli/claude-sonnet-4-6` and package install of `@anthropic-ai/claude-code`.
+- `/Users/kevinlin/code/openclaw/scripts/e2e/mcp-channels-docker.sh` and `/Users/kevinlin/code/openclaw/scripts/e2e/mcp-channels-docker-client.ts` cover MCP channel notification/permission framing adjacent to Claude channel mode.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/index.test.ts` verifies `claude-cli` backend registration, config defaults, auth migration, and synthetic auth.
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/cli-shared.test.ts` verifies permission args, safe setting sources, effort mapping, config normalization, and transcript reseed config.
+- `/Users/kevinlin/code/openclaw/src/commands/doctor-claude-cli.test.ts` covers doctor diagnostics for the Claude CLI path.
+- `/Users/kevinlin/code/openclaw/src/plugins/bundle-claude-inspect.test.ts` covers bundled Claude inspection behavior.
+
+### Gitcrawl queries
+
+Query: `gitcrawl --json search issues -R openclaw/openclaw "claude-cli live session resume transcript missing permission mode"`
+
+Results:
+
+- Returned no direct results for that exact combined query.
+
+Query: `gitcrawl --json search issues -R openclaw/openclaw "Claude CLI OpenClaw MCP allowedTools permission-mode"`
+
+Results:
+
+- #85408 `openclaw agent CLI spawn hardcodes --strict-mcp-config + --allowedTools mcp__openclaw__*, blocking user-scope MCPs`.
+- #85601 `[regression] Bundled MCP config tempDir race still present`.
+- #86050 `[Bug]: Gateway buffers claude-cli stream events; surfaces only see the final assembled message`.
+- #78828 `Claude CLI on root gateway: inferred bypassPermissions breaks, acceptEdits partly works, blocked turns can stall until timeout`.
+
+Query: `gitcrawl --json search prs -R openclaw/openclaw "claude-cli"`
+
+Results:
+
+- Returned active/recent PRs including #73122 backend registration guardrails, #74990 subscription path in onboard wizard, #85505 host-only CLI auth epoch mode, #87702 env-var scrubbing when spawning Claude, #77148 session fork-on-resume, #86649 partial-message streaming deltas, and #86568 auth cooldown skip for CLI providers.
+
+### Discrawl queries
+
+Query: `discrawl search --limit 10 "Claude CLI OpenClaw auth login claude-cli"`
+
+Results:
+
+- Returned a May 26, 2026 support thread where Discord DMs worked but group chats failed with `MissingAgentHarnessError: Requested agent harness "claude-cli" is not registered`, plus older archive entries closing Claude CLI persistence issues and noting implemented CLI delegation.
+
+Query: `discrawl search --limit 10 "Anthropic usage status Claude API key"`
+
+Results:
+
+- Returned April 2026 guidance recommending the CLI subprocess path for Claude subscription usage and warning about direct API-key/API billing configuration.
--- a/docs/maturity-scorecard/inventory/anthropic-provider-path/diagnostics-usage-status-and-fallback-recovery.md
+++ b/docs/maturity-scorecard/inventory/anthropic-provider-path/diagnostics-usage-status-and-fallback-recovery.md
@@ -0,0 +1,142 @@
+---
+title: "Anthropic provider path - Diagnostics and Recovery Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Anthropic provider path - Diagnostics and Recovery Maturity Note
+
+## Summary
+
+Anthropic diagnostics cover `models status`, auth profile health, OAuth/token
+usage windows, doctor hints, long-context 429 troubleshooting, cooldowns, and
+fallback guidance. Coverage is Beta because important diagnostics exist across
+docs, source, and tests, but the diagnostic surface is split across model
+status, doctor, provider usage, and user-runbook flows. Quality is Beta because
+archive evidence shows users still struggle to distinguish Anthropic API-key
+billing, Claude account usage, cooldowns, extra-usage errors, and profile-store
+state.
+
+## Category Scope
+
+This category covers operator diagnostics and recovery for Anthropic provider
+failures: status output, usage windows, auth profile source reporting, cooldown
+and disabled profile reporting, doctor hints, long-context 429 remediation,
+missing credentials guidance, fallback setup, and provider error/billing
+classification.
+
+## Features
+
+- Model status: Covers Model status across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
+- Usage windows: Covers Usage windows across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
+- Cooldown/profile reporting: Covers Cooldown/profile reporting across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
+- Long-context recovery: Covers Long-context recovery across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
+- Fallback guidance: Covers Fallback guidance across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Beta (78%)`
+- Positive signals: Docs cover common Anthropic credential errors and long-context 429s; source fetches Claude usage windows, reports doctor hints, and handles provider auth profile ordering/cooldowns; tests cover usage fetch and doctor behavior.
+- Negative signals: Recovery is distributed across `models status`, `doctor`, troubleshooting docs, model fallback config, and auth-profile commands rather than one cohesive Anthropic diagnostics workflow.
+- Integration gaps: The audit did not find a single live failure-to-repair scenario test for Anthropic auth, usage, cooldown, and fallback recovery.
+
+## Quality Score
+
+- Score: `Beta (70%)`
+- Gitcrawl reports: #80514 reports Claude Pro Max cap warning being classified as billing failure; #83268 reports API-key profile orphaning; #63145 requests per-model health probing across configured models; PR #85666 skips Anthropic API keys for usage status; PR #87697 clears stale provider cooldowns after reauth.
+- Discrawl reports: Discord archive results include users seeing "out of extra usage", invalid bearer token, Anthropic API-key versus Claude account billing confusion, profile cooldown confusion, and daemon/auth-store mismatches.
+- Good qualities: Docs name concrete commands, usage fetch handles OAuth/web fallback, doctor reports stale OAuth profiles and refresh guidance, and troubleshooting distinguishes credential eligibility from config shape.
+- Bad qualities: Users still need to map upstream Anthropic billing and auth semantics to OpenClaw's auth-store, cooldown, fallback, and model-status vocabulary.
+- Excluded from quality: Unit, integration, e2e, live, and real runtime-flow test presence or absence; those are Coverage inputs only.
+
+## Completeness Score
+
+- Score: `Beta (78%)`
+- Surface instructions: evaluated against `references/completeness/anthropic-provider-path.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Model status, Usage windows, Cooldown/profile reporting, Long-context recovery, and Fallback guidance.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- `models status` helps, but users still often need maintainer interpretation
+  to connect auth-store source, billing status, and model fallback behavior.
+- Usage status behavior differs by auth mode and scope.
+- The long-context 429 path is documented, but upstream eligibility remains
+  outside OpenClaw control.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/providers/anthropic.md` includes troubleshooting accordions for token invalidity, no API key, no profile, and all profiles in cooldown.
+- `/Users/kevinlin/code/openclaw/docs/gateway/troubleshooting.md` documents the exact long-context 429 symptom, commands to inspect logs/status/config, causes, and fix options.
+- `/Users/kevinlin/code/openclaw/docs/gateway/doctor.md` documents OAuth expiry/refresh behavior, Anthropic API-key or setup-token suggestions, and profile cooldown/disabled reporting.
+- `/Users/kevinlin/code/openclaw/docs/reference/prompt-caching.md` documents usage cache counters and Anthropic provider behavior.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/src/infra/provider-usage.fetch.claude.ts` fetches Anthropic OAuth usage windows, supports claude.ai web-session fallback for missing `user:profile` scope, and returns structured provider usage snapshots.
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/register.runtime.ts` wires `fetchUsageSnapshot`, `resolveUsageAuth`, `buildAuthDoctorHint`, and `isCacheTtlEligible`.
+- `/Users/kevinlin/code/openclaw/src/commands/doctor-claude-cli.ts` checks Claude CLI command, credentials, workspace/project dirs, and profile store health.
+- `/Users/kevinlin/code/openclaw/src/agents/embedded-agent-helpers/provider-error-patterns.ts` and adjacent provider fallback helpers classify provider errors used in recovery/fallback decisions.
+- `/Users/kevinlin/code/openclaw/src/commands/models/list.status-command.ts` and related model list/status modules render provider/auth health.
+
+### Integration tests
+
+- `/Users/kevinlin/code/openclaw/src/commands/models.list.e2e.test.ts` covers provider/auth catalog responsiveness in the models command surface.
+- `/Users/kevinlin/code/openclaw/test/scripts/package-acceptance-workflow.test.ts` verifies Anthropic credential requirements and live profile wiring in package acceptance.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/src/infra/provider-usage.fetch.claude.test.ts` covers Claude usage fetch behavior.
+- `/Users/kevinlin/code/openclaw/src/commands/doctor-claude-cli.test.ts` covers Claude CLI doctor diagnostics.
+- `/Users/kevinlin/code/openclaw/src/commands/models/list.status.test.ts` and related model status/list tests cover provider status rendering.
+- `/Users/kevinlin/code/openclaw/src/agents/embedded-agent-helpers/provider-error-patterns.test.ts` covers provider error classification.
+
+### Gitcrawl queries
+
+Query: `gitcrawl --json search issues -R openclaw/openclaw "Anthropic usage status API key Claude"`
+
+Results:
+
+- #83268 reports API-key wizard provider orphaning.
+- #80514 reports Claude Pro Max cap warning being classified as a billing failure and creating a false cooldown.
+
+Query: `gitcrawl --json search issues -R openclaw/openclaw "Anthropic 429 long context extra usage required fallback"`
+
+Results:
+
+- Returned no direct results for that exact issue query.
+
+Query: `gitcrawl --json search issues -R openclaw/openclaw "Anthropic No API key found provider anthropic models status"`
+
+Results:
+
+- Returned related auth/provider-status issues including #63145 for per-model health checks and auth-profile issues from other providers.
+
+### Discrawl queries
+
+Query: `discrawl search --limit 10 "Anthropic usage status Claude API key"`
+
+Results:
+
+- Returned support threads about API-key billing versus Claude account usage, extra-usage errors, invalid bearer token, provider profile source, and what to inspect in `openclaw models status`.
+
+Query: `discrawl search --limit 10 "Claude 4.6 1M context Anthropic 429"`
+
+Results:
+
+- Returned long-context 429 troubleshooting guidance and extra-usage eligibility notes.
+
+Query: `discrawl search --limit 10 "Anthropic API key no credentials profile"`
+
+Results:
+
+- Returned profile/store mismatch, no-key/no-profile, and setup-token confusion threads.
--- a/docs/maturity-scorecard/inventory/anthropic-provider-path/direct-anthropic-messages-transport-and-streaming.md
+++ b/docs/maturity-scorecard/inventory/anthropic-provider-path/direct-anthropic-messages-transport-and-streaming.md
@@ -0,0 +1,142 @@
+---
+title: "Anthropic provider path - Request Transport and Turn Semantics Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Anthropic provider path - Request Transport and Turn Semantics Maturity Note
+
+## Summary
+
+The direct Anthropic Messages transport is deeply implemented: it builds
+Anthropic request payloads, handles API-key and OAuth/token headers, decodes
+SSE events, tracks usage, maps stop reasons, handles aborts, and supports
+Anthropic-compatible endpoints. Coverage is Stable because source and tests
+exercise the main payload and stream behavior. Quality is Beta because archive
+evidence shows recurring malformed/truncated stream and tool-call failures that
+have required repeated fixes.
+
+## Category Scope
+
+Included in this category:
+
+- API-key/OAuth transport: Covers API-key/OAuth transport across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct Anthropic API behavior.
+- Messages payloads: Covers Messages payloads across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct Anthropic API behavior.
+- Streaming decode: Covers Streaming decode across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct Anthropic API behavior.
+- Usage and stop reasons: Covers Usage and stop reasons across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct Anthropic API behavior.
+- Abort/error handling: Covers Abort/error handling across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct Anthropic API behavior.
+- Tool-use blocks: Covers Tool-use blocks across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
+- Tool-result replay: Covers Tool-result replay across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
+- Partial JSON recovery: Covers Partial JSON recovery across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
+- Native thinking: Covers Native thinking across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
+- Signed/redacted thinking replay: Covers Signed/redacted thinking replay across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
+
+## Features
+
+- API-key/OAuth transport: Covers API-key/OAuth transport across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct Anthropic API behavior.
+- Messages payloads: Covers Messages payloads across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct Anthropic API behavior.
+- Streaming decode: Covers Streaming decode across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct Anthropic API behavior.
+- Usage and stop reasons: Covers Usage and stop reasons across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct Anthropic API behavior.
+- Abort/error handling: Covers Abort/error handling across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct Anthropic API behavior.
+- Tool-use blocks: Covers Tool-use blocks across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
+- Tool-result replay: Covers Tool-result replay across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
+- Partial JSON recovery: Covers Partial JSON recovery across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
+- Native thinking: Covers Native thinking across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
+- Signed/redacted thinking replay: Covers Signed/redacted thinking replay across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Stable (84%)`
+- Positive signals: Transport source covers client construction, headers, payloads, SSE event iteration, usage accounting, abort handling, and compatible endpoint behavior; unit tests cover direct Anthropic, OAuth, custom endpoints, malformed SSE, unsafe integer tool-use input, and abort behavior; a live transport test covers real HTTP stream abort.
+- Negative signals: Some live provider behaviors are env-gated and provider-specific stream drift cannot be fully proven from local tests.
+- Integration gaps: The audit found live abort proof and extensive unit coverage, but not repeated live proof for every Anthropic model/auth combination.
+
+## Quality Score
+
+- Score: `Beta (72%)`
+- Gitcrawl reports: #60593 reports recurring Anthropic streaming JSON parse errors where failover often failed; PR #62429 sanitized control characters in Anthropic streaming JSON; PR #61349 suppressed raw JSON parse errors from truncated tool-call streams; PR #86959 finalized abandoned managed-response streams to release sockets.
+- Discrawl reports: Discord archive results include session corruption from truncated streaming tool calls, raw parse errors sent to users, and Anthropic stream parse fixes.
+- Good qualities: The transport classifies malformed SSE as a stable transport error, preserves provider usage fields, avoids direct-Anthropic beta headers on custom hosts, cancels stalled reads on abort, and separates API-key from OAuth header behavior.
+- Bad qualities: Anthropic and Anthropic-compatible stream shapes have produced recurring operational incidents around malformed JSON, partial tool deltas, control characters, and aborted streams.
+- Excluded from quality: Unit, integration, e2e, live, and real runtime-flow test presence or absence; those are Coverage inputs only.
+
+## Completeness Score
+
+- Score: `Stable (84%)`
+- Surface instructions: evaluated against `references/completeness/anthropic-provider-path.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for API-key/OAuth transport, Messages payloads, Streaming decode, Usage and stop reasons, Abort/error handling, Tool-use blocks, Tool-result replay, Partial JSON recovery, Native thinking, and Signed/redacted thinking replay.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- Provider stream drift remains a recurring source of defects.
+- Some compatible providers need custom handling for endpoint classification,
+  cache markers, reasoning content, and stream sanitization.
+- Direct Anthropic model/auth combinations need recurring live proof beyond
+  local mock transport tests.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/providers/anthropic.md` documents API-key and Claude CLI routes, thinking defaults, prompt caching, fast mode, media, and 1M context behavior.
+- `/Users/kevinlin/code/openclaw/docs/reference/prompt-caching.md` documents Anthropic usage counters and cache behavior that the transport reports.
+- `/Users/kevinlin/code/openclaw/docs/gateway/troubleshooting.md` documents Anthropic long-context 429 errors and fallback guidance.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/src/llm/providers/anthropic.ts` implements Anthropic SDK client construction, OAuth/API-key header handling, cache retention, SSE decoding, content/tool/thinking events, usage accounting, stop reason mapping, message conversion, image conversion, and tool conversion.
+- `/Users/kevinlin/code/openclaw/src/agents/anthropic-transport-stream.ts` implements guarded fetch transport, direct Anthropic model-id stripping, endpoint classification, beta headers, OAuth identity headers, usage/cost accounting, malformed stream classification, and abort-safe streaming.
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/stream-wrappers.ts` composes beta-header, fast-mode, service-tier, and thinking-prefill wrappers around Anthropic streams.
+- `/Users/kevinlin/code/openclaw/src/agents/provider-transport-fetch.ts` provides guarded model fetch plumbing used by the transport.
+
+### Integration tests
+
+- `/Users/kevinlin/code/openclaw/src/agents/anthropic-transport-stream.live.test.ts` starts a loopback HTTP SSE server and proves Anthropic transport aborts a real in-flight stream.
+- `/Users/kevinlin/code/openclaw/test/scripts/package-acceptance-workflow.test.ts` verifies live Anthropic gateway smoke profile wiring.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/src/agents/anthropic-transport-stream.test.ts` covers guarded fetch usage, model-id stripping, custom endpoint header behavior, malformed stream classification, unsafe integer preservation, OAuth identity/tool remapping, text/thinking blocks, aborts, and adaptive thinking request shape.
+- `/Users/kevinlin/code/openclaw/src/llm/providers/anthropic.test.ts` covers SDK client construction behavior and signed thinking replay payloads.
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/stream-wrappers.test.ts` covers beta stripping, OAuth/default beta headers, service-tier injection/skips, and thinking prefill stripping.
+- `/Users/kevinlin/code/openclaw/src/agents/anthropic-payload-policy.test.ts` covers Anthropic cache and service-tier policy shaping.
+
+### Gitcrawl queries
+
+Query: `gitcrawl --json search issues -R openclaw/openclaw "Anthropic tool call streaming JSON parse error"`
+
+Results:
+
+- #60593 `Recurring Anthropic streaming JSON parse errors (Sonnet 4.5 / Opus) - failover often fails to recover`.
+
+Query: `gitcrawl --json search prs -R openclaw/openclaw "anthropic streaming"`
+
+Results:
+
+- #62112 preserves Anthropic refusal handling.
+- #74432 honors `ANTHROPIC_BASE_URL`.
+- #86649 relays Claude CLI assistant partial messages as streaming deltas.
+- #75136 preserves Anthropic stream usage.
+- #62429 and #61349 appeared in archive results as stream/tool-call parse fixes.
+
+### Discrawl queries
+
+Query: `discrawl search --limit 10 "Anthropic tool call streaming parse JSON"`
+
+Results:
+
+- Returned April 2026 reports for session corruption from truncated Anthropic streaming tool calls, issue #69846, PR #62429 for control-character sanitization, PR #61349 for raw parse error suppression, and PR #44237 for recovering tool-call args from `partialJson`.
+
+Query: `discrawl search --limit 10 "Anthropic thinking signature cache control"`
+
+Results:
+
+- Returned no direct results for that exact query.
--- a/docs/maturity-scorecard/inventory/anthropic-provider-path/media-understanding-and-document-inputs.md
+++ b/docs/maturity-scorecard/inventory/anthropic-provider-path/media-understanding-and-document-inputs.md
@@ -0,0 +1,112 @@
+---
+title: "Anthropic provider path - Media Inputs Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Anthropic provider path - Media Inputs Maturity Note
+
+## Summary
+
+Anthropic media support is a smaller, clearly bounded part of the provider
+path. Docs state that the bundled Anthropic plugin registers image and PDF
+understanding, source registers image capability with native PDF document input
+metadata, and model metadata normalizes image-capable Claude rows. Coverage is
+Beta because source and docs are clear but live Anthropic media scenario proof
+is thinner than text/tool transport proof. Quality is Stable because the surface
+is small, directly mapped to provider capabilities, and the archive search did
+not find feature-specific user reports after freshness checks.
+
+## Category Scope
+
+Included in this category:
+
+- Image input: Covers Image input across Anthropic media understanding as part of the provider path: image input support, PDF native document input metadata, default media model selection, auto-priority, and related media inputs behavior.
+- PDF document input: Covers PDF document input across Anthropic media understanding as part of the provider path: image input support, PDF native document input metadata, default media model selection, auto-priority, and related media inputs behavior.
+- Media model fallback: Covers Media model fallback across Anthropic media understanding as part of the provider path: image input support, PDF native document input metadata, default media model selection, auto-priority, and related media inputs behavior.
+- Image tool results: Covers Image tool results across Anthropic media understanding as part of the provider path: image input support, PDF native document input metadata, default media model selection, auto-priority, and related media inputs behavior.
+
+## Features
+
+- Image input: Covers Image input across Anthropic media understanding as part of the provider path: image input support, PDF native document input metadata, default media model selection, auto-priority, and related media inputs behavior.
+- PDF document input: Covers PDF document input across Anthropic media understanding as part of the provider path: image input support, PDF native document input metadata, default media model selection, auto-priority, and related media inputs behavior.
+- Media model fallback: Covers Media model fallback across Anthropic media understanding as part of the provider path: image input support, PDF native document input metadata, default media model selection, auto-priority, and related media inputs behavior.
+- Image tool results: Covers Image tool results across Anthropic media understanding as part of the provider path: image input support, PDF native document input metadata, default media model selection, auto-priority, and related media inputs behavior.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Beta (74%)`
+- Positive signals: Docs describe image and PDF understanding; manifest and provider source register media metadata; direct transport converts image blocks; tests cover image media metadata and image tool-result payload conversion.
+- Negative signals: The audit did not find a dedicated live Anthropic image/PDF scenario artifact or per-release media smoke result.
+- Integration gaps: Media support is covered more by provider registration and payload tests than by end-to-end media runs.
+
+## Quality Score
+
+- Score: `Stable (82%)`
+- Gitcrawl reports: The feature-specific GitHub issue query returned no direct Anthropic media reports after freshness checks.
+- Discrawl reports: The feature-specific Discord query returned no direct Anthropic media reports after freshness checks.
+- Good qualities: The capability is small, declarative, and aligned with modern Claude model metadata; source keeps media model defaults and native document input metadata in one plugin-owned surface.
+- Bad qualities: Docs advertise "image and PDF understanding", while the provider capability list is `["image"]` plus a separate `nativeDocumentInputs: ["pdf"]`; that split can require careful wording as docs evolve.
+- Excluded from quality: Unit, integration, e2e, live, and real runtime-flow test presence or absence; those are Coverage inputs only.
+
+## Completeness Score
+
+- Score: `Beta (74%)`
+- Surface instructions: evaluated against `references/completeness/anthropic-provider-path.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Image input, PDF document input, Media model fallback, and Image tool results.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- No dedicated live Anthropic image/PDF proof was found in this audit.
+- PDF support is represented as native document input metadata rather than a
+  separate capability id.
+- Media generation is out of scope; this component is media understanding only.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/providers/anthropic.md` documents "Media understanding (image and PDF)", default model `claude-opus-4-7`, supported input images/PDF documents, and automatic routing through the Anthropic media understanding provider.
+- `/Users/kevinlin/code/openclaw/docs/gateway/config-agents.md` documents image resize behavior for Claude Opus 4.7 and other vision models.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/openclaw.plugin.json` declares media understanding provider metadata for Anthropic with capability `image`, default image model `claude-opus-4-7`, auto priority `20`, and native document input `pdf`.
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/media-understanding-provider.ts` registers `anthropicMediaUnderstandingProvider` with image capabilities, default models, auto priority, native document inputs, and `describeImage`/`describeImages` helpers.
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/register.runtime.ts` normalizes modern Claude models to include image input and model-specific media input sizing.
+- `/Users/kevinlin/code/openclaw/src/llm/providers/anthropic.ts` converts user image blocks and image tool-result blocks into Anthropic image content.
+- `/Users/kevinlin/code/openclaw/src/agents/anthropic-transport-stream.ts` performs transport-side image and tool-result conversion for Anthropic Messages payloads.
+
+### Integration tests
+
+- `/Users/kevinlin/code/openclaw/test/scripts/package-acceptance-workflow.test.ts` includes live Anthropic profile wiring but does not by itself prove image/PDF scenarios.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/index.test.ts` covers stale text-only modern Claude vision row normalization and media metadata merge for `claude-opus-4-7`.
+- `/Users/kevinlin/code/openclaw/src/agents/anthropic-transport-stream.test.ts` covers image tool-result conversion and image payload shape.
+- `/Users/kevinlin/code/openclaw/src/agents/embedded-agent-runner/model.provider-runtime.test-support.ts` defines Anthropic vision model prefixes used in provider runtime test support.
+
+### Gitcrawl queries
+
+Query: `gitcrawl --json search issues -R openclaw/openclaw "Anthropic media image PDF Claude"`
+
+Results:
+
+- Returned no direct results for Anthropic media/image/PDF reports.
+
+### Discrawl queries
+
+Query: `discrawl search --limit 10 "Anthropic media understanding image PDF Claude"`
+
+Results:
+
+- Returned no direct results for Anthropic media/image/PDF reports.
--- a/docs/maturity-scorecard/inventory/anthropic-provider-path/model-catalog-aliases-and-runtime-policy.md
+++ b/docs/maturity-scorecard/inventory/anthropic-provider-path/model-catalog-aliases-and-runtime-policy.md
@@ -0,0 +1,147 @@
+---
+title: "Anthropic provider path - Model and Runtime Selection Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Anthropic provider path - Model and Runtime Selection Maturity Note
+
+## Summary
+
+Anthropic model catalog coverage is Stable. The bundled manifest publishes
+direct Anthropic and Claude CLI model rows, source backfills current Claude 4.x
+variants, normalizes image and 1M context metadata, and maps selected Claude CLI
+auth to model-scoped runtime policy. Quality is Beta because users still hit
+model allowlist/catalog confusion, and current Claude model naming/metadata
+requires frequent forward-compatibility maintenance.
+
+## Category Scope
+
+Included in this category:
+
+- Bundled Claude catalog: Covers Bundled Claude catalog across Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
+- Canonical anthropic refs: Covers Canonical anthropic refs across Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
+- Claude CLI compatibility: Covers Claude CLI compatibility across Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
+- Model picker availability: Covers Model picker availability across Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
+- Capability metadata: Covers Capability metadata across Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
+- Runtime selection: Covers Runtime selection across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
+- Session continuity: Covers Session continuity across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
+- MCP/tool bridge: Covers MCP/tool bridge across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
+- Permission-mode mapping: Covers Permission-mode mapping across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
+- Fallback prelude: Covers Fallback prelude across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
+
+## Features
+
+- Bundled Claude catalog: Covers Bundled Claude catalog across Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
+- Canonical anthropic refs: Covers Canonical anthropic refs across Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
+- Claude CLI compatibility: Covers Claude CLI compatibility across Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
+- Model picker availability: Covers Model picker availability across Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
+- Capability metadata: Covers Capability metadata across Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
+- Runtime selection: Covers Runtime selection across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
+- Session continuity: Covers Session continuity across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
+- MCP/tool bridge: Covers MCP/tool bridge across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
+- Permission-mode mapping: Covers Permission-mode mapping across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
+- Fallback prelude: Covers Fallback prelude across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Stable (84%)`
+- Positive signals: The bundled manifest includes direct Anthropic and Claude CLI models; docs describe canonical refs and runtime policy; source has forward-compatible model resolution, 1M context normalization, image-capability normalization, and alias migration; tests pin key model metadata behavior.
+- Negative signals: The catalog remains static/discovery-light for direct Anthropic and relies on source-maintained forward-compatibility for new Claude ids.
+- Integration gaps: Release proof for fresh upstream catalog drift is weaker than the source/unit-test proof.
+
+## Quality Score
+
+- Score: `Beta (76%)`
+- Gitcrawl reports: PR #75157 addresses catalog display names for agent models; PR #72404 defaults explicit-only vision-capable models to image-capable; PR #80394 adds per-agent model allowlists; PR #67731 pins Opus 4.7 variant resolution and thinking-default regression coverage.
+- Discrawl reports: Discord archive includes `claude-cli models not in catalog` and "only Sonnet available" support threads tied to allowlist/catalog configuration and cooldown confusion.
+- Good qualities: Canonical refs, alias handling, dynamic model fallback, 1M context metadata, image input normalization, and selected Claude CLI runtime backfill are centralized in the bundled provider.
+- Bad qualities: Operators can still confuse auth/profile state, configured model allowlists, provider catalog rows, and runtime selection when a desired Claude model does not appear or is unavailable.
+- Excluded from quality: Unit, integration, e2e, live, and real runtime-flow test presence or absence; those are Coverage inputs only.
+
+## Completeness Score
+
+- Score: `Stable (84%)`
+- Surface instructions: evaluated against `references/completeness/anthropic-provider-path.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Bundled Claude catalog, Canonical anthropic refs, Claude CLI compatibility, Model picker availability, Capability metadata, Runtime selection, Session continuity, MCP/tool bridge, Permission-mode mapping, and Fallback prelude.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- New Claude model ids and dated variants require ongoing forward-compatibility
+  upkeep.
+- Docs and config guidance have moved from `claude-cli/*` refs toward canonical
+  `anthropic/*` refs plus runtime policy, while legacy configs still exist.
+- Model availability UX can still make catalog, allowlist, cooldown, and
+  credential problems look similar.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/providers/anthropic.md` documents `anthropic/*` refs, Claude CLI runtime override, Claude 4.6 thinking defaults, prompt caching, media support, and 1M context behavior.
+- `/Users/kevinlin/code/openclaw/docs/gateway/config-agents.md` documents runtime-policy precedence and recommends canonical `anthropic/claude-opus-4-7` plus `agentRuntime.id: "claude-cli"`.
+- `/Users/kevinlin/code/openclaw/docs/concepts/models.md` documents provider/model ref selection and fallback behavior used by Anthropic rows.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/openclaw.plugin.json` publishes static model catalog rows for `claude-cli` and `anthropic`, including Opus 4.7, Sonnet 4.6, Opus 4.6, reasoning flags, image input metadata, context windows, max tokens, provider endpoints, alias normalization, and provider request family.
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/register.runtime.ts` resolves modern Claude model ids, applies GA 1M context windows, normalizes image media input, publishes Claude CLI catalog entries, and exposes thinking profiles.
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/claude-model-refs.ts` canonicalizes Claude family aliases, upgrades old Claude 3/4 refs, and maps legacy `claude-cli/*` refs back to canonical Anthropic refs.
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/config-defaults.ts` collects Claude CLI runtime refs and backfills `agentRuntime.id: "claude-cli"` when Claude CLI auth or model selection is active.
+
+### Integration tests
+
+- `/Users/kevinlin/code/openclaw/src/commands/models.list.e2e.test.ts` covers provider catalog rows and model list behavior.
+- `/Users/kevinlin/code/openclaw/test/scripts/package-acceptance-workflow.test.ts` verifies live Anthropic model profile inputs such as `OPENCLAW_LIVE_GATEWAY_MODELS=anthropic/claude-opus-4-7` and Sonnet/Haiku model lists.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/index.test.ts` covers model API defaulting, Claude CLI allowlist backfill, shorthand refs, future Anthropic refs, Opus 4.7 resolution from templates, image media metadata, 1M context normalization, and synthetic auth.
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/provider-policy-api.test.ts` covers public provider policy normalization and thinking profile exposure.
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/cli-migration.test.ts` covers migration behavior from Claude CLI auth.
+- `/Users/kevinlin/code/openclaw/src/agents/model-catalog-lookup.ts` and adjacent tests cover model catalog lookup used by agent runtime selection.
+
+### Gitcrawl queries
+
+Query: `gitcrawl --json search issues -R openclaw/openclaw "Anthropic model catalog claude opus sonnet haiku models list"`
+
+Results:
+
+- Returned no direct results for that exact issue query.
+
+Query: `gitcrawl --json search prs -R openclaw/openclaw "Anthropic model catalog Claude Opus 4.7 Sonnet 4.6"`
+
+Results:
+
+- #75157 `fix(ui): use catalog display names for agent models`.
+- #72404 `fix(models): default input=[text,image] for vision-capable explicit-only models`.
+- #80394 `feat(agents): per-agent model allowlist (with fallback to global)`.
+
+Query: `gitcrawl --json search prs -R openclaw/openclaw "Anthropic thinking"`
+
+Results:
+
+- #67731 `test(anthropic): pin Opus 4.7 variant resolution + thinking-default regression coverage`.
+- #70584 `fix: clamp effort=low/minimal to medium for claude-opus-4.7`.
+
+### Discrawl queries
+
+Query: `discrawl search --limit 10 "Anthropic model catalog claude opus sonnet OpenClaw"`
+
+Results:
+
+- Returned support threads for `claude-cli models not in catalog`, configuring Opus/Sonnet model allowlists, and users mistaking rate-limit cooldown for catalog changes.
+
+Query: `discrawl search --limit 10 "Claude CLI OpenClaw auth login claude-cli"`
+
+Results:
+
+- Returned implemented Claude CLI delegation notes and user support threads where model/runtime policy differed between session paths.
--- a/docs/maturity-scorecard/inventory/anthropic-provider-path/prompt-caching-context-windows-and-request-knobs.md
+++ b/docs/maturity-scorecard/inventory/anthropic-provider-path/prompt-caching-context-windows-and-request-knobs.md
@@ -0,0 +1,141 @@
+---
+title: "Anthropic provider path - Prompt Cache and Context Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Anthropic provider path - Prompt Cache and Context Maturity Note
+
+## Summary
+
+Anthropic prompt caching and request shaping are first-class: docs explain
+`cacheRetention`, 1M context windows, fast mode, and long-context troubleshooting;
+source injects cache markers, strips retired betas, applies service-tier knobs,
+and normalizes GA 1M context metadata. Coverage is Stable because docs, source,
+and tests cover the main knobs. Quality is Beta because Discord/GitHub archives
+show users still need help with cache TTL expectations, custom-provider long
+TTL limits, long-context 429s, and setup-token/API-key eligibility.
+
+## Category Scope
+
+Included in this category:
+
+- Cache retention: Covers Cache retention across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
+- System-prompt cache boundary: Covers System-prompt cache boundary across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
+- 1M context: Covers 1M context across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
+- Fast mode/service tier: Covers Fast mode/service tier across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
+- Cache diagnostics: Covers Cache diagnostics across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
+
+## Features
+
+- Cache retention: Covers Cache retention across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
+- System-prompt cache boundary: Covers System-prompt cache boundary across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
+- 1M context: Covers 1M context across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
+- Fast mode/service tier: Covers Fast mode/service tier across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
+- Cache diagnostics: Covers Cache diagnostics across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Stable (82%)`
+- Positive signals: Docs cover prompt caching, cacheRetention merge order, direct Anthropic 5-minute and 1-hour TTL behavior, 1M context, fast mode, and 429 remediation; source and tests cover cache markers, context window overrides, beta stripping, service-tier behavior, and cache defaults.
+- Negative signals: Live proof for cache-hit behavior and 1M context eligibility depends on upstream account state and is not fully covered by deterministic local tests.
+- Integration gaps: The audit found docs/source/test proof but not a repeated live Anthropic cache-hit and 1M-context release smoke artifact.
+
+## Quality Score
+
+- Score: `Beta (76%)`
+- Gitcrawl reports: #37966 reports `cacheRetention` ignored for LiteLLM-proxied Anthropic models; #62475 requests prompt-cache keep-warm pings; #63030 reports system prompt assembly drift causing Anthropic cache invalidation; PR #79370 fixes explicit cacheRetention for OpenRouter to Anthropic models.
+- Discrawl reports: Discord archive results include custom provider 1-hour cache questions, Haiku cache-hit confusion, system-prompt cache-boundary guidance, and 1M context 429 extra-usage troubleshooting.
+- Good qualities: Cache policy is centralized, long TTL is endpoint-gated, retired 1M beta headers are stripped, API-key auth gets conservative defaults, and docs distinguish direct Anthropic from custom/proxy behavior.
+- Bad qualities: Users still need to reason about TTLs, heartbeat, context pruning, long-context account eligibility, API-key versus setup-token behavior, and custom endpoint limitations.
+- Excluded from quality: Unit, integration, e2e, live, and real runtime-flow test presence or absence; those are Coverage inputs only.
+
+## Completeness Score
+
+- Score: `Stable (82%)`
+- Surface instructions: evaluated against `references/completeness/anthropic-provider-path.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Cache retention, System-prompt cache boundary, 1M context, Fast mode/service tier, and Cache diagnostics.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- 1M context can be correctly sized locally while still failing upstream for
+  account eligibility.
+- Long cache TTL does not apply uniformly to arbitrary Anthropic-compatible
+  custom hosts, which is easy to misconfigure.
+- Cache-hit proof is mostly usage-derived and needs repeated live scenario
+  captures for release readiness.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/providers/anthropic.md` documents Claude 4.6 thinking defaults, `cacheRetention`, fast mode, media, 1M context, and troubleshooting for invalid/no credentials.
+- `/Users/kevinlin/code/openclaw/docs/reference/prompt-caching.md` documents Anthropic direct API caching, `cacheRetention` merge order, cache-ttl pruning, heartbeat keep-warm, direct Anthropic 1-hour TTL, OpenRouter Anthropic cache handling, and system-prompt cache boundaries.
+- `/Users/kevinlin/code/openclaw/docs/gateway/troubleshooting.md` documents `HTTP 429: rate_limit_error: Extra usage is required for long context requests` and fixes.
+- `/Users/kevinlin/code/openclaw/docs/gateway/heartbeat.md` documents heartbeat intervals, including a longer interval for Anthropic OAuth/token auth.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/config-defaults.ts` seeds `contextPruning.mode: "cache-ttl"`, heartbeat intervals, API-key `cacheRetention: "short"`, and Claude CLI runtime defaults.
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/stream-wrappers.ts` strips retired context-1m betas, adds Anthropic beta headers, injects service-tier/fast-mode params for API keys, and strips unsafe thinking prefill.
+- `/Users/kevinlin/code/openclaw/src/agents/anthropic-payload-policy.ts` applies Anthropic cache-control markers to system and trailing user turns, respects system-prompt cache boundaries, gates long TTL by endpoint, and injects service tier.
+- `/Users/kevinlin/code/openclaw/src/llm/providers/anthropic.ts` resolves cache retention, applies cache control to system/tool/message payloads, and records cache usage counters.
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/register.runtime.ts` applies GA 1M context metadata to modern Claude 4.x models.
+
+### Integration tests
+
+- `/Users/kevinlin/code/openclaw/test/scripts/package-acceptance-workflow.test.ts` includes live Anthropic model profiles that exercise modern Claude model refs.
+- `/Users/kevinlin/code/openclaw/src/agents/anthropic.setup-token.live.test.ts` env-gates live setup-token completion, indirectly proving request knobs can coexist with resolved token auth.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/stream-wrappers.test.ts` covers beta stripping, OAuth beta preservation, service-tier injection/skips, fast mode behavior, and thinking prefill stripping.
+- `/Users/kevinlin/code/openclaw/src/agents/anthropic-payload-policy.test.ts` covers cache marker application, endpoint gating, system prompt boundary handling, and service-tier policy.
+- `/Users/kevinlin/code/openclaw/src/llm/providers/stream-wrappers/anthropic-cache-control-payload.test.ts` covers cache-control payload marker behavior.
+- `/Users/kevinlin/code/openclaw/src/agents/embedded-agent-runner/extra-params.cache-retention-default.test.ts` covers Anthropic-family cache semantics and explicit retention.
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/index.test.ts` covers 1M context normalization and API-key cacheRetention defaults.
+
+### Gitcrawl queries
+
+Query: `gitcrawl --json search issues -R openclaw/openclaw "Anthropic 429 long context extra usage required fallback"`
+
+Results:
+
+- Returned no direct results for that exact GitHub issue query.
+
+Query: `gitcrawl --json search issues -R openclaw/openclaw "Anthropic prompt caching cacheRetention"`
+
+Results:
+
+- #37966 `[Bug]: cacheRetention ignored for LiteLLM-proxied Anthropic models`.
+- #62475 requests prompt cache keep-warm pings.
+- #63030 reports system prompt assembly differences causing continuous Anthropic cache invalidation.
+
+Query: `gitcrawl --json search prs -R openclaw/openclaw "anthropic cacheRetention"`
+
+Results:
+
+- #79370 `fix(cache): honour explicit cacheRetention for OpenRouter to Anthropic models`.
+- #76741 `fix(kimi): strip anthropic cache markers`.
+
+### Discrawl queries
+
+Query: `discrawl search --limit 10 "Claude 4.6 1M context Anthropic 429"`
+
+Results:
+
+- Returned March 2026 support threads explaining `HTTP 429: rate_limit_error: Extra usage is required for long context requests`, extra-usage requirements, API-key eligibility, and config changes to remove `context1m`.
+
+Query: `discrawl search --limit 10 "Anthropic prompt caching cacheRetention OpenClaw"`
+
+Results:
+
+- Returned prompt caching support threads about Haiku cache-hit rates, configurable `cacheRetention`, static/dynamic system-prompt split, custom provider 1-hour cache limitations, and cache write spikes.
--- a/docs/maturity-scorecard/inventory/anthropic-provider-path/report.md
+++ b/docs/maturity-scorecard/inventory/anthropic-provider-path/report.md
@@ -0,0 +1,232 @@
+---
+title: "Anthropic provider path Maturity Report"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Anthropic provider path Maturity Report
+
+## Top-level scores
+
+These rollups are simple arithmetic means over the category-note numeric
+scores in
+`scores.yaml`. Percentages are rounded to the nearest whole number.
+
+- Coverage: `Stable (80%)`
+- Quality: `Beta (74%)`
+- Completeness: `Stable (80%)`
+- LTS Features: `0/5`
+
+## Summary
+
+This report promotes the archived `anthropic-provider-path` maturity evidence from `/Users/kevinlin/tmp/maturity/anthropic-provider-path` into the current process-version-3 inventory contract.
+
+The category Coverage and Quality scores come from the archived evidence-backed score rows. Completeness is initialized from the same archived evidence breadth and known-gap record, then joined with the surface-specific completeness rubric referenced by taxonomy.
+
+## Matrix
+
+| Category                                                                                     | LTS | Coverage       | Quality        | Completeness   | Features to evaluate                                                                                                                                                                                                          |
+| -------------------------------------------------------------------------------------------- | --- | -------------- | -------------- | -------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| [Provider Auth and Recovery](auth-onboarding-and-credential-profile-health.md)               | ❌  | `Beta (78%)`   | `Beta (70%)`   | `Beta (78%)`   | API-key onboarding, Claude CLI credential reuse, Setup-token auth, Auth profile health, Model status, Usage windows, Cooldown/profile reporting, Long-context recovery, Fallback guidance                                     |
+| [Model and Runtime Selection](model-catalog-aliases-and-runtime-policy.md)                   | ❌  | `Stable (82%)` | `Alpha (68%)`  | `Stable (82%)` | Bundled Claude catalog, Canonical anthropic refs, Claude CLI compatibility, Model picker availability, Capability metadata, Runtime selection, Session continuity, MCP/tool bridge, Permission-mode mapping, Fallback prelude |
+| [Request Transport and Turn Semantics](direct-anthropic-messages-transport-and-streaming.md) | ❌  | `Stable (82%)` | `Beta (72%)`   | `Stable (82%)` | API-key/OAuth transport, Messages payloads, Streaming decode, Usage and stop reasons, Abort/error handling, Tool-use blocks, Tool-result replay, Partial JSON recovery, Native thinking, Signed/redacted thinking replay      |
+| [Prompt Cache and Context](prompt-caching-context-windows-and-request-knobs.md)              | ❌  | `Stable (82%)` | `Beta (76%)`   | `Stable (82%)` | Cache retention, System-prompt cache boundary, 1M context, Fast mode/service tier, Cache diagnostics                                                                                                                          |
+| [Media Inputs](media-understanding-and-document-inputs.md)                                   | ❌  | `Beta (74%)`   | `Stable (82%)` | `Beta (74%)`   | Image input, PDF document input, Media model fallback, Image tool results                                                                                                                                                     |
+
+## Scoring rubric
+
+- Coverage:
+  maturity-label rating for integration, e2e, live, or server/runtime flow
+  evidence across the category. Unit tests can provide supporting context but never make a
+  feature covered by themselves.
+- Quality:
+  maturity-label rating for implementation and operational robustness. Unit,
+  integration, e2e, live, and real runtime-flow test coverage are Coverage
+  inputs only; they do not raise or lower Quality.
+- Completeness:
+  maturity-label rating for how fully the category delivers the intended
+  surface-specific capability set. Use the taxonomy-linked completeness
+  instructions for this surface.
+- LTS:
+  calculated as `quality > 80 and coverage > 90`, or when the matching
+  taxonomy category sets `human_lts_override`.
+- Shared score bands:
+  `Lovable = 95-100`, `Stable = 80-95`, `Beta = 70-80`,
+  `Alpha = 50-70`, and `Experimental = 0-50`. At shared boundaries, choose the
+  higher maturity label.
+- Major quality/completeness gaps:
+  evidence text only, tracked in the detailed feature inventory rather than as a
+  separate scored dimension.
+
+## Detailed feature inventory
+
+### 1. Provider Auth and Recovery
+
+Search anchors: API-key onboarding, Claude CLI credential reuse, Setup-token auth, Auth profile health, Model status, Usage windows, Cooldown/profile reporting, Long-context recovery, Fallback guidance.
+
+Category note: [Provider Auth and Recovery](auth-onboarding-and-credential-profile-health.md)
+
+Score decisions:
+
+- Coverage: `Beta (78%)`
+- Quality: `Beta (70%)`
+- Completeness: `Beta (78%)`
+- LTS: ❌
+
+Features:
+
+- API-key onboarding: Covers API-key onboarding across the Anthropic credential surface before a model request is made: onboarding choices, API-key storage, Claude CLI credential migration, setup-token validation, and related credential setup and health behavior.
+- Claude CLI credential reuse: Covers Claude CLI credential reuse across the Anthropic credential surface before a model request is made: onboarding choices, API-key storage, Claude CLI credential migration, setup-token validation, and related credential setup and health behavior.
+- Setup-token auth: Covers Setup-token auth across the Anthropic credential surface before a model request is made: onboarding choices, API-key storage, Claude CLI credential migration, setup-token validation, and related credential setup and health behavior.
+- Auth profile health: Covers Auth profile health across the Anthropic credential surface before a model request is made: onboarding choices, API-key storage, Claude CLI credential migration, setup-token validation, and related credential setup and health behavior.
+- Model status: Covers Model status across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
+- Usage windows: Covers Usage windows across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
+- Cooldown/profile reporting: Covers Cooldown/profile reporting across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
+- Long-context recovery: Covers Long-context recovery across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
+- Fallback guidance: Covers Fallback guidance across operator diagnostics and recovery for Anthropic provider failures: status output, usage windows, auth profile source reporting, cooldown and disabled profile reporting, and related diagnostics and recovery behavior.
+
+Primary docs:
+
+- `docs/providers/anthropic.md`
+- `docs/gateway/doctor.md`
+- `docs/gateway/configuration-examples.md`
+- `docs/gateway/troubleshooting.md`
+- `docs/reference/prompt-caching.md`
+
+### 2. Model and Runtime Selection
+
+Search anchors: Bundled Claude catalog, Canonical anthropic refs, Claude CLI compatibility, Model picker availability, Capability metadata, Runtime selection, Session continuity, MCP/tool bridge, Permission-mode mapping, Fallback prelude.
+
+Category note: [Model and Runtime Selection](model-catalog-aliases-and-runtime-policy.md)
+
+Score decisions:
+
+- Coverage: `Stable (82%)`
+- Quality: `Alpha (68%)`
+- Completeness: `Stable (82%)`
+- LTS: ❌
+
+Features:
+
+- Bundled Claude catalog: Covers Bundled Claude catalog across the Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
+- Canonical anthropic refs: Covers Canonical anthropic refs across the Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
+- Claude CLI compatibility: Covers Claude CLI compatibility across the Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
+- Model picker availability: Covers Model picker availability across the Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
+- Capability metadata: Covers Capability metadata across the Anthropic model catalog and policy layer: bundled model rows, model aliases, current and future Claude model id normalization, runtime-provider selection, and related model catalog and policy behavior.
+- Runtime selection: Covers Runtime selection across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
+- Session continuity: Covers Session continuity across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
+- MCP/tool bridge: Covers MCP/tool bridge across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
+- Permission-mode mapping: Covers Permission-mode mapping across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
+- Fallback prelude: Covers Fallback prelude across OpenClaw's host-local Claude CLI path after auth is available: the `claude-cli` backend, its command/args/env defaults, MCP tool bridge, native tool mode, and related Claude CLI backend behavior.
+
+Primary docs:
+
+- `docs/providers/anthropic.md`
+- `docs/gateway/config-agents.md`
+- `docs/concepts/models.md`
+- `docs/gateway/cli-backends.md`
+
+### 3. Request Transport and Turn Semantics
+
+Search anchors: API-key/OAuth transport, Messages payloads, Streaming decode, Usage and stop reasons, Abort/error handling, Tool-use blocks, Tool-result replay, Partial JSON recovery, Native thinking, Signed/redacted thinking replay.
+
+Category note: [Request Transport and Turn Semantics](direct-anthropic-messages-transport-and-streaming.md)
+
+Score decisions:
+
+- Coverage: `Stable (82%)`
+- Quality: `Beta (72%)`
+- Completeness: `Stable (82%)`
+- LTS: ❌
+
+Features:
+
+- API-key/OAuth transport: Covers API-key/OAuth transport across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct Anthropic API behavior.
+- Messages payloads: Covers Messages payloads across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct Anthropic API behavior.
+- Streaming decode: Covers Streaming decode across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct Anthropic API behavior.
+- Usage and stop reasons: Covers Usage and stop reasons across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct Anthropic API behavior.
+- Abort/error handling: Covers Abort/error handling across direct Anthropic `api: "anthropic-messages"` request and stream behavior: API-key and OAuth transport setup, Anthropic beta headers, model-id normalization for direct hosts, payload construction, and related direct Anthropic API behavior.
+- Tool-use blocks: Covers Tool-use blocks across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
+- Tool-result replay: Covers Tool-result replay across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
+- Partial JSON recovery: Covers Partial JSON recovery across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
+- Native thinking: Covers Native thinking across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
+- Signed/redacted thinking replay: Covers Signed/redacted thinking replay across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
+
+Primary docs:
+
+- `docs/providers/anthropic.md`
+- `docs/reference/prompt-caching.md`
+- `docs/gateway/troubleshooting.md`
+- `docs/gateway/cli-backends.md`
+- `docs/concepts/model-providers.md`
+
+### 4. Prompt Cache and Context
+
+Search anchors: Cache retention, System-prompt cache boundary, 1M context, Fast mode/service tier, Cache diagnostics.
+
+Category note: [Prompt Cache and Context](prompt-caching-context-windows-and-request-knobs.md)
+
+Score decisions:
+
+- Coverage: `Stable (82%)`
+- Quality: `Beta (76%)`
+- Completeness: `Stable (82%)`
+- LTS: ❌
+
+Features:
+
+- Cache retention: Covers Cache retention across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
+- System-prompt cache boundary: Covers System-prompt cache boundary across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
+- 1M context: Covers 1M context across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
+- Fast mode/service tier: Covers Fast mode/service tier across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
+- Cache diagnostics: Covers Cache diagnostics across Anthropic-specific request knobs outside the core content stream: prompt cache retention, cache-control markers, system-prompt cache boundary, 1M context sizing, and related prompt cache and context behavior.
+
+Primary docs:
+
+- `docs/providers/anthropic.md`
+- `docs/reference/prompt-caching.md`
+- `docs/gateway/troubleshooting.md`
+- `docs/gateway/heartbeat.md`
+
+### 5. Media Inputs
+
+Search anchors: Image input, PDF document input, Media model fallback, Image tool results.
+
+Category note: [Media Inputs](media-understanding-and-document-inputs.md)
+
+Score decisions:
+
+- Coverage: `Beta (74%)`
+- Quality: `Stable (82%)`
+- Completeness: `Beta (74%)`
+- LTS: ❌
+
+Features:
+
+- Image input: Covers Image input across Anthropic media understanding as part of the provider path: image input support, PDF native document input metadata, default media model selection, auto-priority, and related media inputs behavior.
+- PDF document input: Covers PDF document input across Anthropic media understanding as part of the provider path: image input support, PDF native document input metadata, default media model selection, auto-priority, and related media inputs behavior.
+- Media model fallback: Covers Media model fallback across Anthropic media understanding as part of the provider path: image input support, PDF native document input metadata, default media model selection, auto-priority, and related media inputs behavior.
+- Image tool results: Covers Image tool results across Anthropic media understanding as part of the provider path: image input support, PDF native document input metadata, default media model selection, auto-priority, and related media inputs behavior.
+
+Primary docs:
+
+- `docs/providers/anthropic.md`
+- `docs/gateway/config-agents.md`
+
+## Recommended scorecard interpretation
+
+Use this migrated score as the current inventory baseline. Refresh individual categories with live category-agent research before treating a high score as an LTS promotion gate.
+
+## Out of scope for this surface
+
+- Redefining taxonomy category boundaries; taxonomy remains the source of truth for category identity, features, docs, and search anchors.
+
+## Audit provenance
+
+- Score source:
+  `docs/maturity-scorecard/inventory/anthropic-provider-path/scores.yaml`.
+- Taxonomy metadata source:
+  `.agents/skills/claw-score/taxonomy.yaml`.
+- Archived evidence source:
+  `/Users/kevinlin/tmp/maturity/anthropic-provider-path`.
--- a/docs/maturity-scorecard/inventory/anthropic-provider-path/scores.yaml
+++ b/docs/maturity-scorecard/inventory/anthropic-provider-path/scores.yaml
@@ -0,0 +1,28 @@
+version: 1
+process_version: 3
+data:
+  - name: Provider Auth and Recovery
+    category_note: auth-onboarding-and-credential-profile-health.md
+    coverage: 78
+    quality: 70
+    completeness: 78
+  - name: Model and Runtime Selection
+    category_note: model-catalog-aliases-and-runtime-policy.md
+    coverage: 82
+    quality: 68
+    completeness: 82
+  - name: Request Transport and Turn Semantics
+    category_note: direct-anthropic-messages-transport-and-streaming.md
+    coverage: 82
+    quality: 72
+    completeness: 82
+  - name: Prompt Cache and Context
+    category_note: prompt-caching-context-windows-and-request-knobs.md
+    coverage: 82
+    quality: 76
+    completeness: 82
+  - name: Media Inputs
+    category_note: media-understanding-and-document-inputs.md
+    coverage: 74
+    quality: 82
+    completeness: 74
--- a/docs/maturity-scorecard/inventory/anthropic-provider-path/tool-calls-replay-and-native-thinking.md
+++ b/docs/maturity-scorecard/inventory/anthropic-provider-path/tool-calls-replay-and-native-thinking.md
@@ -0,0 +1,136 @@
+---
+title: "Anthropic provider path - Tools and Thinking Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Anthropic provider path - Tools and Thinking Maturity Note
+
+## Summary
+
+Anthropic tool-call and native-thinking handling is broad: OpenClaw converts
+tools to Anthropic schemas, maps Claude Code tool names under OAuth, preserves
+signed/redacted thinking, sanitizes malformed replay, handles tool result media,
+and maps thinking levels to provider effort. Coverage is Stable because source
+and tests cover the key transformations and live replay. Quality is Beta
+because archived incidents show tool-call streaming and thinking replay have
+been frequent regression points.
+
+## Category Scope
+
+This category covers Anthropic-specific turn semantics inside agent runs:
+tool declarations, tool-use block conversion, tool-result conversion,
+tool-call id normalization, partial JSON handling, Claude Code tool-name
+mapping, native thinking blocks, redacted thinking, signed thinking replay,
+thinking effort/defaults, and turn validation for replay.
+
+## Features
+
+- Tool-use blocks: Covers Tool-use blocks across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
+- Tool-result replay: Covers Tool-result replay across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
+- Partial JSON recovery: Covers Partial JSON recovery across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
+- Native thinking: Covers Native thinking across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
+- Signed/redacted thinking replay: Covers Signed/redacted thinking replay across Anthropic-specific turn semantics inside agent runs: tool declarations, tool-use block conversion, tool-result conversion, tool-call id normalization, and related tools and thinking behavior.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Stable (82%)`
+- Positive signals: Source handles native Anthropic thinking, signed/redacted thinking replay, tool schema conversion, tool-result grouping, image tool results, Claude Code tool aliases, partial-json scratch cleanup, and replay validation; tests cover signed thinking, tool-use replay, malformed tool args, and live tool replay.
+- Negative signals: Several cases are covered by focused tests and env-gated live tests rather than always-on end-to-end Anthropic tool scenarios.
+- Integration gaps: Full live tool-call scenario proof is limited by `ANTHROPIC_LIVE_TEST` and provider credentials.
+
+## Quality Score
+
+- Score: `Beta (74%)`
+- Gitcrawl reports: #60593 tracks recurring Anthropic streaming JSON parse errors; PR #68565 preserves signed/redacted thinking blocks; PR #70372 suppresses thinking narration leakage; PR #87346 merges consecutive assistant turns in validation; PR #61151 drops `partialJson` streaming artifacts from session history repair.
+- Discrawl reports: Discord archive results include session corruption from truncated streaming tool calls, raw parse errors from Anthropic tool-call deltas, and downstream fixes for partial JSON recovery.
+- Good qualities: The implementation preserves provider-signed thinking, strips synthetic reasoning from native Anthropic replay, coalesces consecutive tool results, coerces malformed tool-call args, and avoids persisting streaming scratch buffers.
+- Bad qualities: Tool-call streaming is one of the highest-churn Anthropic edges because partial JSON, thinking signatures, provider-compatible endpoints, and replay validation interact.
+- Excluded from quality: Unit, integration, e2e, live, and real runtime-flow test presence or absence; those are Coverage inputs only.
+
+## Completeness Score
+
+- Score: `Stable (82%)`
+- Surface instructions: evaluated against `references/completeness/anthropic-provider-path.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Tool-use blocks, Tool-result replay, Partial JSON recovery, Native thinking, and Signed/redacted thinking replay.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- Live proof is strongest for synthetic replay acceptance, not every real tool
+  invocation type.
+- Tool-call and thinking behavior differs across direct Anthropic and
+  Anthropic-compatible providers, increasing maintenance pressure.
+- Historical incidents show partial JSON and thinking display can corrupt
+  session history or leak confusing text when not carefully normalized.
+
+## Evidence
+
+### Docs
+
+- `/Users/kevinlin/code/openclaw/docs/providers/anthropic.md` documents Claude 4.6 thinking defaults, `/think` overrides, cache behavior, and media/document handling.
+- `/Users/kevinlin/code/openclaw/docs/gateway/cli-backends.md` documents Claude CLI permission mode, `/think` effort mapping, MCP bridge tools, and session behavior.
+- `/Users/kevinlin/code/openclaw/docs/concepts/model-providers.md` covers model/provider behavior that feeds tool and thinking transport choices.
+
+### Source
+
+- `/Users/kevinlin/code/openclaw/src/llm/providers/anthropic.ts` implements `convertTools`, `convertMessages`, `normalizeToolCallId`, signed/redacted thinking replay, `input_json_delta` accumulation, tool-use blocks, tool-result grouping, and thinking effort request construction.
+- `/Users/kevinlin/code/openclaw/src/agents/anthropic-transport-stream.ts` implements transport-side tool argument coercion, unsafe integer preservation, reasoning content handling for compatible streams, and native Anthropic thinking replay behavior.
+- `/Users/kevinlin/code/openclaw/src/agents/embedded-agent-runner/thinking.ts` wraps Anthropic streams with thinking recovery and blocks duplicate streaming retries after output begins.
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/replay-policy.ts` defines Anthropic replay policy including strict tool ids, signature preservation, turn validation, and synthetic tool result allowance.
+
+### Integration tests
+
+- `/Users/kevinlin/code/openclaw/src/agents/embedded-agent-runner.anthropic-tool-replay.live.test.ts` env-gates live Anthropic replay acceptance for regular text, omitted reasoning placeholder, and tool-call replay history.
+- `/Users/kevinlin/code/openclaw/src/agents/anthropic-transport-stream.live.test.ts` covers live stream abort behavior adjacent to tool/thinking streaming.
+
+### Unit tests
+
+- `/Users/kevinlin/code/openclaw/src/agents/anthropic-transport-stream.test.ts` covers unsafe integer tool-use deltas, OAuth tool-name remapping, signed thinking ingest, multiple signature deltas, reasoning_content compatible replay, malformed tool schemas, malformed tool-call args, empty tool results, image tool results, and thinking effort mapping.
+- `/Users/kevinlin/code/openclaw/src/llm/providers/anthropic.test.ts` covers signed thinking replay payload preservation.
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/index.test.ts` covers replay policy and native reasoning output mode.
+- `/Users/kevinlin/code/openclaw/extensions/anthropic/cli-shared.test.ts` covers Claude CLI thinking effort mapping.
+
+### Gitcrawl queries
+
+Query: `gitcrawl --json search issues -R openclaw/openclaw "anthropic thinking signature replay cache_control"`
+
+Results:
+
+- Returned no direct results for that exact issue query.
+
+Query: `gitcrawl --json search prs -R openclaw/openclaw "Anthropic thinking"`
+
+Results:
+
+- #70372 suppresses thinking narration leaking into channel messages for Anthropic/Bedrock.
+- #68565 preserves signed/redacted thinking blocks.
+- #87346 merges consecutive assistant turns in turn validation.
+- #85381 emits thinking_delta events and handles redacted single-block shape.
+
+Query: `gitcrawl --json search issues -R openclaw/openclaw "Anthropic tool call streaming JSON parse error"`
+
+Results:
+
+- #60593 reports recurring Anthropic streaming JSON parse errors.
+
+### Discrawl queries
+
+Query: `discrawl search --limit 10 "Anthropic tool call streaming parse JSON"`
+
+Results:
+
+- Returned archived user reports and PR notifications for truncated streaming tool-call corruption, raw parse errors, control-character sanitization, and tool-call argument recovery.
+
+Query: `discrawl search --limit 10 "Anthropic thinking signature cache control"`
+
+Results:
+
+- Returned no direct results for that exact query.
--- a/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/background-task-ledger.md
+++ b/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/background-task-ledger.md
@@ -0,0 +1,134 @@
+---
+title: "Automation: cron, hooks, tasks, polling - Background Tasks and Flows Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Automation: cron, hooks, tasks, polling - Background Tasks and Flows Maturity Note
+
+## Summary
+
+The background task ledger is well specified and implemented: it tracks detached ACP, subagent, cron, CLI, and media jobs; persists SQLite state; reconciles runtime backing; exposes CLI and Gateway methods; handles terminal notifications; and includes audit/maintenance. Quality is limited by restart/lost-task edge cases and operator confusion about the difference between task records and durable execution.
+
+## Category Scope
+
+Included in this category:
+
+- Task list/show/cancel: Covers Task list/show/cancel across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
+- Task notifications: Covers Task notifications across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
+- Task audit and maintenance: Covers Task audit and maintenance across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
+- Chat task board: Covers Chat task board across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
+- Task pressure status: Covers Task pressure status across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
+- Managed flows: Covers Managed flows across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
+- Mirrored flows: Covers Mirrored flows across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
+- openclaw tasks flow: Covers openclaw tasks flow across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
+- Flow audit and maintenance: Covers Flow audit and maintenance across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
+- Plugin managedFlows: Covers Plugin managedFlows across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
+
+## Features
+
+- Task list/show/cancel: Covers Task list/show/cancel across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
+- Task notifications: Covers Task notifications across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
+- Task audit and maintenance: Covers Task audit and maintenance across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
+- Chat task board: Covers Chat task board across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
+- Task pressure status: Covers Task pressure status across task creation, status transitions, runtime types, owner/session access, and related background task ledger behavior.
+- Managed flows: Covers Managed flows across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
+- Mirrored flows: Covers Mirrored flows across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
+- openclaw tasks flow: Covers openclaw tasks flow across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
+- Flow audit and maintenance: Covers Flow audit and maintenance across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
+- Plugin managedFlows: Covers Plugin managedFlows across managed and mirrored flow modes, flow registry persistence, revision tracking, owner-scoped access, and related task flow behavior.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Stable (84%)`
+- Positive signals: The registry, store, maintenance, audit, reconcile, owner access, delivery, executor policy, status formatting, Gateway methods, and CLI commands all have focused tests.
+- Negative signals: Coverage is weaker for full restart scenarios where the Gateway is killed during active tasks and then reconciles mixed ACP/subagent/cron/CLI backing state in a real process.
+- Integration gaps: A restart-kill harness should create one task per runtime, force Gateway shutdown before drain, restart, and prove audit/maintenance outcomes, delivery notifications, and cleanup retention.
+
+## Quality Score
+
+- Score: `Beta (77%)`
+- Gitcrawl reports: PR #59719 tracks background exec liveness with CLI tasks; issue #42767 was closed after stale active tasks gained lost-state reconciliation; issue #66909 asked whether tasks resume after Gateway restart; issue #42246 requests batching/aggregation of outbound notifications for background tasks.
+- Discrawl reports: Maintainer reports mention PR #78575 for stale task audit entries after forced/timed-out restarts, and user discussions advise treating OpenClaw tasks/transcripts as an operator audit trail rather than the sole durable work queue.
+- Good qualities: The docs clearly state tasks are records, not schedulers; the registry persists to SQLite; reconciliation is runtime-aware; terminal rows retain for seven days; and task status output sanitizes internal runtime text.
+- Bad qualities: The lived record shows task `lost` behavior, restart semantics, and notification volume remain hard for users to reason about.
+- Excluded from quality: Test inventory and runtime proof depth; they are coverage inputs only.
+
+## Completeness Score
+
+- Score: `Stable (84%)`
+- Surface instructions: evaluated against `references/completeness/automation-cron-hooks-tasks-polling.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Task list/show/cancel, Task notifications, Task audit and maintenance, Chat task board, Task pressure status, Managed flows, Mirrored flows, openclaw tasks flow, Flow audit and maintenance, and Plugin managedFlows.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- Restart behavior should be summarized in CLI status and docs with one explicit table: what can resume, what becomes lost, and what TaskFlow adds.
+- Notification aggregation or batching remains a user-facing need for busy background task deployments.
+- Maintenance should remain strongly observable because `lost` is a normal recovery signal, not just an error.
+
+## Evidence
+
+### Docs
+
+- `docs/automation/tasks.md` explains task sources, lifecycle, statuses, delivery, notify policies, audit, maintenance, chat `/tasks`, status integration, storage, and relation to cron/heartbeat/Task Flow.
+- `docs/automation/index.md` positions tasks as the detached-work ledger rather than a scheduler.
+- `docs/cli/tasks.md` documents CLI commands for listing, showing, cancelling, notifying, auditing, maintaining, and inspecting flows.
+
+### Source
+
+- `src/tasks/task-registry.ts`, `src/tasks/task-registry.store.ts`, `src/tasks/task-registry.store.sqlite.ts`, `src/tasks/task-registry.reconcile.ts`, `src/tasks/task-registry.audit.ts`, `src/tasks/task-registry.maintenance.ts`, and `src/tasks/task-registry.types.ts` implement task persistence, reconciliation, audit, and maintenance.
+- `src/tasks/task-executor.ts`, `src/tasks/task-executor-policy.ts`, `src/tasks/task-registry-delivery-runtime.ts`, and `src/tasks/task-status.ts` implement cancellation, notifications, delivery, and status formatting.
+- `src/gateway/server-methods/tasks.ts` and `src/commands/tasks.ts` expose Gateway and CLI task operations.
+
+### Integration tests
+
+- `src/gateway/server-methods/tasks.test.ts` covers Gateway methods for tasks.
+- `test/scripts/openclaw-test-state.test.ts` exercises broader OpenClaw test state that includes runtime state management.
+- No full process restart-kill e2e across all task runtime types was found.
+
+### Unit tests
+
+- `src/tasks/task-registry.test.ts`, `src/tasks/task-registry.store.test.ts`, `src/tasks/task-registry.audit.test.ts`, `src/tasks/task-registry.maintenance.issue-60299.test.ts`, and `src/tasks/task-registry.process-state.test.ts` cover registry behavior.
+- `src/tasks/task-executor.test.ts`, `src/tasks/task-executor-policy.test.ts`, `src/tasks/detached-task-runtime.test.ts`, and `src/tasks/task-status.test.ts` cover executor, notification, runtime, and status behavior.
+- `src/commands/tasks.test.ts`, `src/commands/tasks-json.test.ts`, and `src/commands/tasks-audit-system.ts` cover CLI formatting and system audit behavior.
+
+### Gitcrawl queries
+
+Query:
+
+`gitcrawl search openclaw/openclaw --query "tasks ledger stale lost maintenance cron subagent" --json --limit 5`
+
+Results:
+
+- No hits for the exact query.
+
+Fallback query:
+
+`gitcrawl search openclaw/openclaw --query "background tasks lost" --json --limit 5`
+
+Results:
+
+- PR #59719 fixes background exec liveness through CLI tasks.
+- Issue #42767 discusses long-running tasks stuck as running; current main reconciles orphaned active tasks to `lost`.
+- Issue #66909 asks whether tasks automatically resume after Gateway restart.
+- Issue #42246 requests configurable batching/aggregation for outbound background task notifications.
+
+### Discrawl queries
+
+Query:
+
+`/Users/kevinlin/.local/bin/discrawl search --mode hybrid --limit 5 "background tasks lost"`
+
+Results:
+
+- Maintainers requested review of PR #78575, described as fixing stale task audit entries by marking running background tasks `lost` when a forced/timed-out Gateway restart proceeds before drain completes.
+- Maintainer/user discussions explain that background tasks can become `lost` and recommend treating OpenClaw tasks/transcripts as an operator audit trail, with Postgres/Redis for a durable external work ledger when needed.
+- Issue #66909 was closed after docs clarified that tasks persist tracking records but not execution state.
--- a/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/channel-polling-webhooks.md
+++ b/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/channel-polling-webhooks.md
@@ -0,0 +1,158 @@
+---
+title: "Automation: cron, hooks, tasks, polling - Event Ingress Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Automation: cron, hooks, tasks, polling - Event Ingress Maturity Note
+
+## Summary
+
+Channel ingress polling and webhook monitors are mature for high-traffic channels such as Telegram and Zalo, with detailed docs, startup behavior, lease/session handling, watchdogs, and tests. The quality ceiling is limited by the lived record: polling stall detection, webhook/polling mutual exclusion, schema drift, startup blocking, and network failures are frequent operational hazards.
+
+## Category Scope
+
+Included in this category:
+
+- Telegram long polling: Covers Telegram long polling across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
+- Telegram webhook mode: Covers Telegram webhook mode across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
+- Zalo polling/webhook mode: Covers Zalo polling/webhook mode across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
+- Polling stall diagnostics: Covers Polling stall diagnostics across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
+- iMessage watch fallback: Covers iMessage watch fallback across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
+- Gmail setup wizard: Covers Gmail setup wizard across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watcher behavior.
+- Watcher start/serve: Covers Watcher start/serve across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watcher behavior.
+- Tailscale/public routing: Covers Tailscale/public routing across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watcher behavior.
+- Push token validation: Covers Push token validation across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watcher behavior.
+- Gmail event routing: Covers Gmail event routing across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watcher behavior.
+- POST /hooks/wake: Covers POST /hooks/wake across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhook behavior.
+- POST /hooks/agent: Covers POST /hooks/agent across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhook behavior.
+- Mapped hooks: Covers Mapped hooks across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhook behavior.
+- Hook auth policy: Covers Hook auth policy across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhook behavior.
+- Async dispatch: Covers Async dispatch across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhook behavior.
+
+## Features
+
+- Telegram long polling: Covers Telegram long polling across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
+- Telegram webhook mode: Covers Telegram webhook mode across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
+- Zalo polling/webhook mode: Covers Zalo polling/webhook mode across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
+- Polling stall diagnostics: Covers Polling stall diagnostics across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
+- iMessage watch fallback: Covers iMessage watch fallback across channel-level long polling and webhook modes, especially Telegram and Zalo; polling liveness, leases, watchdog thresholds, and related channel polling and webhooks behavior.
+- Gmail setup wizard: Covers Gmail setup wizard across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watcher behavior.
+- Watcher start/serve: Covers Watcher start/serve across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watcher behavior.
+- Tailscale/public routing: Covers Tailscale/public routing across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watcher behavior.
+- Push token validation: Covers Push token validation across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watcher behavior.
+- Gmail event routing: Covers Gmail event routing across `openclaw webhooks gmail setup`, `hooks.gmail` config, `gog gmail watch start/serve`, watcher startup and renewal, and related Gmail Pub/Sub watcher behavior.
+- POST /hooks/wake: Covers POST /hooks/wake across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhook behavior.
+- POST /hooks/agent: Covers POST /hooks/agent across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhook behavior.
+- Mapped hooks: Covers Mapped hooks across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhook behavior.
+- Hook auth policy: Covers Hook auth policy across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhook behavior.
+- Async dispatch: Covers Async dispatch across `/hooks/wake`, `/hooks/agent`, mapped hooks under `/hooks/<name>`, token extraction, and related HTTP webhook behavior.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Beta (76%)`
+- Positive signals: Telegram and Zalo have focused coverage for polling status, transport state, liveness, leases, sessions, webhook status, webhook handlers, lifecycle, and polling media replies.
+- Negative signals: Long polling depends on real network behavior, host sleep, proxy/DNS/TLS state, Telegram/Zalo API behavior, and gateway event-loop load. Local tests cannot cover every failure mode.
+- Integration gaps: Add a network-fault harness for Telegram/Zalo monitors that simulates long-poll timeout, host sleep, active webhook conflict, schema config drift, and restart after event-loop stalls.
+
+## Quality Score
+
+- Score: `Beta (70%)`
+- Gitcrawl reports: PR #73884 fixes false Telegram polling-stall restarts; a fallback query found issue #86535, where the Telegram polling stall detector treats host sleep as an active `getUpdates` stall.
+- Discrawl reports: Review comments on PRs #41153, #70579, and #57737 focus on polling stall watchdog thresholds and schema metadata drift; user reports mention startup blocking sequentially on the first Telegram poll under event-loop load.
+- Good qualities: Telegram has a configurable `pollingStallThresholdMs`, lease protection for one active poller per token, transport-dirty restart behavior, and docs for webhook vs long-polling. Zalo documents and tests polling/webhook mutual exclusion and media reply behavior.
+- Bad qualities: Polling health is sensitive to event-loop stalls, sleep, schema drift, and network failures. The real-world bug record is active and channel-specific.
+- Excluded from quality: Test inventory and runtime proof depth; they are coverage inputs only.
+
+## Completeness Score
+
+- Score: `Beta (76%)`
+- Surface instructions: evaluated against `references/completeness/automation-cron-hooks-tasks-polling.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Telegram long polling, Telegram webhook mode, Zalo polling/webhook mode, Polling stall diagnostics, iMessage watch fallback, Gmail setup wizard, Watcher start/serve, Tailscale/public routing, Push token validation, Gmail event routing, POST /hooks/wake, POST /hooks/agent, Mapped hooks, Hook auth policy, and Async dispatch.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- Polling watchdogs should distinguish host sleep/event-loop pause from active API request stalls.
+- Channel config schema generation must stay tied to runtime fields like `pollingStallThresholdMs`.
+- Startup should avoid serial channel polling dependencies that can block the whole gateway under event-loop load.
+
+## Evidence
+
+### Docs
+
+- `docs/channels/telegram.md` documents long polling as default, webhook mode, `pollingStallThresholdMs`, getUpdates conflicts, liveness troubleshooting, and doctor/status probes.
+- `docs/channels/zalo.md` documents long polling as the default, webhook mode, and mutual exclusion between polling and webhook modes.
+- `docs/channels/troubleshooting.md` includes polling stall diagnostics.
+- `docs/channels/imessage-from-bluebubbles.md` notes iMessage watch behavior with a polling fallback.
+
+### Source
+
+- `extensions/telegram/src/monitor.ts`, `extensions/telegram/src/monitor-polling.runtime.ts`, `extensions/telegram/src/polling-liveness.ts`, `extensions/telegram/src/polling-lease.ts`, `extensions/telegram/src/polling-session.ts`, `extensions/telegram/src/polling-status.ts`, and `extensions/telegram/src/webhook-status.ts` implement Telegram polling/webhook monitoring.
+- `extensions/zalo/src/monitor.ts`, `extensions/zalo/src/monitor.webhook.ts`, `extensions/zalo/src/monitor-durable.ts`, and `extensions/zalo/src/outbound-media.ts` implement Zalo polling/webhook behavior.
+- `extensions/imessage/src/approval-reaction-poller.ts` implements an iMessage approval polling path.
+
+### Integration tests
+
+- `extensions/zalo/src/monitor.webhook-e2e.test.ts` covers Zalo webhook behavior.
+- `extensions/zalo/src/monitor.polling.media-reply.test.ts` covers Zalo polling with media replies.
+- Telegram monitor tests are mostly focused runtime tests rather than live API e2e.
+
+### Unit tests
+
+- `extensions/telegram/src/polling-status.test.ts`, `extensions/telegram/src/polling-transport-state.test.ts`, `extensions/telegram/src/polling-session.test.ts`, `extensions/telegram/src/polling-liveness.test.ts`, `extensions/telegram/src/polling-lease.test.ts`, and `extensions/telegram/src/webhook-status.test.ts` cover Telegram monitor pieces.
+- `extensions/zalo/src/monitor.lifecycle.test.ts`, `extensions/zalo/src/monitor.webhook.test.ts`, `extensions/zalo/src/monitor.polling.media-reply.test.ts`, and `extensions/zalo/src/monitor.image.polling.test.ts` cover Zalo behavior.
+- `extensions/imessage/src/approval-reaction-poller.test.ts` covers iMessage reaction polling.
+
+### Gitcrawl queries
+
+Query:
+
+`gitcrawl search openclaw/openclaw --query "channel polling webhook getUpdates polling stall Zalo Telegram" --json --limit 5`
+
+Results:
+
+- No hits for the exact query.
+
+Fallback query:
+
+`gitcrawl search openclaw/openclaw --query "pollingStallThresholdMs" --json --limit 5`
+
+Results:
+
+- PR #73884 fixes Telegram false polling-stall restarts.
+
+Fallback query:
+
+`gitcrawl search openclaw/openclaw --query "poll loop" --json --limit 5`
+
+Results:
+
+- Issue #86535 reports Telegram polling stall detector treating host sleep/event-loop pause as an active `getUpdates` stall.
+
+### Discrawl queries
+
+Query:
+
+`/Users/kevinlin/.local/bin/discrawl search --mode hybrid --limit 5 "pollingStallThresholdMs"`
+
+Results:
+
+- PR #41153 closure says current main hardens Telegram polling-stall detection with a 120s default and configurable per-account override.
+- PR #70579 review warns that schema validation drift could reject tuned `pollingStallThresholdMs` configs.
+- PR #57737 review warns bundled schema metadata must be regenerated when adding `pollingStallThresholdMs`.
+
+Fallback query:
+
+`/Users/kevinlin/.local/bin/discrawl search --mode hybrid --limit 5 "poll loop"`
+
+Results:
+
+- A Discord user report says Telegram account startup can block sequentially on the first poll when event-loop load is high, causing timeouts and multi-minute stalls.
--- a/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/cron-delivery-alerts.md
+++ b/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/cron-delivery-alerts.md
@@ -0,0 +1,105 @@
+---
+title: "Automation: cron, hooks, tasks, polling - Cron Delivery and Failure Alerts Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Automation: cron, hooks, tasks, polling - Cron Delivery and Failure Alerts Maturity Note
+
+## Summary
+
+Cron delivery is feature-rich: jobs can announce to channels, post webhook payloads, suppress runner fallback delivery, preserve last/current chat routing, mirror direct delivery into transcripts, suppress stale interim text, prefer descendant subagent output, and notify failure destinations. The implementation is powerful but complex enough that archive evidence still shows privacy and routing hazards around failure alerts and webhook mode.
+
+## Category Scope
+
+This category covers cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, failure destinations, skipped-run alerts, message-tool delivery awareness, descendant subagent delivery preference, and cleanup after isolated runs.
+
+## Features
+
+- Chat announce delivery: Covers Chat announce delivery across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
+- Webhook delivery: Covers Webhook delivery across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
+- Failure destinations: Covers Failure destinations across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
+- Skipped-run alerts: Covers Skipped-run alerts across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
+- Delivery previews: Covers Delivery previews across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Stable (82%)`
+- Positive signals: Delivery has focused coverage for direct delivery, delivery target resolution, failure alerts, delivery plans/previews, double-announce prevention, named-agent delivery, outbound runtime dispatch, stale interim suppression, and delivery status persistence.
+- Negative signals: Coverage is mostly simulated and component-level. Cross-channel live delivery, webhook delivery failure privacy, and descendant subagent delivery behavior are not proven by one broad e2e scenario.
+- Integration gaps: A live matrix should cover announce, webhook, no-deliver, explicit failure destinations, stale target rejection, skipped provider-preflight alerts, and descendant subagent final-output delivery across at least one chat channel and one webhook receiver.
+
+## Quality Score
+
+- Score: `Beta (74%)`
+- Gitcrawl reports: PR #85394 references failure-alert schema and cron-tool decomposition. The query found ongoing work near skipped-run alert behavior.
+- Discrawl reports: A review comment on PR #31059 warns that webhook-mode failure alerts without `to` could fall through to announce delivery and leak error details to chat targets.
+- Good qualities: Delivery planning is explicit, provider selector prefixes are validated, direct delivery uses idempotency keys and transient retry loops, stale deliveries can be skipped, and isolated cleanup closes tracked browser/MCP resources best-effort without masking the run result.
+- Bad qualities: Delivery has a large state space across channel routes, direct sends, fallback announcement, transcript mirroring, webhook mode, failure alerts, and subagent follow-up. The archive privacy warning shows mode boundaries can be easy to get wrong.
+- Excluded from quality: Test inventory and runtime proof depth; they are coverage inputs only.
+
+## Completeness Score
+
+- Score: `Stable (82%)`
+- Surface instructions: evaluated against `references/completeness/automation-cron-hooks-tasks-polling.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Chat announce delivery, Webhook delivery, Failure destinations, Skipped-run alerts, and Delivery previews.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- Failure-alert mode should fail closed when destination fields are incomplete, with no fallback to a broader recipient than the operator selected.
+- Delivery documentation should include a compact routing table for `last`, explicit channel targets, provider-prefixed targets, webhook mode, and failure destinations.
+- A small local webhook fixture would make delivery regressions easier to prove without real channel credentials.
+
+## Evidence
+
+### Docs
+
+- `docs/automation/cron-jobs.md` documents delivery modes `announce`, `webhook`, and `none`; explicit channel targets; channel prefix validation; message-tool interaction; output language; failure destinations; skipped alerts; and troubleshooting for no delivery.
+- `docs/automation/tasks.md` documents cleanup and completion behavior for cron tasks and descendant subagent output preference.
+- `docs/channels/discord.md` includes channel-specific cron delivery behavior for Discord text announcements.
+
+### Source
+
+- `src/cron/delivery.ts`, `src/cron/delivery-plan.ts`, `src/cron/delivery-preview.ts`, `src/cron/delivery-context.ts`, `src/cron/delivery-field-schemas.ts`, and `src/cron/webhook-url.ts` implement delivery validation, planning, preview, and URL handling.
+- `src/cron/isolated-agent/delivery-dispatch.ts`, `src/cron/isolated-agent/delivery-target.ts`, `src/cron/isolated-agent/delivery-outbound.runtime.ts`, and `src/cron/isolated-agent/subagent-followup.ts` implement isolated direct delivery, idempotency, retries, transcript mirroring, and descendant-output preference.
+- `src/cron/service/initial-delivery.ts`, `src/cron/service/task-ledger.ts`, and `src/cron/service/timer.ts` feed initial delivery context, task state, and failure alerts from the scheduler.
+
+### Integration tests
+
+- `src/cron/isolated-agent.direct-delivery-core-channels.test.ts` covers direct delivery across core channel abstractions.
+- `src/cron/isolated-agent/delivery-dispatch.named-agent.test.ts` and `src/cron/isolated-agent/delivery-dispatch.double-announce.test.ts` exercise integrated isolated delivery dispatch cases.
+- `src/cron/isolated-agent.delivery-awareness.test.ts` covers awareness of agent-sent messages versus fallback delivery.
+
+### Unit tests
+
+- `src/cron/delivery.test.ts`, `src/cron/delivery-plan.test.ts`, `src/cron/delivery-preview.test.ts`, `src/cron/delivery.failure-notify.test.ts`, and `src/cron/delivery-context.test.ts` cover planning and alert logic.
+- `src/cron/isolated-agent/delivery-target.test.ts`, `src/cron/isolated-agent/channel-output-policy.test.ts`, and `src/cron/isolated-agent/subagent-followup.test.ts` cover target resolution, channel output rules, and descendant follow-up.
+- `src/cron/service.delivery-plan.test.ts`, `src/cron/service.failure-alert.test.ts`, and `src/cron/service.persists-delivered-status.test.ts` cover service-level delivery behavior.
+
+### Gitcrawl queries
+
+Query:
+
+`gitcrawl search openclaw/openclaw --query "cron delivery failure alerts announce webhook skipped" --json --limit 5`
+
+Results:
+
+- PR #85394, `refactor(cron-tool): decompose into per-action tools (WOR-317)`, includes failure-alert schema and skipped-run alert fields, showing this surface is actively evolving.
+
+### Discrawl queries
+
+Query:
+
+`/Users/kevinlin/.local/bin/discrawl search --mode hybrid --limit 5 "cron delivery failure alerts announce webhook skipped"`
+
+Results:
+
+- Review comment on PR #31059 warns that `sendCronFailureAlert` in webhook mode without `to` could fall through to announce delivery and leak failure text to chat targets.
--- a/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/cron-execution-diagnostics.md
+++ b/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/cron-execution-diagnostics.md
@@ -0,0 +1,113 @@
+---
+title: "Automation: cron, hooks, tasks, polling - Cron Runs and Diagnostics Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Automation: cron, hooks, tasks, polling - Cron Runs and Diagnostics Maturity Note
+
+## Summary
+
+Cron execution has a mature service and isolated-agent implementation: it records run history, maps run outcomes, supports manual and due-only runs, enforces model selection, checks local provider reachability, records diagnostics, and creates task-ledger entries. Coverage is broad but mostly focused and simulated. Quality is limited by live reports around model preflight/fallback semantics and long-running deterministic jobs.
+
+## Category Scope
+
+This category covers scheduler dispatch, timer arming, manual/due runs, isolated agent execution, session identity, model selection, fallback policy, provider preflight, run timeouts, run diagnostics, run history, and task-ledger creation. It excludes delivery/alerts, which are scored separately.
+
+## Features
+
+- Manual cron runs: Covers Manual cron runs across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
+- Isolated cron execution: Covers Isolated cron execution across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
+- Model/provider preflight: Covers Model/provider preflight across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
+- Run history: Covers Run history across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
+- Timeout and denial diagnostics: Covers Timeout and denial diagnostics across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Stable (83%)`
+- Positive signals: The `src/cron/service.*.test.ts` and `src/cron/isolated-agent/*.test.ts` suites cover dispatch, timer rearming, restart catchup, model overrides, fallback policy, local model preflight, run diagnostics, timeout policy, meta-error status propagation, session-key isolation, and task-ledger creation.
+- Negative signals: The component has less live/e2e proof for real provider stalls, real Gateway restarts during active isolated runs, and long-running deterministic process supervision through the embedded tool path.
+- Integration gaps: A live scenario should prove a due isolated run through a real Gateway, a model-preflight skip, a timeout, a manual `cron.run --wait`, and durable `cron.runs` recovery after restart.
+
+## Quality Score
+
+- Score: `Beta (73%)`
+- Gitcrawl reports: Issue #79329 reports cron model preflight skipping an entire run when a local primary is unreachable instead of trying configured cloud fallbacks.
+- Discrawl reports: A maintainer/user thread on May 17 describes long-running deterministic cron work where Codex-native shell ownership can end before OpenClaw receives command output; the recommended pattern is embedded OpenClaw tools with `exec` plus `process` polling and an adequate `timeoutSeconds`.
+- Good qualities: Execution paths separate main-session system events from isolated/current/custom `agentTurn` jobs, enforce model allowlists before runner start, persist run logs and diagnostics, and classify skipped/error/timeout states instead of treating every assistant reply as success.
+- Bad qualities: Provider preflight and long-running process ownership remain operator-sensitive. The runtime has many guardrails, but the operational model still requires users to choose the right agent/tool execution path for deterministic shell work.
+- Excluded from quality: Test inventory and runtime proof depth; they are coverage inputs only.
+
+## Completeness Score
+
+- Score: `Stable (83%)`
+- Surface instructions: evaluated against `references/completeness/automation-cron-hooks-tasks-polling.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Manual cron runs, Isolated cron execution, Model/provider preflight, Run history, and Timeout and denial diagnostics.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- Model preflight should align with fallback expectations or make strict-primary skip behavior unmistakable to operators.
+- Long-running deterministic work needs a clearer first-class pattern or stronger docs that distinguish agent-turn orchestration from deterministic execution.
+- A restart-resume proof for active isolated cron runs would reduce uncertainty around timeout and task reconciliation behavior.
+
+## Evidence
+
+### Docs
+
+- `docs/automation/cron-jobs.md` documents execution styles, isolated session behavior, model/thinking/fallback precedence, local-provider preflight, timeout handling, run history, and manual `cron run --wait`.
+- `.mem/main/ref/cron-run-diagnostics.md` and `.mem/main/pkg/claw/flow/cron-run-diagnostics.md` describe diagnostic expectations and recent repair context for cron run failures.
+- `docs/automation/tasks.md` states that every cron execution creates a background task and explains cron-specific lost-state reconciliation.
+
+### Source
+
+- `src/cron/service/timer.ts`, `src/cron/service/ops.ts`, `src/cron/service/timeout-policy.ts`, and `src/cron/service/task-ledger.ts` own dispatch, manual runs, timeout policy, and task creation.
+- `src/cron/run-log.ts`, `src/cron/run-diagnostics.ts`, and `src/cron/retry-hint.ts` implement durable run history and diagnostic summaries.
+- `src/cron/isolated-agent/run.ts`, `src/cron/isolated-agent/model-selection.ts`, `src/cron/isolated-agent/model-preflight.runtime.ts`, `src/cron/isolated-agent/run-fallback-policy.ts`, and `src/cron/isolated-agent/session-key.ts` implement isolated agent execution, model choice, preflight, fallbacks, and session identity.
+
+### Integration tests
+
+- `src/cron/cron-protocol-conformance.test.ts` covers protocol-level cron behavior.
+- `src/cron/isolated-agent/model-preflight.runtime.test.ts` exercises runtime provider-preflight behavior rather than only pure functions.
+- `src/cron/isolated-agent/run.runtime-plugins.test.ts` exercises runtime plugin integration during cron runs.
+
+### Unit tests
+
+- `src/cron/service.restart-catchup.test.ts`, `src/cron/service.rearm-timer-when-running.test.ts`, `src/cron/service.prevents-duplicate-timers.test.ts`, `src/cron/service.every-jobs-fire.test.ts`, and `src/cron/service/timeout-policy.test.ts` cover scheduler execution mechanics.
+- `src/cron/isolated-agent/run.cron-model-override.test.ts`, `src/cron/isolated-agent/run.payload-fallbacks.test.ts`, `src/cron/isolated-agent/run.meta-error-status.test.ts`, `src/cron/isolated-agent/run.interim-retry.test.ts`, `src/cron/isolated-agent/run.live-session-model-switch.test.ts`, and `src/cron/isolated-agent/run.tools-allow.test.ts` cover isolated run behavior.
+- `src/cron/run-log.test.ts`, `src/cron/run-log.error-reason.test.ts`, and `src/cron/run-diagnostics.test.ts` cover run history and diagnostics.
+
+### Gitcrawl queries
+
+Query:
+
+`gitcrawl search openclaw/openclaw --query "cron timeout diagnostics model preflight run history" --json --limit 5`
+
+Results:
+
+- Issue #79329, `Cron model preflight skips entire run when local primary is unreachable, ignoring configured cloud fallbacks [AI]`, is the only hit and directly lowers quality for model-preflight semantics.
+
+### Discrawl queries
+
+Query:
+
+`/Users/kevinlin/.local/bin/discrawl search --mode hybrid --limit 5 "cron timeout diagnostics model preflight run history"`
+
+Results:
+
+- No matching Discord messages returned for this exact query.
+
+Fallback query:
+
+`/Users/kevinlin/.local/bin/discrawl search --mode hybrid --limit 5 "poll loop"`
+
+Results:
+
+- May 17 maintainer/user discussion recommends embedded OpenClaw execution with `exec` plus `process` for long-running deterministic cron work, and warns that Codex-native shell ownership can end before the final result is observed.
--- a/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/cron-job-lifecycle.md
+++ b/docs/maturity-scorecard/inventory/automation-cron-hooks-tasks-polling/cron-job-lifecycle.md
@@ -0,0 +1,139 @@
+---
+title: "Automation: cron, hooks, tasks, polling - Cron Jobs Maturity Note"
+version: 3
+last_refreshed: 2026-05-30
+last_refreshed_by: codex
+---
+
+# Automation: cron, hooks, tasks, polling - Cron Jobs Maturity Note
+
+## Summary
+
+Cron job authoring and schedule management are documented and implemented across CLI, Gateway RPC, and agent-tool entrypoints. The scheduler supports one-shot, interval, and cron-expression jobs with timezone and stagger controls, plus durable job and runtime state files. The main risk is not basic feature absence; it is schedule-state edge behavior, where the archives show recurring reports around unresolved or stale `nextRunAtMs`, manual-run deletion, and long-uptime scheduler behavior.
+
+## Category Scope
+
+Included in this category:
+
+- Create/edit/remove jobs: Covers Create/edit/remove jobs across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
+- Schedule types: Covers Schedule types across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
+- Timezone and stagger: Covers Timezone and stagger across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
+- Cron RPCs: Covers Cron RPCs across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
+- Agent cron tool: Covers Agent cron tool across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
+- Manual cron runs: Covers Manual cron runs across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
+- Isolated cron execution: Covers Isolated cron execution across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
+- Model/provider preflight: Covers Model/provider preflight across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
+- Run history: Covers Run history across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
+- Timeout and denial diagnostics: Covers Timeout and denial diagnostics across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
+- Chat announce delivery: Covers Chat announce delivery across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
+- Webhook delivery: Covers Webhook delivery across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
+- Failure destinations: Covers Failure destinations across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
+- Skipped-run alerts: Covers Skipped-run alerts across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
+- Delivery previews: Covers Delivery previews across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
+
+## Features
+
+- Create/edit/remove jobs: Covers Create/edit/remove jobs across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
+- Schedule types: Covers Schedule types across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
+- Timezone and stagger: Covers Timezone and stagger across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
+- Cron RPCs: Covers Cron RPCs across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
+- Agent cron tool: Covers Agent cron tool across cron job creation, listing, inspection, editing, and related cron job lifecycle behavior.
+- Manual cron runs: Covers Manual cron runs across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
+- Isolated cron execution: Covers Isolated cron execution across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
+- Model/provider preflight: Covers Model/provider preflight across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
+- Run history: Covers Run history across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
+- Timeout and denial diagnostics: Covers Timeout and denial diagnostics across scheduler dispatch, timer arming, manual/due runs, isolated agent execution, and related cron runs and diagnostics behavior.
+- Chat announce delivery: Covers Chat announce delivery across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
+- Webhook delivery: Covers Webhook delivery across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
+- Failure destinations: Covers Failure destinations across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
+- Skipped-run alerts: Covers Skipped-run alerts across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
+- Delivery previews: Covers Delivery previews across cron output delivery modes, channel target resolution, direct delivery retries, transcript mirroring, and related cron delivery and failure alerts behavior.
+
+## Archive Freshness
+
+- gitcrawl: `gitcrawl doctor --json` succeeded with `version=0.2.1`, `last_sync_at=2026-05-28T19:09:52.784704Z`, `repository_count=2`, `thread_count=29810`, `open_thread_count=11181`, `cluster_count=18594`, `db_path=/Users/kevinlin/.config/gitcrawl/stores/gitcrawl-store/data/openclaw__openclaw.sync.db`, `api_supported=false`, `github_token_present=false`, and `openai_key_present=true`.
+- discrawl: `discrawl status --json` succeeded with `state=current`, `generated_at=2026-05-30T14:10:20Z`, `last_sync_at=2026-05-29T19:27:40Z`, `messages=1487536`, `channels=25831`, `threads=25603`, `embedding_backlog=0`, `database_path=/Users/kevinlin/Library/Application Support/discrawl/discrawl.db`, `database_bytes=8035926016`, `share.remote=git@github.com-personal:openclaw/discord-store.git`, and `share.needs_update=true`.
+
+## Coverage Score
+
+- Score: `Stable (85%)`
+- Positive signals: CLI, Gateway RPC, and agent-tool paths share normalization through `src/cron/normalize.ts` and `src/gateway/server-methods/cron.ts`; targeted unit coverage exists for schedule parsing, schedule options, store migration, `nextRunAtMs` repair, top-of-hour staggering, list pagination, and manual runs.
+- Negative signals: Coverage is strongest at unit and focused service level; the evidence includes fewer end-to-end proofs that create/edit/run flows survive long-running Gateway uptime and hand-edited stores, or that the UI, CLI, and agent-tool surfaces stay in parity within one scenario.
+- Integration gaps: No single live/e2e scenario was found that starts a real Gateway, creates all three schedule types through different user surfaces, restarts the Gateway, and proves the same persisted jobs still compute the correct next run.
+
+## Quality Score
+
+- Score: `Beta (78%)`
+- Gitcrawl reports: Open threads include PR #52109 for high-frequency `every` schedule refire gaps, issue #81691 for exact-second future-slot repair, PR #75970 for malformed persisted jobs, issue #83538 for manual-run `deleteAfterRun` data loss, and issue #73166 for long-uptime scheduler stoppage.
+- Discrawl reports: Discord archive shows operator confusion around main-session cron rows that appeared enabled but had `lastError: "disabled"`, plus review discussion on unresolved next-run refire loops and schedule errors leaving timers idle.
+- Good qualities: The source has a clear service boundary, schema validation before service mutation, a job/state file split, timestamp validation, schedule identity tracking, and explicit docs for timezone, day-of-month/day-of-week OR behavior, stagger, and state-file handling.
+- Bad qualities: The lived bug record shows schedule-state repair remains subtle, and manual-run/delete semantics can surprise operators. Quality is limited by these operational edge cases, not by the test inventory.
+- Excluded from quality: Test inventory and runtime proof depth; they are coverage inputs only.
+
+## Completeness Score
+
+- Score: `Stable (85%)`
+- Surface instructions: evaluated against `references/completeness/automation-cron-hooks-tasks-polling.md`.
+- Positive signals: archived docs, source, test, Gitcrawl, and Discrawl evidence cover the taxonomy scope for Create/edit/remove jobs, Schedule types, Timezone and stagger, Cron RPCs, Agent cron tool, Manual cron runs, Isolated cron execution, Model/provider preflight, Run history, Timeout and denial diagnostics, Chat announce delivery, Webhook delivery, Failure destinations, Skipped-run alerts, and Delivery previews.
+- Negative signals: the archived note predated process-version-3 Completeness scoring, so this score is initialized from the same evidence breadth and known-gap record used for the archived Coverage score.
+- Missing capability branches: see `## Known Gaps` and `## Evidence` below for the recorded missing branches and operator-visible caveats.
+
+## Known Gaps
+
+- A single operator-facing scenario proof should cover creating `at`, `every`, and `cron` jobs through CLI/Gateway/agent-tool paths, editing non-schedule metadata, preserving/repairing state, restarting the Gateway, and manually running a due and non-due job.
+- Manual run semantics need sharper operator visibility around `deleteAfterRun` and no-op due checks.
+- Schedule-state repair should remain a regression focus because multiple archive entries cluster around `nextRunAtMs` corruption, zero values, unresolved schedules, and exact-second slot identity.
+
+## Evidence
+
+### Docs
+
+- `docs/automation/cron-jobs.md` documents job persistence at `~/.openclaw/cron/jobs.json`, runtime state in `jobs-state.json`, schedule kinds, timezone, stagger, manual run, run history, and management commands.
+- `docs/cli/cron.md` provides the CLI reference for `openclaw cron add`, `list`, `get`, `show`, `edit`, `run`, `runs`, and `remove`.
+- `docs/gateway/protocol.md` lists automation RPCs including `cron.get`, `cron.list`, `cron.status`, `cron.add`, `cron.update`, `cron.remove`, `cron.run`, and `cron.runs`.
+
+### Source
+
+- `src/gateway/server-methods/cron.ts` implements validated Gateway methods for `cron.list`, `cron.add`, `cron.update`, `cron.remove`, `cron.run`, and `cron.runs`.
+- `src/cli/cron-cli/register.cron-add.ts`, `src/cli/cron-cli/register.cron-edit.ts`, `src/cli/cron-cli/schedule-options.ts`, and `src/cli/cron-cli/shared.ts` implement CLI flag parsing, schedule construction, and display.
+- `src/agents/tools/cron-tool.ts` exposes the agent cron tool, recovers flat params into job objects, handles self-scope introspection, and calls Gateway cron methods.
+- `src/cron/normalize.ts`, `src/cron/schedule.ts`, `src/cron/stagger.ts`, `src/cron/service/jobs.ts`, `src/cron/service/ops.ts`, and `src/cron/service/store.ts` normalize, compute, persist, and mutate jobs.
+
+### Integration tests
+
+- `test/gateway.multi.e2e.test.ts` is broad gateway e2e coverage but not specific to all cron schedule-management paths.
+- `src/gateway/tools-invoke-http.cron-regression.test.ts` exercises cron through Gateway tool invocation.
+- `src/cron/cron-protocol-conformance.test.ts` and `src/cron/cron-protocol-schema.test.ts` exercise protocol shape and compatibility.
+
+### Unit tests
+
+- `src/cron/schedule.test.ts`, `src/cron/parse.test.ts`, `src/cron/normalize.test.ts`, `src/cron/stagger.test.ts`, and `src/cron/validate-timestamp.ts` cover schedule parsing and normalization.
+- `src/cron/service.jobs.test.ts`, `src/cron/service.jobs.top-of-hour-stagger.test.ts`, `src/cron/service.issue-regressions.test.ts`, `src/cron/service.store-load-invalid-main-job.test.ts`, and `src/cron/service/ops.test.ts` cover job creation, mutation, repair, and store behavior.
+- `src/cli/cron-cli/register.cron-simple.test.ts`, `src/cli/cron-cli/register.cron-edit.test.ts`, `src/cli/cron-cli/shared.test.ts`, and `src/agents/tools/cron-tool.schema.test.ts` cover CLI and agent-tool surfaces.
+
+### Gitcrawl queries
+
+Query:
+
+`gitcrawl search openclaw/openclaw --query "cron add schedule nextRunAtMs" --json --limit 5`
+
+Results:
+
+- PR #52109, `fix(cron): apply MIN_REFIRE_GAP_MS to every-schedule jobs`, reports high-frequency `every` schedule refire risk.
+- Issue #81691, `Cron future-slot repair misclassifies exact second cron slots`, reports exact-second state repair trouble.
+- PR #75970, `fix(cron): ignore malformed persisted jobs`, points to malformed persisted job handling.
+- Issue #83538, `cron: deleteAfterRun fires on manual run even when no run executes`, reports manual-run data-loss risk.
+- Issue #73166, `Cron scheduler silently stops firing after ~2.5 days of gateway uptime`, reports long-uptime scheduling failure.
+
+### Discrawl queries
+
+Query:
+
+`/Users/kevinlin/.local/bin/discrawl search --mode hybrid --limit 5 "cron add schedule nextRunAtMs"`
+
+Results:
+
+- PR #66083 discussion: unresolved `computeJobNextRunAtMs` results previously caused refire loops; current fix adds maintenance wake behavior for enabled jobs without a next run.
+- PR #63507 discussion: `nextRunAtMs=0` on non-schedule edits required repair.
+- User thread `Triggering main to do something in a cron.` includes a concrete main-session cron job that did not fire and showed `lastError: "disabled"`.
+- Review comment on PR #52619 warns that schedule computation errors could leave an enabled job with no armed timer.
--- a/Show More
+++ b/Show More