fix(agents): re-probe single-provider primary during cooldown (#90717)

Fixes #90702. Allow a single-provider primary to periodically probe through the existing cooldown throttle even when no fallback chain is configured. This lets WHAM/subscription-limit cooldown state recover without waiting for a far-future provider reset timestamp. Verified: - node scripts/run-vitest.mjs src/agents/model-fallback.probe.test.ts - git diff --check - cherry-pick onto current origin/main and rerun focused regression
2026-06-06 05:51:15 +08:00 · 2026-06-06 05:20:57 +08:00
parent 2ab4eaa2b1
commit 6da3b1f6a3
2 changed files with 71 additions and 23 deletions
--- a/src/agents/model-fallback.probe.test.ts
+++ b/src/agents/model-fallback.probe.test.ts
@@ -360,6 +360,44 @@ describe("runWithModelFallback – probe logic", () => {
    await expectPrimarySkippedAfterLongCooldown("billing");
  });

+  it("re-probes a single-provider primary blocked by a far-future subscription_limit (#90702)", () => {
+    // fallbacks:[] + a multi-day subscription_limit reset must still re-probe on
+    // the throttle instead of suspending until blockedUntil literally arrives,
+    // since the rolling cap usually recovers earlier. Multi-fallback setups keep
+    // preferring the fallback chain (covered above).
+    const sixDays = 6 * 24 * 60 * 60 * 1000;
+    const usageStats = {
+      "openai-profile-1": {
+        blockedUntil: NOW + sixDays,
+        blockedReason: "subscription_limit",
+        blockedSource: "wham",
+      },
+    } satisfies AuthProfileStore["usageStats"];
+
+    expect(
+      resolveOpenAiCooldownDecision({
+        reason: "rate_limit",
+        soonest: NOW + sixDays,
+        hasFallbackCandidates: false,
+        usageStats,
+      }),
+    ).toEqual({ type: "attempt", reason: "rate_limit", markProbe: true });
+
+    // The 30s probe throttle is still honored so recovery probing cannot hammer
+    // the upstream: a recent probe on the same key suspends until the slot opens.
+    probeThrottleInternals.lastProbeAttempt.set("recent-openai", NOW - 10_000);
+    expectOpenAiProbeSuspension(
+      resolveOpenAiCooldownDecision({
+        reason: "rate_limit",
+        soonest: NOW + sixDays,
+        hasFallbackCandidates: false,
+        throttleKey: "recent-openai",
+        usageStats,
+      }),
+      "rate_limit",
+    );
+  });
+
  it("decides when cooldowned primary probes are allowed", () => {
    expect(
      resolveOpenAiCooldownDecision({
@@ -674,7 +712,7 @@ describe("runWithModelFallback – probe logic", () => {
    }
  });

-  it("single candidate skips with rate_limit and exhausts candidates", async () => {
+  it("re-probes a single-provider rate-limited primary instead of suspending", async () => {
    const cfg = makeCfg({
      agents: {
        defaults: {
@@ -686,22 +724,26 @@ describe("runWithModelFallback – probe logic", () => {
      },
    } as Partial<OpenClawConfig>);

-    const almostExpired = NOW + 30 * 1000;
-    mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
+    // Far-future cooldown with no fallback chain: the primary must still be
+    // probed so a recovered rolling cap resumes work instead of staying silent
+    // until blockedUntil arrives. See #90702.
+    mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + 6 * 24 * 60 * 60 * 1000);

-    const run = vi.fn().mockResolvedValue("unreachable");
+    const run = vi.fn().mockResolvedValue("probed-ok");

-    await expect(
-      runWithModelFallback({
+    const result = await runWithModelFallback({
      cfg,
      provider: "openai",
      model: "gpt-4.1-mini",
      fallbacksOverride: [],
      run,
-      }),
-    ).rejects.toThrow("All models failed");
+    });

-    expect(run).not.toHaveBeenCalled();
+    expect(result.result).toBe("probed-ok");
+    expect(run).toHaveBeenCalledTimes(1);
+    expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini", {
+      allowTransientCooldownProbe: true,
+    });
  });

  it("scopes probe throttling by agentDir to avoid cross-agent suppression", () => {
--- a/src/agents/model-fallback.ts
+++ b/src/agents/model-fallback.ts
@@ -1060,7 +1060,7 @@ function shouldProbePrimaryDuringCooldown(params: {
  profileIds: string[];
  model: string;
 }): boolean {
-  if (!params.isPrimary || !params.hasFallbackCandidates) {
+  if (!params.isPrimary) {
    return false;
  }

@@ -1068,6 +1068,16 @@ function shouldProbePrimaryDuringCooldown(params: {
    return false;
  }

+  // A single-provider primary has no fallback chain to prefer, so every open
+  // throttle slot is a recovery probe: "is the primary callable yet?" is a
+  // recovery question independent of fallback configuration. Without this, a
+  // fallbacks:[] setup that hits a rate/subscription cap stays suspended until
+  // the provider-reported reset (which can be days out) even though the rolling
+  // cap usually recovers earlier. See #90702.
+  if (!params.hasFallbackCandidates) {
+    return true;
+  }
+
  const soonest = params.authRuntime.getSoonestCooldownExpiry(params.authStore, params.profileIds, {
    now: params.now,
    forModel: params.model,
@@ -1163,15 +1173,11 @@ function resolveCooldownDecision(params: {
  }

  // Billing is semi-persistent: the user may fix their balance, or a transient
-  // 402 might have been misclassified. Probe single-provider setups on the
-  // standard throttle so they can recover without a restart; when fallbacks
-  // exist, only probe near cooldown expiry so the fallback chain stays preferred.
+  // 402 might have been misclassified. shouldProbe already re-probes
+  // single-provider setups on the throttle (no fallback chain to prefer) and
+  // multi-fallback setups near cooldown expiry, so both recover without a restart.
  if (inferredReason === "billing") {
-    const shouldProbeSingleProviderBilling =
-      params.isPrimary &&
-      !params.hasFallbackCandidates &&
-      isProbeThrottleOpen(params.now, params.probeThrottleKey);
-    if (params.isPrimary && (shouldProbe || shouldProbeSingleProviderBilling)) {
+    if (params.isPrimary && shouldProbe) {
      return { type: "attempt", reason: inferredReason, markProbe: true };
    }
    return {