diff --git a/src/agents/model-fallback.probe.test.ts b/src/agents/model-fallback.probe.test.ts index 4929c4af2fba..b3d98b69ea08 100644 --- a/src/agents/model-fallback.probe.test.ts +++ b/src/agents/model-fallback.probe.test.ts @@ -360,6 +360,44 @@ describe("runWithModelFallback – probe logic", () => { await expectPrimarySkippedAfterLongCooldown("billing"); }); + it("re-probes a single-provider primary blocked by a far-future subscription_limit (#90702)", () => { + // fallbacks:[] + a multi-day subscription_limit reset must still re-probe on + // the throttle instead of suspending until blockedUntil literally arrives, + // since the rolling cap usually recovers earlier. Multi-fallback setups keep + // preferring the fallback chain (covered above). + const sixDays = 6 * 24 * 60 * 60 * 1000; + const usageStats = { + "openai-profile-1": { + blockedUntil: NOW + sixDays, + blockedReason: "subscription_limit", + blockedSource: "wham", + }, + } satisfies AuthProfileStore["usageStats"]; + + expect( + resolveOpenAiCooldownDecision({ + reason: "rate_limit", + soonest: NOW + sixDays, + hasFallbackCandidates: false, + usageStats, + }), + ).toEqual({ type: "attempt", reason: "rate_limit", markProbe: true }); + + // The 30s probe throttle is still honored so recovery probing cannot hammer + // the upstream: a recent probe on the same key suspends until the slot opens. + probeThrottleInternals.lastProbeAttempt.set("recent-openai", NOW - 10_000); + expectOpenAiProbeSuspension( + resolveOpenAiCooldownDecision({ + reason: "rate_limit", + soonest: NOW + sixDays, + hasFallbackCandidates: false, + throttleKey: "recent-openai", + usageStats, + }), + "rate_limit", + ); + }); + it("decides when cooldowned primary probes are allowed", () => { expect( resolveOpenAiCooldownDecision({ @@ -674,7 +712,7 @@ describe("runWithModelFallback – probe logic", () => { } }); - it("single candidate skips with rate_limit and exhausts candidates", async () => { + it("re-probes a single-provider rate-limited primary instead of suspending", async () => { const cfg = makeCfg({ agents: { defaults: { @@ -686,22 +724,26 @@ describe("runWithModelFallback – probe logic", () => { }, } as Partial); - const almostExpired = NOW + 30 * 1000; - mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired); + // Far-future cooldown with no fallback chain: the primary must still be + // probed so a recovered rolling cap resumes work instead of staying silent + // until blockedUntil arrives. See #90702. + mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + 6 * 24 * 60 * 60 * 1000); - const run = vi.fn().mockResolvedValue("unreachable"); + const run = vi.fn().mockResolvedValue("probed-ok"); - await expect( - runWithModelFallback({ - cfg, - provider: "openai", - model: "gpt-4.1-mini", - fallbacksOverride: [], - run, - }), - ).rejects.toThrow("All models failed"); + const result = await runWithModelFallback({ + cfg, + provider: "openai", + model: "gpt-4.1-mini", + fallbacksOverride: [], + run, + }); - expect(run).not.toHaveBeenCalled(); + expect(result.result).toBe("probed-ok"); + expect(run).toHaveBeenCalledTimes(1); + expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini", { + allowTransientCooldownProbe: true, + }); }); it("scopes probe throttling by agentDir to avoid cross-agent suppression", () => { diff --git a/src/agents/model-fallback.ts b/src/agents/model-fallback.ts index dc57f2046bfd..42b5e34a0264 100644 --- a/src/agents/model-fallback.ts +++ b/src/agents/model-fallback.ts @@ -1060,7 +1060,7 @@ function shouldProbePrimaryDuringCooldown(params: { profileIds: string[]; model: string; }): boolean { - if (!params.isPrimary || !params.hasFallbackCandidates) { + if (!params.isPrimary) { return false; } @@ -1068,6 +1068,16 @@ function shouldProbePrimaryDuringCooldown(params: { return false; } + // A single-provider primary has no fallback chain to prefer, so every open + // throttle slot is a recovery probe: "is the primary callable yet?" is a + // recovery question independent of fallback configuration. Without this, a + // fallbacks:[] setup that hits a rate/subscription cap stays suspended until + // the provider-reported reset (which can be days out) even though the rolling + // cap usually recovers earlier. See #90702. + if (!params.hasFallbackCandidates) { + return true; + } + const soonest = params.authRuntime.getSoonestCooldownExpiry(params.authStore, params.profileIds, { now: params.now, forModel: params.model, @@ -1163,15 +1173,11 @@ function resolveCooldownDecision(params: { } // Billing is semi-persistent: the user may fix their balance, or a transient - // 402 might have been misclassified. Probe single-provider setups on the - // standard throttle so they can recover without a restart; when fallbacks - // exist, only probe near cooldown expiry so the fallback chain stays preferred. + // 402 might have been misclassified. shouldProbe already re-probes + // single-provider setups on the throttle (no fallback chain to prefer) and + // multi-fallback setups near cooldown expiry, so both recover without a restart. if (inferredReason === "billing") { - const shouldProbeSingleProviderBilling = - params.isPrimary && - !params.hasFallbackCandidates && - isProbeThrottleOpen(params.now, params.probeThrottleKey); - if (params.isPrimary && (shouldProbe || shouldProbeSingleProviderBilling)) { + if (params.isPrimary && shouldProbe) { return { type: "attempt", reason: inferredReason, markProbe: true }; } return {