fix(agents): re-probe single-provider primary during cooldown (#90717)

Fixes #90702.

Allow a primary model with no fallback chain configured (a single-provider setup) to be re-probed periodically while it is in cooldown, gated by the existing probe throttle. This lets WHAM/subscription-limit cooldown state recover as soon as the provider accepts requests again, without waiting for a far-future provider reset timestamp.

Verified:
- node scripts/run-vitest.mjs src/agents/model-fallback.probe.test.ts
- git diff --check
- cherry-pick onto current origin/main and rerun focused regression
This commit is contained in:
Yzx
2026-06-06 05:20:57 +08:00
committed by GitHub
parent 2ab4eaa2b1
commit 6da3b1f6a3
2 changed files with 71 additions and 23 deletions

View File

@@ -360,6 +360,44 @@ describe("runWithModelFallback probe logic", () => {
await expectPrimarySkippedAfterLongCooldown("billing");
});
it("re-probes a single-provider primary blocked by a far-future subscription_limit (#90702)", () => {
// fallbacks:[] + a multi-day subscription_limit reset must still re-probe on
// the throttle instead of suspending until blockedUntil literally arrives,
// since the rolling cap usually recovers earlier. Multi-fallback setups keep
// preferring the fallback chain (covered above).
const sixDays = 6 * 24 * 60 * 60 * 1000;
// Profile state under test: a single profile blocked until six days after NOW,
// with reason "subscription_limit" and source "wham".
const usageStats = {
"openai-profile-1": {
blockedUntil: NOW + sixDays,
blockedReason: "subscription_limit",
blockedSource: "wham",
},
} satisfies AuthProfileStore["usageStats"];
// Case 1: no fallback candidates and no throttleKey supplied. The decision
// must be an immediate probe attempt that is recorded (markProbe: true).
// NOTE(review): presumably the helper's default throttle key has no recent
// probe recorded at this point — confirm against resolveOpenAiCooldownDecision.
expect(
resolveOpenAiCooldownDecision({
reason: "rate_limit",
soonest: NOW + sixDays,
hasFallbackCandidates: false,
usageStats,
}),
).toEqual({ type: "attempt", reason: "rate_limit", markProbe: true });
// The 30s probe throttle is still honored so recovery probing cannot hammer
// the upstream: a recent probe on the same key suspends until the slot opens.
// Case 2: seed a probe attempt 10 seconds before NOW on "recent-openai", then
// ask for a decision using that same throttleKey and expect a suspension.
probeThrottleInternals.lastProbeAttempt.set("recent-openai", NOW - 10_000);
expectOpenAiProbeSuspension(
resolveOpenAiCooldownDecision({
reason: "rate_limit",
soonest: NOW + sixDays,
hasFallbackCandidates: false,
throttleKey: "recent-openai",
usageStats,
}),
"rate_limit",
);
});
it("decides when cooldowned primary probes are allowed", () => {
expect(
resolveOpenAiCooldownDecision({
@@ -674,7 +712,7 @@ describe("runWithModelFallback probe logic", () => {
}
});
it("single candidate skips with rate_limit and exhausts candidates", async () => {
it("re-probes a single-provider rate-limited primary instead of suspending", async () => {
const cfg = makeCfg({
agents: {
defaults: {
@@ -686,22 +724,26 @@ describe("runWithModelFallback probe logic", () => {
},
} as Partial<OpenClawConfig>);
const almostExpired = NOW + 30 * 1000;
mockedGetSoonestCooldownExpiry.mockReturnValue(almostExpired);
// Far-future cooldown with no fallback chain: the primary must still be
// probed so a recovered rolling cap resumes work instead of staying silent
// until blockedUntil arrives. See #90702.
mockedGetSoonestCooldownExpiry.mockReturnValue(NOW + 6 * 24 * 60 * 60 * 1000);
const run = vi.fn().mockResolvedValue("unreachable");
const run = vi.fn().mockResolvedValue("probed-ok");
await expect(
runWithModelFallback({
const result = await runWithModelFallback({
cfg,
provider: "openai",
model: "gpt-4.1-mini",
fallbacksOverride: [],
run,
}),
).rejects.toThrow("All models failed");
});
expect(run).not.toHaveBeenCalled();
expect(result.result).toBe("probed-ok");
expect(run).toHaveBeenCalledTimes(1);
expect(run).toHaveBeenCalledWith("openai", "gpt-4.1-mini", {
allowTransientCooldownProbe: true,
});
});
it("scopes probe throttling by agentDir to avoid cross-agent suppression", () => {

View File

@@ -1060,7 +1060,7 @@ function shouldProbePrimaryDuringCooldown(params: {
profileIds: string[];
model: string;
}): boolean {
if (!params.isPrimary || !params.hasFallbackCandidates) {
if (!params.isPrimary) {
return false;
}
@@ -1068,6 +1068,16 @@ function shouldProbePrimaryDuringCooldown(params: {
return false;
}
// A single-provider primary has no fallback chain to prefer, so every open
// throttle slot is a recovery probe: "is the primary callable yet?" is a
// recovery question independent of fallback configuration. Without this, a
// fallbacks:[] setup that hits a rate/subscription cap stays suspended until
// the provider-reported reset (which can be days out) even though the rolling
// cap usually recovers earlier. See #90702.
if (!params.hasFallbackCandidates) {
return true;
}
const soonest = params.authRuntime.getSoonestCooldownExpiry(params.authStore, params.profileIds, {
now: params.now,
forModel: params.model,
@@ -1163,15 +1173,11 @@ function resolveCooldownDecision(params: {
}
// Billing is semi-persistent: the user may fix their balance, or a transient
// 402 might have been misclassified. Probe single-provider setups on the
// standard throttle so they can recover without a restart; when fallbacks
// exist, only probe near cooldown expiry so the fallback chain stays preferred.
// 402 might have been misclassified. shouldProbe already re-probes
// single-provider setups on the throttle (no fallback chain to prefer) and
// multi-fallback setups near cooldown expiry, so both recover without a restart.
if (inferredReason === "billing") {
const shouldProbeSingleProviderBilling =
params.isPrimary &&
!params.hasFallbackCandidates &&
isProbeThrottleOpen(params.now, params.probeThrottleKey);
if (params.isPrimary && (shouldProbe || shouldProbeSingleProviderBilling)) {
if (params.isPrimary && shouldProbe) {
return { type: "attempt", reason: inferredReason, markProbe: true };
}
return {