mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-06 05:51:15 +08:00
fix(auth): add force re-login recovery and fallback auth skips
Summary:
- Add forced provider re-login support that clears cached auth profiles before running provider login again.
- Add provider-auth remediation guidance and a session-scoped skip cache for known-bad fallback auth attempts.
- Wire session ids through agent command, auto-reply, and embedded compaction fallback callers so the skip cache applies on real run paths.
- Fail closed when forced auth profile removal cannot update the profile store.
Verification:
- Local format, lint, diff-check, focused Vitest shards, and autoreview passed.
- PR CI, CodeQL Security High, and Critical Quality agent-runtime-boundary passed on head 1b4e9e753e.
Co-authored-by: Mert Basar <MertBasar0@users.noreply.github.com>
This commit is contained in:
@@ -67,6 +67,27 @@ OpenClaw separates the selected provider/model from why it was selected. That so
|
||||
|
||||
The auto fallback primary-probe interval is five minutes and is not configurable. OpenClaw remembers recent probes per session and primary model so a failing primary is not retried on every turn. OpenClaw sends a visible notice when a session moves onto fallback and another notice when it returns to the selected primary; it does not repeat the notice on every sticky fallback turn.
|
||||
|
||||
## Auth failure skip cache
|
||||
|
||||
By default, every new turn keeps the existing fallback retry behavior: OpenClaw
will try each configured fallback candidate again, including non-primary
candidates that recently failed with `auth` or `auth_permanent`.
|
||||
|
||||
Operators who prefer to suppress those repeat auth failures can opt in with:
|
||||
|
||||
```bash
|
||||
OPENCLAW_FALLBACK_SKIP_TTL_MS=60000
|
||||
```
|
||||
|
||||
When enabled, OpenClaw records an in-memory, session-scoped skip marker for a
non-primary fallback candidate after an auth-class failure. The marker is keyed
by session id, provider, and model. Primary candidates are never skipped, so an
explicit user model selection still surfaces the real auth error. The cache is
process-local and clears on Gateway restart.
|
||||
|
||||
The value is a TTL in milliseconds. `0` or an unset value disables the cache;
negative or unparseable values are treated the same way. Positive values are
clamped between 1 second and 10 minutes.
|
||||
|
||||
## User-visible fallback notices
|
||||
|
||||
When a session moves onto an auto-selected fallback, OpenClaw sends a status notice in the same reply surface:
|
||||
|
||||
@@ -206,6 +206,17 @@ openclaw models auth login --provider openai --profile-id openai:lain
|
||||
This is the easiest way to keep multiple OAuth logins for the same provider
|
||||
separate inside one agent.
|
||||
|
||||
Use `--force` when a saved provider profile is stuck, expired, or tied to the
wrong account and the normal login command keeps reusing it. `--force` deletes
the saved auth profiles for that provider in the selected agent directory, then
runs the same provider auth flow again. It does not revoke credentials at the
provider; rotate or revoke them in the provider dashboard when you need
provider-side invalidation.
|
||||
|
||||
```bash
|
||||
openclaw models auth login --provider anthropic --force
|
||||
```
|
||||
|
||||
### Per-session (chat command)
|
||||
|
||||
Use `/model <alias-or-id>@<profileId>` to pin a specific provider credential for the current session (example profile ids: `anthropic:default`, `anthropic:work`).
|
||||
|
||||
@@ -704,6 +704,7 @@ beforeAll(async () => {
|
||||
type FallbackRunnerParams = {
|
||||
provider: string;
|
||||
model: string;
|
||||
sessionId?: string;
|
||||
run: (provider: string, model: string) => Promise<unknown>;
|
||||
onFallbackStep?: (step: Record<string, unknown>) => void | Promise<void>;
|
||||
classifyResult?: (params: {
|
||||
@@ -969,6 +970,7 @@ describe("agentCommand – LiveSessionModelSwitchError retry", () => {
|
||||
const secondCall = mockCallArg(state.runWithModelFallbackMock, 1) as FallbackRunnerParams;
|
||||
expect(secondCall.provider).toBe("openai");
|
||||
expect(secondCall.model).toBe("gpt-5.4");
|
||||
expect(secondCall.sessionId).toBe("session-1");
|
||||
|
||||
const lifecycleEndCalls = state.emitAgentEventMock.mock.calls.filter((call: unknown[]) => {
|
||||
const arg = call[0] as { stream?: string; data?: { phase?: string } };
|
||||
|
||||
@@ -1513,6 +1513,7 @@ async function agentCommandInternal(
|
||||
runId,
|
||||
agentDir,
|
||||
agentId: sessionAgentId,
|
||||
sessionId,
|
||||
sessionKey: sessionKey ?? sessionId,
|
||||
prepareAgentHarnessRuntime: async ({
|
||||
provider: providerValue,
|
||||
|
||||
@@ -445,6 +445,7 @@ export async function compactEmbeddedAgentSessionDirect(
|
||||
runId: params.runId ?? params.sessionId,
|
||||
agentDir: params.agentDir,
|
||||
agentId: fallbackAgentId,
|
||||
sessionId: params.sessionId,
|
||||
sessionKey: fallbackSessionKey,
|
||||
prepareAgentHarnessRuntime: async ({ provider, model, agentHarnessRuntimeOverride }) => {
|
||||
await ensureSelectedAgentHarnessPlugin({
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { classifyFailoverSignal } from "./embedded-agent-helpers/errors.js";
|
||||
import {
|
||||
buildFailoverRemediationHint,
|
||||
buildProviderReauthCommand,
|
||||
coerceToFailoverError,
|
||||
describeFailoverError,
|
||||
FailoverError,
|
||||
@@ -1240,3 +1242,72 @@ describe("failover-error", () => {
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("buildFailoverRemediationHint", () => {
|
||||
it("returns a copy-pasteable login command for auth failures", () => {
|
||||
const err = new FailoverError("missing token", {
|
||||
reason: "auth",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
});
|
||||
expect(buildFailoverRemediationHint(err)).toBe(
|
||||
"Re-authenticate with: openclaw models auth login --provider 'anthropic' --force",
|
||||
);
|
||||
});
|
||||
|
||||
it("returns a hint for auth_permanent as well", () => {
|
||||
const err = new FailoverError("revoked", {
|
||||
reason: "auth_permanent",
|
||||
provider: "google-gemini-cli",
|
||||
model: "gemini-3.1-pro-preview",
|
||||
});
|
||||
expect(buildFailoverRemediationHint(err)).toBe(
|
||||
"Re-authenticate with: openclaw models auth login --provider 'google-gemini-cli' --force",
|
||||
);
|
||||
});
|
||||
|
||||
it("quotes provider ids that contain shell metacharacters", () => {
|
||||
expect(buildProviderReauthCommand("custom;touch /tmp/pwned")).toBe(
|
||||
"openclaw models auth login --provider 'custom;touch /tmp/pwned' --force",
|
||||
);
|
||||
expect(buildProviderReauthCommand("custom'provider")).toBe(
|
||||
"openclaw models auth login --provider 'custom'\\''provider' --force",
|
||||
);
|
||||
});
|
||||
|
||||
it("refuses control characters in rendered provider commands", () => {
|
||||
expect(buildProviderReauthCommand("custom\nprovider")).toBeUndefined();
|
||||
});
|
||||
|
||||
it("wraps rendered provider commands in the standard CLI formatter", () => {
|
||||
expect(buildProviderReauthCommand("anthropic", { OPENCLAW_PROFILE: "work" })).toBe(
|
||||
"openclaw --profile work models auth login --provider 'anthropic' --force",
|
||||
);
|
||||
expect(buildProviderReauthCommand("anthropic", { OPENCLAW_CONTAINER_HINT: "dev" })).toBe(
|
||||
"openclaw --container dev models auth login --provider 'anthropic' --force",
|
||||
);
|
||||
});
|
||||
|
||||
it("returns undefined for non-auth reasons", () => {
|
||||
const err = new FailoverError("429", {
|
||||
reason: "rate_limit",
|
||||
provider: "openai",
|
||||
model: "gpt-5",
|
||||
});
|
||||
expect(buildFailoverRemediationHint(err)).toBeUndefined();
|
||||
});
|
||||
|
||||
it("returns undefined when provider is not attributed", () => {
|
||||
const err = new FailoverError("no token", {
|
||||
reason: "auth",
|
||||
model: "claude-opus-4-7",
|
||||
});
|
||||
expect(buildFailoverRemediationHint(err)).toBeUndefined();
|
||||
});
|
||||
|
||||
it("returns undefined for non-FailoverError inputs", () => {
|
||||
expect(buildFailoverRemediationHint(new Error("oops"))).toBeUndefined();
|
||||
expect(buildFailoverRemediationHint(undefined)).toBeUndefined();
|
||||
expect(buildFailoverRemediationHint("just a string")).toBeUndefined();
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
import { parseStrictNonNegativeInteger } from "@openclaw/normalization-core/number-coercion";
|
||||
import { formatCliCommand } from "../cli/command-format.js";
|
||||
import { readErrorName } from "../infra/errors.js";
|
||||
import {
|
||||
classifyFailoverSignal,
|
||||
@@ -535,6 +536,59 @@ export function resolveFailoverReasonFromError(
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Produce a short, copy-pasteable re-authentication hint for a failover error.
 *
 * A hint is only produced when `err` is a failover error whose reason is
 * `auth` or `auth_permanent` and whose provider is a non-blank string that
 * `buildProviderReauthCommand` is able to render. Every other input yields
 * `undefined`, which lets callers append the hint opportunistically instead
 * of branching on each failure reason themselves.
 *
 * The text is kept to a single short line because operators read it in
 * fallback summary errors and TUI status lines.
 *
 * @param err - Any thrown value; values that are not failover errors are ignored.
 * @returns `Re-authenticate with: <command>`, or `undefined` when no hint applies.
 */
export function buildFailoverRemediationHint(err: unknown): string | undefined {
  if (!isFailoverError(err) || (err.reason !== "auth" && err.reason !== "auth_permanent")) {
    return undefined;
  }
  const provider = err.provider?.trim();
  if (!provider) {
    // No provider attribution, so there is nothing to log in to.
    return undefined;
  }
  const command = buildProviderReauthCommand(provider);
  if (!command) {
    return undefined;
  }
  return `Re-authenticate with: ${command}`;
}
|
||||
|
||||
/**
 * Single-quote `value` for a POSIX shell.
 *
 * Each embedded `'` is rewritten as `'\''` (close the quote, emit an escaped
 * quote, reopen the quote) and the result is wrapped in single quotes.
 */
function quotePosixShellArg(value: string): string {
  const escaped = value.split("'").join("'\\''");
  return "'" + escaped + "'";
}
|
||||
|
||||
/**
 * Render the forced re-login command for `provider`.
 *
 * The provider id is trimmed and single-quoted for a POSIX shell before being
 * embedded, and the finished command line is passed through
 * `formatCliCommand` together with `env`.
 *
 * @param provider - Provider id to log in to again.
 * @param env - Environment handed to `formatCliCommand`; defaults to `process.env`.
 * @returns The rendered command, or `undefined` when the trimmed provider is
 *   empty or contains a control character.
 */
export function buildProviderReauthCommand(
  provider: string,
  env: Record<string, string | undefined> = process.env as Record<string, string | undefined>,
): string | undefined {
  const providerId = provider.trim();
  const renderable = providerId.length > 0 && !hasControlCharacter(providerId);
  if (!renderable) {
    return undefined;
  }
  const quotedProvider = quotePosixShellArg(providerId);
  return formatCliCommand(`openclaw models auth login --provider ${quotedProvider} --force`, env);
}
|
||||
|
||||
/**
 * Report whether `value` contains any control character.
 *
 * Covers the C0 range (U+0000–U+001F), DEL (U+007F), and the C1 range
 * (U+0080–U+009F). C1 is included because it holds single-code-unit terminal
 * controls such as CSI (U+009B) and NEL (U+0085), which must not reach a
 * command string that is displayed to operators.
 *
 * @param value - Text to scan, one UTF-16 code unit at a time.
 * @returns `true` if at least one control character is present.
 */
function hasControlCharacter(value: string): boolean {
  for (let i = 0; i < value.length; i += 1) {
    const code = value.charCodeAt(i);
    // C0 controls, then DEL through the end of the C1 block.
    if (code < 0x20 || (code >= 0x7f && code <= 0x9f)) {
      return true;
    }
  }
  return false;
}
|
||||
|
||||
export function describeFailoverError(err: unknown): {
|
||||
message: string;
|
||||
rawError?: string;
|
||||
|
||||
334
src/agents/fallback-skip-cache.test.ts
Normal file
334
src/agents/fallback-skip-cache.test.ts
Normal file
@@ -0,0 +1,334 @@
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import {
|
||||
DEFAULT_FALLBACK_SKIP_TTL_MS,
|
||||
resetFallbackSkipCacheForTest,
|
||||
clearFallbackSkipCacheForSession,
|
||||
getFallbackCandidateSkipReason,
|
||||
isFallbackCandidateSkipped,
|
||||
markFallbackCandidateSkipped,
|
||||
} from "./fallback-skip-cache.js";
|
||||
|
||||
describe("fallback-skip-cache", () => {
|
||||
beforeEach(() => {
|
||||
resetFallbackSkipCacheForTest();
|
||||
});
|
||||
|
||||
afterEach(() => {
|
||||
resetFallbackSkipCacheForTest();
|
||||
});
|
||||
|
||||
it("returns false for an unknown (session, provider, model) triple", () => {
|
||||
expect(
|
||||
isFallbackCandidateSkipped({
|
||||
sessionId: "s1",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
now: 1_000,
|
||||
}),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("treats falsy sessionId as a no-op for both mark and check", () => {
|
||||
markFallbackCandidateSkipped({
|
||||
sessionId: undefined,
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
reason: "auth",
|
||||
now: 1_000,
|
||||
});
|
||||
expect(
|
||||
isFallbackCandidateSkipped({
|
||||
sessionId: undefined,
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
now: 1_000,
|
||||
}),
|
||||
).toBe(false);
|
||||
expect(
|
||||
isFallbackCandidateSkipped({
|
||||
sessionId: "",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
now: 1_000,
|
||||
}),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("marks then sees a candidate as skipped within the TTL", () => {
|
||||
markFallbackCandidateSkipped({
|
||||
sessionId: "s1",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
reason: "auth",
|
||||
now: 1_000,
|
||||
ttlMs: 60_000,
|
||||
});
|
||||
|
||||
expect(
|
||||
isFallbackCandidateSkipped({
|
||||
sessionId: "s1",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
now: 30_000,
|
||||
}),
|
||||
).toBe(true);
|
||||
expect(
|
||||
getFallbackCandidateSkipReason({
|
||||
sessionId: "s1",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
now: 30_000,
|
||||
}),
|
||||
).toBe("auth");
|
||||
});
|
||||
|
||||
it("expires entries after the TTL elapses", () => {
  const candidate = {
    sessionId: "s1",
    provider: "anthropic",
    model: "claude-opus-4-7",
  };
  // Marked at t=1_000 with a 10_000 ms TTL, so the marker expires at t=11_000.
  markFallbackCandidateSkipped({
    ...candidate,
    reason: "auth_permanent",
    now: 1_000,
    ttlMs: 10_000,
  });

  // One millisecond before expiry, still skipped.
  expect(isFallbackCandidateSkipped({ ...candidate, now: 10_999 })).toBe(true);
  expect(getFallbackCandidateSkipReason({ ...candidate, now: 10_999 })).toBe("auth_permanent");

  // Exactly at expiry the marker no longer applies. The reason lookup runs
  // first because the skip check prunes the entry as a side effect.
  expect(getFallbackCandidateSkipReason({ ...candidate, now: 11_000 })).toBeUndefined();
  expect(isFallbackCandidateSkipped({ ...candidate, now: 11_000 })).toBe(false);

  // After expiry, still not skipped.
  expect(isFallbackCandidateSkipped({ ...candidate, now: 11_001 })).toBe(false);
  expect(getFallbackCandidateSkipReason({ ...candidate, now: 11_001 })).toBeUndefined();
});
|
||||
|
||||
it("isolates entries across sessions", () => {
|
||||
markFallbackCandidateSkipped({
|
||||
sessionId: "s1",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
reason: "auth",
|
||||
now: 1_000,
|
||||
});
|
||||
expect(
|
||||
isFallbackCandidateSkipped({
|
||||
sessionId: "s2",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
now: 30_000,
|
||||
}),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("isolates entries across (provider, model) pairs", () => {
|
||||
markFallbackCandidateSkipped({
|
||||
sessionId: "s1",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
reason: "auth",
|
||||
now: 1_000,
|
||||
});
|
||||
expect(
|
||||
isFallbackCandidateSkipped({
|
||||
sessionId: "s1",
|
||||
provider: "anthropic",
|
||||
model: "claude-sonnet-4-6",
|
||||
now: 30_000,
|
||||
}),
|
||||
).toBe(false);
|
||||
expect(
|
||||
isFallbackCandidateSkipped({
|
||||
sessionId: "s1",
|
||||
provider: "google",
|
||||
model: "claude-opus-4-7",
|
||||
now: 30_000,
|
||||
}),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("clearFallbackSkipCacheForSession drops every marker for that session", () => {
|
||||
markFallbackCandidateSkipped({
|
||||
sessionId: "s1",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
reason: "auth",
|
||||
now: 1_000,
|
||||
});
|
||||
markFallbackCandidateSkipped({
|
||||
sessionId: "s1",
|
||||
provider: "google",
|
||||
model: "gemini-3.1-pro-preview",
|
||||
reason: "auth",
|
||||
now: 1_000,
|
||||
});
|
||||
clearFallbackSkipCacheForSession("s1");
|
||||
expect(
|
||||
isFallbackCandidateSkipped({
|
||||
sessionId: "s1",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
now: 30_000,
|
||||
}),
|
||||
).toBe(false);
|
||||
expect(
|
||||
isFallbackCandidateSkipped({
|
||||
sessionId: "s1",
|
||||
provider: "google",
|
||||
model: "gemini-3.1-pro-preview",
|
||||
now: 30_000,
|
||||
}),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("re-marking the same triple refreshes the TTL", () => {
|
||||
markFallbackCandidateSkipped({
|
||||
sessionId: "s1",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
reason: "auth",
|
||||
now: 1_000,
|
||||
ttlMs: 10_000,
|
||||
});
|
||||
// Re-mark just before the original entry would expire.
|
||||
markFallbackCandidateSkipped({
|
||||
sessionId: "s1",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
reason: "auth_permanent",
|
||||
now: 10_000,
|
||||
ttlMs: 10_000,
|
||||
});
|
||||
// Without refresh, this point would be past expiry. With refresh it lives.
|
||||
expect(
|
||||
isFallbackCandidateSkipped({
|
||||
sessionId: "s1",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
now: 19_000,
|
||||
}),
|
||||
).toBe(true);
|
||||
// The most recent reason wins.
|
||||
expect(
|
||||
getFallbackCandidateSkipReason({
|
||||
sessionId: "s1",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
now: 19_000,
|
||||
}),
|
||||
).toBe("auth_permanent");
|
||||
});
|
||||
|
||||
it("prunes expired buckets from sessions that are never queried again", async () => {
|
||||
const { peekFallbackSkipBucketsForTest } = await import("./fallback-skip-cache.js");
|
||||
|
||||
// Two short-lived sessions write markers, then never come back.
|
||||
markFallbackCandidateSkipped({
|
||||
sessionId: "one-off-1",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
reason: "auth",
|
||||
now: 1_000,
|
||||
ttlMs: 10_000,
|
||||
});
|
||||
markFallbackCandidateSkipped({
|
||||
sessionId: "one-off-2",
|
||||
provider: "google",
|
||||
model: "gemini-3.1-pro-preview",
|
||||
reason: "auth",
|
||||
now: 1_000,
|
||||
ttlMs: 10_000,
|
||||
});
|
||||
|
||||
expect(peekFallbackSkipBucketsForTest().size).toBe(2);
|
||||
|
||||
// A third session writes well after the first two have expired. The
|
||||
// opportunistic global prune must drop the stale buckets even though
|
||||
// those original sessions are never re-queried.
|
||||
markFallbackCandidateSkipped({
|
||||
sessionId: "later",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
reason: "auth",
|
||||
now: 100_000,
|
||||
ttlMs: 10_000,
|
||||
});
|
||||
|
||||
const buckets = peekFallbackSkipBucketsForTest();
|
||||
expect(buckets.has("one-off-1")).toBe(false);
|
||||
expect(buckets.has("one-off-2")).toBe(false);
|
||||
expect(buckets.has("later")).toBe(true);
|
||||
});
|
||||
|
||||
it("does not skip by default when ttlMs is omitted", () => {
|
||||
markFallbackCandidateSkipped({
|
||||
sessionId: "s1",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
reason: "auth",
|
||||
now: 1_000,
|
||||
});
|
||||
expect(
|
||||
isFallbackCandidateSkipped({
|
||||
sessionId: "s1",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
now: 1_000,
|
||||
}),
|
||||
).toBe(false);
|
||||
expect(DEFAULT_FALLBACK_SKIP_TTL_MS).toBe(0);
|
||||
});
|
||||
|
||||
it("uses OPENCLAW_FALLBACK_SKIP_TTL_MS as an opt-in default TTL", () => {
|
||||
const previous = process.env.OPENCLAW_FALLBACK_SKIP_TTL_MS;
|
||||
process.env.OPENCLAW_FALLBACK_SKIP_TTL_MS = "60000";
|
||||
try {
|
||||
markFallbackCandidateSkipped({
|
||||
sessionId: "s1",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
reason: "auth",
|
||||
now: 1_000,
|
||||
});
|
||||
expect(
|
||||
isFallbackCandidateSkipped({
|
||||
sessionId: "s1",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
now: 60_000,
|
||||
}),
|
||||
).toBe(true);
|
||||
expect(
|
||||
isFallbackCandidateSkipped({
|
||||
sessionId: "s1",
|
||||
provider: "anthropic",
|
||||
model: "claude-opus-4-7",
|
||||
now: 61_001,
|
||||
}),
|
||||
).toBe(false);
|
||||
} finally {
|
||||
if (previous === undefined) {
|
||||
delete process.env.OPENCLAW_FALLBACK_SKIP_TTL_MS;
|
||||
} else {
|
||||
process.env.OPENCLAW_FALLBACK_SKIP_TTL_MS = previous;
|
||||
}
|
||||
}
|
||||
});
|
||||
});
|
||||
246
src/agents/fallback-skip-cache.ts
Normal file
246
src/agents/fallback-skip-cache.ts
Normal file
@@ -0,0 +1,246 @@
|
||||
/**
|
||||
* Session-scoped "known-bad candidate" cache for the model fallback chain.
|
||||
*
|
||||
* When explicitly enabled and a fallback candidate fails with a non-transient
|
||||
* credential error (`auth` / `auth_permanent`), the chain can avoid retrying
|
||||
* the same candidate on every subsequent turn until the user fixes their auth.
|
||||
*
|
||||
* This module records skip markers per `(sessionId, provider, model)` with a
|
||||
* short TTL. The cache is intentionally in-memory only: a process restart
|
||||
* clears it so a freshly-restarted gateway always tries every candidate at
|
||||
* least once before deciding to skip again.
|
||||
*
|
||||
* The cache is global, not per-config, so any caller running fallbacks for the
|
||||
* same `sessionId` shares the same skip set. Tests can reset state via
|
||||
* `resetFallbackSkipCacheForTest()`.
|
||||
*/
|
||||
|
||||
import { modelKey } from "./model-selection-normalize.js";
|
||||
|
||||
/**
 * Default time-to-live for a skip marker. Disabled by default so existing
 * fallback retry behavior stays unchanged unless an operator opts in with
 * OPENCLAW_FALLBACK_SKIP_TTL_MS.
 */
export const DEFAULT_FALLBACK_SKIP_TTL_MS = 0;
/** Name of the environment variable that opts in to the skip cache. */
const FALLBACK_SKIP_TTL_ENV = "OPENCLAW_FALLBACK_SKIP_TTL_MS";
/** Lower clamp applied to any positive configured TTL (1 second). */
const FALLBACK_SKIP_TTL_MIN_MS = 1_000;
/** Upper clamp applied to any positive configured TTL (10 minutes). */
const FALLBACK_SKIP_TTL_MAX_MS = 10 * 60_000;
/** A whole, non-negative decimal number with an optional leading `+`. */
const FALLBACK_SKIP_TTL_PATTERN = /^\+?\d+$/;

/**
 * Resolve the skip-marker TTL from the environment.
 *
 * The value must be a plain non-negative decimal integer (surrounding
 * whitespace is ignored). Anything else, such as `60000abc`, `5e3`, `1.5`, or
 * a negative number, is rejected as a whole rather than partially parsed, so
 * a typo cannot silently enable the cache with an unintended TTL.
 *
 * @param env - Environment to read; defaults to `process.env`.
 * @returns `DEFAULT_FALLBACK_SKIP_TTL_MS` when the variable is unset, blank,
 *   or invalid; `0` when it is explicitly `0`; otherwise the value clamped to
 *   the 1 second – 10 minute range.
 */
function resolveConfiguredSkipTtlMs(env: NodeJS.ProcessEnv = process.env): number {
  const trimmed = env[FALLBACK_SKIP_TTL_ENV]?.trim();
  if (!trimmed || !FALLBACK_SKIP_TTL_PATTERN.test(trimmed)) {
    return DEFAULT_FALLBACK_SKIP_TTL_MS;
  }
  const parsed = Number(trimmed);
  // An absurdly long digit string overflows to Infinity; treat it as invalid.
  if (!Number.isFinite(parsed)) {
    return DEFAULT_FALLBACK_SKIP_TTL_MS;
  }
  if (parsed === 0) {
    return 0;
  }
  return Math.min(FALLBACK_SKIP_TTL_MAX_MS, Math.max(FALLBACK_SKIP_TTL_MIN_MS, parsed));
}
|
||||
|
||||
type SkipEntry = {
|
||||
expiresAtMs: number;
|
||||
reason: string;
|
||||
};
|
||||
|
||||
type SkipBySession = Map<string, Map<string, SkipEntry>>;
|
||||
|
||||
type SkipCacheState = {
|
||||
buckets: SkipBySession;
|
||||
lastGlobalPruneAtMs: number;
|
||||
};
|
||||
|
||||
/**
|
||||
* Minimum interval between two opportunistic global prunes. Keeps the
|
||||
* worst-case cost of a hot write/check path amortized: even if a gateway
|
||||
* tracks thousands of sessions, the cache is only walked every
|
||||
* `GLOBAL_PRUNE_INTERVAL_MS`, not on every call.
|
||||
*/
|
||||
const GLOBAL_PRUNE_INTERVAL_MS = 5_000;
|
||||
|
||||
/**
 * Return the process-wide cache state, creating it on first use.
 *
 * The state lives on `globalThis` under `openclawFallbackSkipCacheState`. When
 * it has to be created and the legacy `openclawFallbackSkipCache` global
 * already holds a buckets map, that map is adopted instead of starting empty;
 * either way the legacy global ends up pointing at the adopted buckets map.
 */
function getState(): SkipCacheState {
  const store = globalThis as typeof globalThis & {
    openclawFallbackSkipCache?: SkipBySession;
    openclawFallbackSkipCacheState?: SkipCacheState;
  };
  const existing = store.openclawFallbackSkipCacheState;
  if (existing) {
    return existing;
  }
  // Adopt a buckets map left behind by a prior version of this module, if any.
  const buckets: SkipBySession = store.openclawFallbackSkipCache ?? new Map();
  const created: SkipCacheState = { buckets, lastGlobalPruneAtMs: 0 };
  store.openclawFallbackSkipCacheState = created;
  store.openclawFallbackSkipCache = buckets;
  return created;
}
|
||||
|
||||
/** Shorthand for the session-to-markers map held in the shared cache state. */
function getBuckets(): SkipBySession {
  const { buckets } = getState();
  return buckets;
}
|
||||
|
||||
/**
 * Look up the marker map for `sessionId`.
 *
 * @param sessionId - Session whose bucket is wanted.
 * @param create - When `true`, a missing bucket is created, stored, and returned.
 * @returns The bucket, or `undefined` when it is missing and `create` is `false`.
 */
function sessionBucket(sessionId: string, create: boolean): Map<string, SkipEntry> | undefined {
  const allBuckets = getBuckets();
  const existing = allBuckets.get(sessionId);
  if (existing || !create) {
    return existing;
  }
  const created = new Map<string, SkipEntry>();
  allBuckets.set(sessionId, created);
  return created;
}
|
||||
|
||||
/** Build the per-session map key for a candidate by delegating to `modelKey(provider, model)`. */
function candidateKey(provider: string, model: string): string {
  const key = modelKey(provider, model);
  return key;
}
|
||||
|
||||
/**
 * Delete every marker in `bucket` whose expiry time is at or before `now`.
 * The bucket is mutated in place; an emptied bucket is left for the caller
 * to remove.
 */
function pruneExpired(bucket: Map<string, SkipEntry>, now: number): void {
  bucket.forEach((entry, key) => {
    if (now >= entry.expiresAtMs) {
      bucket.delete(key);
    }
  });
}
|
||||
|
||||
/**
 * Sweep the whole cache: expire stale markers in every session bucket and
 * drop any bucket that ends up empty.
 *
 * The sweep is rate-limited: it returns without doing anything unless at
 * least `GLOBAL_PRUNE_INTERVAL_MS` has passed (measured against `now`) since
 * the last sweep. It is invoked from the write/check paths so buckets left by
 * sessions that are never queried again still get reclaimed; the per-bucket
 * prune alone only runs when that same session comes back.
 */
function pruneAllExpired(now: number): void {
  const state = getState();
  const sinceLastSweep = now - state.lastGlobalPruneAtMs;
  if (sinceLastSweep < GLOBAL_PRUNE_INTERVAL_MS) {
    return;
  }
  state.lastGlobalPruneAtMs = now;
  state.buckets.forEach((bucket, sessionId) => {
    pruneExpired(bucket, now);
    if (bucket.size === 0) {
      state.buckets.delete(sessionId);
    }
  });
}
|
||||
|
||||
/**
 * Store a skip marker for `(sessionId, provider, model)`.
 *
 * The call is a no-op when `sessionId`, `provider`, or `model` is falsy, or
 * when the effective TTL is not positive, so callers need no guards of their
 * own. Marking an already-marked candidate overwrites its expiry and reason.
 *
 * @param params.reason - Failure reason stored alongside the marker.
 * @param params.now - Clock override in ms; defaults to `Date.now()`.
 * @param params.ttlMs - TTL override in ms; defaults to the environment-configured TTL.
 */
export function markFallbackCandidateSkipped(params: {
  sessionId: string | undefined;
  provider: string;
  model: string;
  reason: string;
  now?: number;
  ttlMs?: number;
}): void {
  const { sessionId, provider, model, reason } = params;
  if (!(sessionId && provider && model)) {
    return;
  }
  const now = params.now ?? Date.now();
  const ttlMs = params.ttlMs ?? resolveConfiguredSkipTtlMs();
  if (ttlMs <= 0) {
    // Cache disabled: leave fallback retry behavior untouched.
    return;
  }
  pruneAllExpired(now);
  const bucket = sessionBucket(sessionId, true);
  if (!bucket) {
    return;
  }
  const entry: SkipEntry = { expiresAtMs: now + ttlMs, reason };
  bucket.set(candidateKey(provider, model), entry);
}
|
||||
|
||||
/**
 * Tell whether `(sessionId, provider, model)` currently carries an unexpired
 * skip marker.
 *
 * As a side effect the session's expired markers are pruned (and the bucket
 * removed once empty), and a rate-limited global sweep may run, which keeps
 * the cache from growing without bound.
 *
 * @param params.now - Clock override in ms; defaults to `Date.now()`.
 * @returns `false` when any of the identifying fields is falsy or no live
 *   marker exists; `true` otherwise.
 */
export function isFallbackCandidateSkipped(params: {
  sessionId: string | undefined;
  provider: string;
  model: string;
  now?: number;
}): boolean {
  const { sessionId, provider, model } = params;
  if (!(sessionId && provider && model)) {
    return false;
  }
  const now = params.now ?? Date.now();
  pruneAllExpired(now);
  const bucket = sessionBucket(sessionId, false);
  if (bucket === undefined) {
    return false;
  }
  pruneExpired(bucket, now);
  if (bucket.size === 0) {
    getBuckets().delete(sessionId);
    return false;
  }
  const entry = bucket.get(candidateKey(provider, model));
  return entry !== undefined && entry.expiresAtMs > now;
}
|
||||
|
||||
/**
 * Fetch the failure reason stored with the skip marker for
 * `(sessionId, provider, model)`.
 *
 * Used by the fallback chain to surface the original failure reason in
 * observation logs. This lookup is read-only: it never prunes the cache.
 *
 * @param params.now - Clock override in ms; defaults to `Date.now()`.
 * @returns The recorded reason, or `undefined` when any identifying field is
 *   falsy or no unexpired marker exists.
 */
export function getFallbackCandidateSkipReason(params: {
  sessionId: string | undefined;
  provider: string;
  model: string;
  now?: number;
}): string | undefined {
  const { sessionId, provider, model } = params;
  if (!(sessionId && provider && model)) {
    return undefined;
  }
  const bucket = sessionBucket(sessionId, false);
  if (bucket === undefined) {
    return undefined;
  }
  const now = params.now ?? Date.now();
  const entry = bucket.get(candidateKey(provider, model));
  const live = entry !== undefined && entry.expiresAtMs > now;
  return live ? entry.reason : undefined;
}
|
||||
|
||||
/**
 * Remove the whole marker bucket for `sessionId`, dropping every skip marker
 * recorded for that session. A falsy `sessionId` is ignored.
 */
export function clearFallbackSkipCacheForSession(sessionId: string | undefined): void {
  if (sessionId) {
    getBuckets().delete(sessionId);
  }
}
|
||||
|
||||
/**
 * Test-only reset: empties every session bucket and rewinds the global-prune
 * clock to `0`. Production code must not call this, because the cache is
 * meant to outlive individual fallback runs.
 */
export function resetFallbackSkipCacheForTest(): void {
  const cache = getState();
  cache.lastGlobalPruneAtMs = 0;
  cache.buckets.clear();
}
|
||||
|
||||
/**
 * Test-only accessor that exposes the live session-bucket map (not a copy).
 * Production code must not read it: the bucket layout is an implementation
 * detail of the cache and may change shape.
 */
export function peekFallbackSkipBucketsForTest(): SkipBySession {
  const liveBuckets = getBuckets();
  return liveBuckets;
}
|
||||
@@ -19,6 +19,7 @@ import type { AuthProfileStore } from "./auth-profiles/types.js";
|
||||
import { classifyEmbeddedAgentRunResultForModelFallback } from "./embedded-agent-runner/result-fallback-classifier.js";
|
||||
import type { EmbeddedAgentRunResult } from "./embedded-agent-runner/types.js";
|
||||
import { FailoverError } from "./failover-error.js";
|
||||
import { resetFallbackSkipCacheForTest } from "./fallback-skip-cache.js";
|
||||
import { MissingAgentHarnessError } from "./harness/errors.js";
|
||||
import { LiveSessionModelSwitchError } from "./live-model-switch-error.js";
|
||||
import {
|
||||
@@ -180,6 +181,7 @@ afterAll(() => {
|
||||
});
|
||||
|
||||
function resetModelFallbackTestState(): void {
|
||||
resetFallbackSkipCacheForTest();
|
||||
authRuntimeMock.clear();
|
||||
authRuntimeMock.runtime.ensureAuthProfileStore.mockClear();
|
||||
authRuntimeMock.runtime.loadAuthProfileStoreForRuntime.mockClear();
|
||||
@@ -514,6 +516,75 @@ const INSUFFICIENT_QUOTA_PAYLOAD =
|
||||
'{"type":"error","error":{"type":"insufficient_quota","message":"Your account has insufficient quota balance to run this request."}}';
|
||||
|
||||
describe("runWithModelFallback", () => {
|
||||
it("uses the opt-in auth skip cache on the second turn for the same session", async () => {
|
||||
const previous = process.env.OPENCLAW_FALLBACK_SKIP_TTL_MS;
|
||||
process.env.OPENCLAW_FALLBACK_SKIP_TTL_MS = "60000";
|
||||
try {
|
||||
const cfg = makeCfg({
|
||||
agents: {
|
||||
defaults: {
|
||||
model: {
|
||||
primary: "openai/gpt-5.4",
|
||||
fallbacks: ["anthropic/claude-opus-4-7", "google/gemini-3.1-pro-preview"],
|
||||
},
|
||||
},
|
||||
},
|
||||
});
|
||||
const run = vi.fn(async (provider: string, model: string) => {
|
||||
if (provider === "openai") {
|
||||
throw new FailoverError("primary rate limited", {
|
||||
provider,
|
||||
model,
|
||||
reason: "rate_limit",
|
||||
});
|
||||
}
|
||||
if (provider === "anthropic") {
|
||||
throw new FailoverError("fallback auth failed", {
|
||||
provider,
|
||||
model,
|
||||
reason: "auth",
|
||||
});
|
||||
}
|
||||
return "ok";
|
||||
});
|
||||
|
||||
const first = await runWithModelFallback({
|
||||
cfg,
|
||||
provider: "openai",
|
||||
model: "gpt-5.4",
|
||||
sessionId: "session:auth-skip",
|
||||
run,
|
||||
});
|
||||
const second = await runWithModelFallback({
|
||||
cfg,
|
||||
provider: "openai",
|
||||
model: "gpt-5.4",
|
||||
sessionId: "session:auth-skip",
|
||||
run,
|
||||
});
|
||||
|
||||
expect(first.result).toBe("ok");
|
||||
expect(second.result).toBe("ok");
|
||||
expect(run.mock.calls.map(([provider, model]) => `${provider}/${model}`)).toEqual([
|
||||
"openai/gpt-5.4",
|
||||
"anthropic/claude-opus-4-7",
|
||||
"google/gemini-3.1-pro-preview",
|
||||
"openai/gpt-5.4",
|
||||
"google/gemini-3.1-pro-preview",
|
||||
]);
|
||||
expect(second.attempts.some((attempt) => attempt.provider === "anthropic")).toBe(true);
|
||||
expect(second.attempts.find((attempt) => attempt.provider === "anthropic")?.error).toContain(
|
||||
"recent auth failure",
|
||||
);
|
||||
} finally {
|
||||
if (previous === undefined) {
|
||||
delete process.env.OPENCLAW_FALLBACK_SKIP_TTL_MS;
|
||||
} else {
|
||||
process.env.OPENCLAW_FALLBACK_SKIP_TTL_MS = previous;
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
it("skips auth store bootstrap when no auth profile sources exist", async () => {
|
||||
authSourceCheckMock.hasAnyAuthProfileStoreSource.mockReturnValue(false);
|
||||
const run = vi.fn().mockResolvedValueOnce("ok");
|
||||
|
||||
@@ -28,6 +28,8 @@ import { isLikelyContextOverflowError } from "./embedded-agent-helpers/errors.js
|
||||
import type { FailoverReason } from "./embedded-agent-helpers/types.js";
|
||||
import {
|
||||
FailoverError,
|
||||
buildFailoverRemediationHint,
|
||||
buildProviderReauthCommand,
|
||||
coerceToFailoverError,
|
||||
describeFailoverError,
|
||||
isFailoverError,
|
||||
@@ -39,6 +41,11 @@ import {
|
||||
shouldPreserveTransientCooldownProbeSlot,
|
||||
shouldUseTransientCooldownProbeSlot,
|
||||
} from "./failover-policy.js";
|
||||
import {
|
||||
getFallbackCandidateSkipReason,
|
||||
isFallbackCandidateSkipped,
|
||||
markFallbackCandidateSkipped,
|
||||
} from "./fallback-skip-cache.js";
|
||||
import { MissingAgentHarnessError, isMissingAgentHarnessError } from "./harness/errors.js";
|
||||
import { resolveAgentHarnessPolicy } from "./harness/policy.js";
|
||||
import { getRegisteredAgentHarness } from "./harness/registry.js";
|
||||
@@ -562,8 +569,12 @@ function throwFallbackFailureSummary(params: {
|
||||
|
||||
const summary =
|
||||
params.attempts.length > 0 ? params.attempts.map(params.formatAttempt).join(" | ") : "unknown";
|
||||
const remediation = buildFailoverRemediationHint(params.lastError);
|
||||
const message = remediation
|
||||
? `All ${params.label} failed (${params.attempts.length || params.candidates.length}): ${summary}. ${remediation}`
|
||||
: `All ${params.label} failed (${params.attempts.length || params.candidates.length}): ${summary}`;
|
||||
throw new FallbackSummaryError(
|
||||
`All ${params.label} failed (${params.attempts.length || params.candidates.length}): ${summary}`,
|
||||
message,
|
||||
params.attempts,
|
||||
params.soonestCooldownExpiry ?? null,
|
||||
params.lastError instanceof Error ? params.lastError : undefined,
|
||||
@@ -1203,6 +1214,58 @@ export async function runWithModelFallback<T>(
|
||||
const requestedModel = requestedCandidate
|
||||
? sameModelCandidate(candidate, requestedCandidate)
|
||||
: false;
|
||||
|
||||
// Skip-known-bad cache: when a previous turn in this session failed this
|
||||
// candidate with `auth` / `auth_permanent` (e.g. missing or expired
|
||||
// credentials), suppress repeat attempts for the cache TTL so we do not
|
||||
// burn latency on the same broken candidate every turn. Primary is never
|
||||
// skipped — if the user explicitly requested it we should still surface
|
||||
// the auth error rather than silently jumping past it.
|
||||
if (!isPrimary && params.sessionId) {
|
||||
const skipped = isFallbackCandidateSkipped({
|
||||
sessionId: params.sessionId,
|
||||
provider: candidate.provider,
|
||||
model: candidate.model,
|
||||
});
|
||||
if (skipped) {
|
||||
const skipReason =
|
||||
getFallbackCandidateSkipReason({
|
||||
sessionId: params.sessionId,
|
||||
provider: candidate.provider,
|
||||
model: candidate.model,
|
||||
}) ?? "auth";
|
||||
const reauthCommand = buildProviderReauthCommand(candidate.provider);
|
||||
const reauthHint = reauthCommand
|
||||
? `run \`${reauthCommand}\` to re-authenticate`
|
||||
: "re-authenticate that provider";
|
||||
const error = `Skipping ${candidate.provider}/${candidate.model}: recent ${skipReason} failure in this session (${reauthHint})`;
|
||||
attempts.push({
|
||||
provider: candidate.provider,
|
||||
model: candidate.model,
|
||||
error,
|
||||
reason: skipReason as FailoverReason,
|
||||
});
|
||||
await observeDecision({
|
||||
decision: "skip_candidate",
|
||||
runId: params.runId,
|
||||
sessionId: params.sessionId,
|
||||
lane: params.lane,
|
||||
requestedProvider: params.provider,
|
||||
requestedModel: params.model,
|
||||
candidate,
|
||||
attempt: i + 1,
|
||||
total: candidates.length,
|
||||
reason: skipReason as FailoverReason,
|
||||
error,
|
||||
nextCandidate: candidates[i + 1],
|
||||
isPrimary,
|
||||
requestedModelMatched: requestedModel,
|
||||
fallbackConfigured: hasFallbackCandidates,
|
||||
});
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
let runOptions: ModelFallbackRunOptions | undefined;
|
||||
let attemptedDuringCooldown = false;
|
||||
let transientProbeProviderForAttempt: string | null = null;
|
||||
@@ -1498,6 +1561,23 @@ export async function runWithModelFallback<T>(
|
||||
throw err;
|
||||
}
|
||||
|
||||
// Record auth-class failures in the session-scoped skip cache so the
|
||||
// next turn does not re-attempt the same broken candidate. Only mark
|
||||
// for non-primary candidates — see the skip-check above for rationale.
|
||||
if (
|
||||
isKnownFailover &&
|
||||
!isPrimary &&
|
||||
params.sessionId &&
|
||||
(normalized.reason === "auth" || normalized.reason === "auth_permanent")
|
||||
) {
|
||||
markFallbackCandidateSkipped({
|
||||
sessionId: params.sessionId,
|
||||
provider: candidate.provider,
|
||||
model: candidate.model,
|
||||
reason: normalized.reason,
|
||||
});
|
||||
}
|
||||
|
||||
lastError = isKnownFailover ? normalized : err;
|
||||
await observeFailedCandidate({
|
||||
attempts,
|
||||
|
||||
@@ -211,6 +211,7 @@ async function getApplyFallbackCandidateSelectionToEntry() {
|
||||
type FallbackRunnerParams = {
|
||||
provider: string;
|
||||
model: string;
|
||||
sessionId?: string;
|
||||
abortSignal?: AbortSignal;
|
||||
run: (provider: string, model: string) => Promise<unknown>;
|
||||
classifyResult?: (params: {
|
||||
@@ -1152,6 +1153,7 @@ describe("runAgentTurnWithFallback", () => {
|
||||
"runEmbeddedAgent params",
|
||||
);
|
||||
expect(fallbackCall.abortSignal).toBe(replyOperation.abortSignal);
|
||||
expect(fallbackCall.sessionId).toBe("session");
|
||||
expect(embeddedCall.abortSignal).toBe(replyOperation.abortSignal);
|
||||
});
|
||||
|
||||
|
||||
@@ -72,6 +72,7 @@ type ModelFallbackParams = {
|
||||
model?: string;
|
||||
abortSignal?: AbortSignal;
|
||||
agentId?: string;
|
||||
sessionId?: string;
|
||||
sessionKey?: string;
|
||||
fallbacksOverride?: unknown[];
|
||||
resolveAgentHarnessRuntimeOverride?: (provider: string, model: string) => string | undefined;
|
||||
@@ -775,6 +776,7 @@ describe("runMemoryFlushIfNeeded", () => {
|
||||
expect(fallbackCall.provider).toBe("ollama");
|
||||
expect(fallbackCall.model).toBe("qwen3:8b");
|
||||
expect(fallbackCall.abortSignal).toBe(replyOperation.abortSignal);
|
||||
expect(fallbackCall.sessionId).toBe("session");
|
||||
expect(fallbackCall.fallbacksOverride).toEqual([]);
|
||||
expect(runEmbeddedAgentMock).toHaveBeenCalledTimes(1);
|
||||
const agentCall = requireEmbeddedAgentCall();
|
||||
|
||||
@@ -1501,6 +1501,7 @@ describe("createFollowupRunner runtime config", () => {
|
||||
const call = requireLastMockCallArg(runEmbeddedAgentMock, "run embedded agent");
|
||||
expect(fallbackCall.abortSignal).toBeInstanceOf(AbortSignal);
|
||||
expect(fallbackCall.abortSignal).not.toBe(abortController.signal);
|
||||
expect(fallbackCall.sessionId).toBe("session");
|
||||
expect(call.abortSignal).toBe(fallbackCall.abortSignal);
|
||||
});
|
||||
|
||||
|
||||
@@ -663,6 +663,7 @@ export function createFollowupRunner(params: {
|
||||
...resolveModelFallbackOptions(run, runtimeConfig),
|
||||
cfg: runtimeConfig,
|
||||
runId,
|
||||
sessionId: run.sessionId,
|
||||
abortSignal: runAbortSignal,
|
||||
resolveAgentHarnessRuntimeOverride: (provider) =>
|
||||
resolveSessionRuntimeOverrideForProvider({
|
||||
|
||||
@@ -348,6 +348,11 @@ export function registerModelsCli(program: Command) {
|
||||
.option("--device-code", "Use the provider device-code auth method", false)
|
||||
.option("--profile-id <id>", "Auth profile id override for single-profile login methods")
|
||||
.option("--set-default", "Apply the provider's default model recommendation", false)
|
||||
.option(
|
||||
"--force",
|
||||
"Remove existing profiles for the provider before logging in (use when a cached OAuth profile is stuck or you want to switch accounts)",
|
||||
false,
|
||||
)
|
||||
.action(async (opts, command) => {
|
||||
if (opts.deviceCode && typeof opts.method === "string" && opts.method !== "device-code") {
|
||||
throw new Error(
|
||||
@@ -363,6 +368,7 @@ export function registerModelsCli(program: Command) {
|
||||
method: opts.deviceCode ? "device-code" : (opts.method as string | undefined),
|
||||
profileId: opts.profileId as string | undefined,
|
||||
setDefault: Boolean(opts.setDefault),
|
||||
force: Boolean(opts.force),
|
||||
agent,
|
||||
},
|
||||
defaultRuntime,
|
||||
|
||||
@@ -48,6 +48,7 @@ const mocks = vi.hoisted(() => ({
|
||||
resolveDefaultAgentWorkspaceDir: vi.fn(),
|
||||
upsertAuthProfile: vi.fn(),
|
||||
upsertAuthProfileWithLock: vi.fn(),
|
||||
removeProviderAuthProfilesWithLock: vi.fn(),
|
||||
resolvePluginProviders: vi.fn(),
|
||||
createClackPrompter: vi.fn(),
|
||||
loadValidConfigOrThrow: vi.fn(),
|
||||
@@ -67,6 +68,7 @@ const mocks = vi.hoisted(() => ({
|
||||
vi.mock("../../agents/auth-profiles/profiles.js", () => ({
|
||||
listProfilesForProvider: mocks.listProfilesForProvider,
|
||||
promoteAuthProfileInOrder: mocks.promoteAuthProfileInOrder,
|
||||
removeProviderAuthProfilesWithLock: mocks.removeProviderAuthProfilesWithLock,
|
||||
upsertAuthProfile: mocks.upsertAuthProfile,
|
||||
upsertAuthProfileWithLock: mocks.upsertAuthProfileWithLock,
|
||||
}));
|
||||
@@ -362,6 +364,8 @@ describe("modelsAuthLoginCommand", () => {
|
||||
mocks.upsertAuthProfileWithLock.mockReset();
|
||||
mocks.upsertAuthProfileWithLock.mockResolvedValue({ version: 1, profiles: {} });
|
||||
mocks.promoteAuthProfileInOrder.mockReset();
|
||||
mocks.removeProviderAuthProfilesWithLock.mockReset();
|
||||
mocks.removeProviderAuthProfilesWithLock.mockResolvedValue({ version: 1, profiles: {} });
|
||||
|
||||
mocks.resolveDefaultAgentId.mockReturnValue("main");
|
||||
mocks.resolveAgentDir.mockReturnValue("/tmp/openclaw/agents/main");
|
||||
@@ -1177,6 +1181,95 @@ describe("modelsAuthLoginCommand", () => {
|
||||
expect(runProviderAuth).toHaveBeenCalledOnce();
|
||||
});
|
||||
|
||||
it("--force purges cached profiles for the provider before login", async () => {
|
||||
const runtime = createRuntime();
|
||||
|
||||
await modelsAuthLoginCommand({ provider: "openai", force: true }, runtime);
|
||||
|
||||
expect(mocks.removeProviderAuthProfilesWithLock).toHaveBeenCalledWith({
|
||||
provider: "openai",
|
||||
agentDir: "/tmp/openclaw/agents/main",
|
||||
});
|
||||
expect(runProviderAuth).toHaveBeenCalledOnce();
|
||||
expect(runtime.log).toHaveBeenCalledWith(
|
||||
expect.stringContaining('Removed cached auth profiles for provider "openai"'),
|
||||
);
|
||||
});
|
||||
|
||||
it("--force does not purge when omitted", async () => {
|
||||
const runtime = createRuntime();
|
||||
|
||||
await modelsAuthLoginCommand({ provider: "openai" }, runtime);
|
||||
|
||||
expect(mocks.removeProviderAuthProfilesWithLock).not.toHaveBeenCalled();
|
||||
expect(runProviderAuth).toHaveBeenCalledOnce();
|
||||
});
|
||||
|
||||
it("--force fails before login when purge throws", async () => {
|
||||
const runtime = createRuntime();
|
||||
mocks.removeProviderAuthProfilesWithLock.mockRejectedValueOnce(new Error("disk full"));
|
||||
|
||||
await expect(
|
||||
modelsAuthLoginCommand({ provider: "openai", force: true }, runtime),
|
||||
).rejects.toThrow('Could not clear cached profiles for "openai" before re-login: disk full');
|
||||
|
||||
expect(runtime.error).not.toHaveBeenCalled();
|
||||
expect(runProviderAuth).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("--force fails before login when purge cannot update the profile store", async () => {
|
||||
const runtime = createRuntime();
|
||||
mocks.removeProviderAuthProfilesWithLock.mockResolvedValueOnce(null);
|
||||
|
||||
await expect(
|
||||
modelsAuthLoginCommand({ provider: "openai", force: true }, runtime),
|
||||
).rejects.toThrow(
|
||||
'Could not clear cached profiles for "openai" before re-login: profile store update failed',
|
||||
);
|
||||
|
||||
expect(runtime.error).not.toHaveBeenCalled();
|
||||
expect(runProviderAuth).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("--force does NOT purge cached profiles when the requested auth method is unknown", async () => {
|
||||
const runtime = createRuntime();
|
||||
const runOauthAuth = vi.fn().mockResolvedValue({ profiles: [] });
|
||||
const runApiKeyAuth = vi.fn().mockResolvedValue({ profiles: [] });
|
||||
mocks.resolvePluginSetupProvider.mockReturnValue(
|
||||
createProvider({
|
||||
id: "openai",
|
||||
label: "OpenAI",
|
||||
run: runOauthAuth as ProviderPlugin["auth"][number]["run"],
|
||||
auth: [
|
||||
{
|
||||
id: "oauth",
|
||||
label: "ChatGPT Login",
|
||||
kind: "oauth",
|
||||
run: runOauthAuth,
|
||||
},
|
||||
{
|
||||
id: "api-key",
|
||||
label: "OpenAI API Key",
|
||||
kind: "api_key",
|
||||
run: runApiKeyAuth,
|
||||
},
|
||||
],
|
||||
}),
|
||||
);
|
||||
|
||||
// Using the wrong method id ("api_key" vs the registered "api-key") forces
|
||||
// pickProviderAuthMethod to return null, which throws "Unknown auth method".
|
||||
// The purge must NOT have run, otherwise the user's working credentials
|
||||
// would be deleted before any auth flow had a chance to start.
|
||||
await expect(
|
||||
modelsAuthLoginCommand({ provider: "openai", method: "api_key", force: true }, runtime),
|
||||
).rejects.toThrow("Unknown auth method");
|
||||
|
||||
expect(mocks.removeProviderAuthProfilesWithLock).not.toHaveBeenCalled();
|
||||
expect(runOauthAuth).not.toHaveBeenCalled();
|
||||
expect(runApiKeyAuth).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("reports loaded plugin providers when requested provider is unavailable", async () => {
|
||||
const runtime = createRuntime();
|
||||
|
||||
|
||||
@@ -20,7 +20,10 @@ import {
|
||||
resolveAgentWorkspaceDir,
|
||||
resolveDefaultAgentId,
|
||||
} from "../../agents/agent-scope.js";
|
||||
import { externalCliDiscoveryForProviderAuth } from "../../agents/auth-profiles.js";
|
||||
import {
|
||||
externalCliDiscoveryForProviderAuth,
|
||||
removeProviderAuthProfilesWithLock,
|
||||
} from "../../agents/auth-profiles.js";
|
||||
import {
|
||||
listProfilesForProvider,
|
||||
promoteAuthProfileInOrder,
|
||||
@@ -874,6 +877,13 @@ type LoginOptions = {
|
||||
setDefault?: boolean;
|
||||
yes?: boolean;
|
||||
agent?: string;
|
||||
/**
|
||||
* When true, remove any existing auth profiles for the resolved provider
|
||||
* before invoking the auth flow. This is the escape hatch for stuck
|
||||
* cached OAuth profiles where the standard `auth login` short-circuits
|
||||
* because credentials already exist on disk.
|
||||
*/
|
||||
force?: boolean;
|
||||
};
|
||||
|
||||
/**
|
||||
@@ -982,6 +992,7 @@ export async function modelsAuthLoginCommand(opts: LoginOptions, runtime: Runtim
|
||||
`Unknown provider. Run ${formatCliCommand("openclaw models status")} or ${formatCliCommand("openclaw plugins list")} to see available provider plugins.`,
|
||||
);
|
||||
}
|
||||
|
||||
const chosenMethod = await pickProviderAuthMethod({
|
||||
provider: selectedProvider,
|
||||
requestedMethod: opts.method,
|
||||
@@ -994,6 +1005,35 @@ export async function modelsAuthLoginCommand(opts: LoginOptions, runtime: Runtim
|
||||
);
|
||||
}
|
||||
|
||||
if (opts.force) {
|
||||
// Purge existing profiles for this provider only after we have a valid
|
||||
// auth method to invoke. Running the purge earlier (before method
|
||||
// resolution) would delete the user's working credentials and then
|
||||
// throw on an unresolvable `--method`, leaving them without a usable
|
||||
// profile and no auth flow started. This is the documented escape
|
||||
// hatch for stuck OAuth credentials (expired token, swapped account,
|
||||
// etc.) where `auth login` would otherwise short-circuit on the cached
|
||||
// profile.
|
||||
try {
|
||||
const clearedStore = await removeProviderAuthProfilesWithLock({
|
||||
provider: selectedProvider.id,
|
||||
agentDir,
|
||||
});
|
||||
if (!clearedStore) {
|
||||
throw new Error("profile store update failed");
|
||||
}
|
||||
runtime.log(
|
||||
`Removed cached auth profiles for provider "${selectedProvider.id}" (--force). Running fresh auth flow.`,
|
||||
);
|
||||
} catch (err) {
|
||||
const message = err instanceof Error ? err.message : String(err);
|
||||
throw new Error(
|
||||
`Could not clear cached profiles for "${selectedProvider.id}" before re-login: ${message}. Re-login was not started because --force must remove cached profiles first.`,
|
||||
{ cause: err },
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
await runProviderAuthMethod({
|
||||
config,
|
||||
agentDir,
|
||||
|
||||
Reference in New Issue
Block a user