fix: cancel stale provider auth prewarms (#85503)

2026-06-06 05:51:15 +08:00 · 2026-05-22 21:51:43 +01:00
parent 0a50cbdf34
commit 60e3749de3
4 changed files with 118 additions and 6 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -52,6 +52,7 @@ Docs: https://docs.openclaw.ai
 - Gateway/agents: return phase-aware `agent.wait` timeout attribution and only cool auth profiles on provider-started timeouts. Refs #65504. Thanks @100yenadmin.
 - Gateway: defer provider auth-state prewarm until after startup readiness so early gateway tool/session requests are not blocked by provider auth discovery. (#85272) Thanks @dutifulbob.
 - Gateway/models: coalesce provider auth-state rewarms after auth-profile failures and log event-loop delay for warm/rewarm work, so provider auth bursts no longer stack full auth sweeps behind channel replies.
+- Gateway/models: stop cancelled provider auth-state prewarms from continuing full provider sweeps, so reload and auth-failure bursts no longer keep startup busy.
 - Agents/Codex: show the first plan update as a transient chat status notice without counting it as final assistant content.
 - CLI/update: walk the macOS process ancestry and honor the inherited Gateway runtime PID before package updates stop the managed Gateway service, so nested in-band updater children can refuse instead of killing the LaunchAgent-supervised Gateway that owns them. Fixes #85120.
 - Gateway/LaunchAgent: wait for launchd reload bootout to finish and fall back to kickstart when bootstrap races, so reload handoff does not leave the service deregistered. Fixes #84630. (#84641) Thanks @NianJiuZst.
--- a/src/agents/model-auth.ts
+++ b/src/agents/model-auth.ts
@@ -13,6 +13,7 @@ import {
  shouldDeferProviderSyntheticProfileAuthWithPlugin,
 } from "../plugins/provider-runtime.js";
 import { resolveOwningPluginIdsForProvider } from "../plugins/providers.js";
+import type { ProviderAuthEvidence } from "../secrets/provider-env-vars.js";
 import { resolveDefaultSecretProviderAlias } from "../secrets/ref-contract.js";
 import {
  normalizeLowercaseStringOrEmpty,
@@ -349,6 +350,11 @@ export function hasRuntimeAvailableProviderAuth(params: {
  workspaceDir?: string;
  env?: NodeJS.ProcessEnv;
  allowPluginSyntheticAuth?: boolean;
+  envAuthLookup?: {
+    aliasMap?: Readonly<Record<string, string>>;
+    candidateMap?: Readonly<Record<string, readonly string[]>>;
+    authEvidenceMap?: Readonly<Record<string, readonly ProviderAuthEvidence[]>>;
+  };
 }): boolean {
  const provider = normalizeProviderId(params.provider);
  const authOverride = resolveProviderAuthOverride(params.cfg, provider);
@@ -362,6 +368,9 @@ export function hasRuntimeAvailableProviderAuth(params: {
    resolveEnvApiKey(provider, params.env, {
      config: params.cfg,
      workspaceDir: params.workspaceDir,
+      aliasMap: params.envAuthLookup?.aliasMap,
+      candidateMap: params.envAuthLookup?.candidateMap,
+      authEvidenceMap: params.envAuthLookup?.authEvidenceMap,
    })
  ) {
    return true;
@@ -774,7 +783,11 @@ export async function resolveApiKeyForProvider(params: {
    return deferredAuthProfileResult;
  }

-  const syntheticLocalAuth = resolveSyntheticLocalProviderAuth({ cfg, provider, modelApi: params.modelApi });
+  const syntheticLocalAuth = resolveSyntheticLocalProviderAuth({
+    cfg,
+    provider,
+    modelApi: params.modelApi,
+  });
  if (syntheticLocalAuth) {
    return syntheticLocalAuth;
  }
--- a/src/agents/model-provider-auth.test.ts
+++ b/src/agents/model-provider-auth.test.ts
@@ -8,7 +8,23 @@ const modelCatalogMocks = vi.hoisted(() => ({

 const modelAuthMocks = vi.hoisted(() => ({
  hasRuntimeAvailableProviderAuth:
-    vi.fn<(params: { provider: string; cfg?: OpenClawConfig; workspaceDir?: string }) => boolean>(),
+    vi.fn<
+      (params: {
+        provider: string;
+        cfg?: OpenClawConfig;
+        workspaceDir?: string;
+        envAuthLookup?: unknown;
+      }) => boolean
+    >(),
+}));
+
+const providerAuthAliasMocks = vi.hoisted(() => ({
+  resolveProviderAuthAliasMap: vi.fn(() => ({ openai: "openai" })),
+}));
+
+const modelAuthEnvVarMocks = vi.hoisted(() => ({
+  resolveProviderEnvApiKeyCandidates: vi.fn(() => ({ openai: ["OPENAI_API_KEY"] })),
+  resolveProviderEnvAuthEvidence: vi.fn(() => ({})),
 }));

 const authProfilesMocks = vi.hoisted(() => ({
@@ -27,6 +43,15 @@ vi.mock("./model-auth.js", () => ({
  hasRuntimeAvailableProviderAuth: modelAuthMocks.hasRuntimeAvailableProviderAuth,
 }));

+vi.mock("./provider-auth-aliases.js", () => ({
+  resolveProviderAuthAliasMap: providerAuthAliasMocks.resolveProviderAuthAliasMap,
+}));
+
+vi.mock("./model-auth-env-vars.js", () => ({
+  resolveProviderEnvApiKeyCandidates: modelAuthEnvVarMocks.resolveProviderEnvApiKeyCandidates,
+  resolveProviderEnvAuthEvidence: modelAuthEnvVarMocks.resolveProviderEnvAuthEvidence,
+}));
+
 vi.mock("./auth-profiles.js", () => ({
  ensureAuthProfileStore: authProfilesMocks.ensureAuthProfileStore,
  ensureAuthProfileStoreWithoutExternalProfiles:
@@ -56,6 +81,26 @@ describe("prepared provider auth state", () => {
    vi.clearAllMocks();
  });

+  it("reuses prepared env auth lookup data while warming providers", async () => {
+    const cfg = {} as OpenClawConfig;
+    modelCatalogMocks.loadModelCatalog.mockResolvedValue([
+      { id: "gpt", name: "gpt", provider: "openai" },
+      { id: "claude", name: "claude", provider: "anthropic" },
+    ]);
+    modelAuthMocks.hasRuntimeAvailableProviderAuth.mockReturnValue(false);
+
+    await warmCurrentProviderAuthState(cfg);
+
+    expect(providerAuthAliasMocks.resolveProviderAuthAliasMap).toHaveBeenCalledTimes(1);
+    expect(modelAuthEnvVarMocks.resolveProviderEnvApiKeyCandidates).toHaveBeenCalledTimes(1);
+    expect(modelAuthEnvVarMocks.resolveProviderEnvAuthEvidence).toHaveBeenCalledTimes(1);
+    // Require a defined lookup: `undefined` on both calls would satisfy `toBe` vacuously.
+    const { calls } = modelAuthMocks.hasRuntimeAvailableProviderAuth.mock;
+    const firstLookup = calls[0]?.[0].envAuthLookup;
+    expect(firstLookup).toBeDefined();
+    expect(calls[1]?.[0].envAuthLookup).toBe(firstLookup);
+  });
+
  it("hasAuthForModelProvider returns the prepared answer after warm and falls through to compute after clear", async () => {
    const cfg = {} as OpenClawConfig;
    modelCatalogMocks.loadModelCatalog.mockResolvedValue([
@@ -192,17 +237,17 @@ describe("prepared provider auth state", () => {
    await secondWarm;
    resolveFirstCatalog?.([{ id: "gpt", name: "gpt", provider: "openai" }]);
    await firstWarm;
-    expect(modelAuthMocks.hasRuntimeAvailableProviderAuth).toHaveBeenCalledTimes(2);
+    expect(modelAuthMocks.hasRuntimeAvailableProviderAuth).toHaveBeenCalledTimes(1);

    modelAuthMocks.hasRuntimeAvailableProviderAuth.mockReturnValue(true);
    await expect(hasAuthForModelProvider({ provider: "openai", cfg: secondCfg })).resolves.toBe(
      false,
    );
-    expect(modelAuthMocks.hasRuntimeAvailableProviderAuth).toHaveBeenCalledTimes(2);
+    expect(modelAuthMocks.hasRuntimeAvailableProviderAuth).toHaveBeenCalledTimes(1);
    await expect(hasAuthForModelProvider({ provider: "openai", cfg: firstCfg })).resolves.toBe(
      true,
    );
-    expect(modelAuthMocks.hasRuntimeAvailableProviderAuth).toHaveBeenCalledTimes(3);
+    expect(modelAuthMocks.hasRuntimeAvailableProviderAuth).toHaveBeenCalledTimes(2);
  });

  it("does not publish a warm that is cancelled before completion", async () => {
@@ -224,6 +269,28 @@ describe("prepared provider auth state", () => {

    modelAuthMocks.hasRuntimeAvailableProviderAuth.mockReturnValue(false);
    await expect(hasAuthForModelProvider({ provider: "openai", cfg })).resolves.toBe(false);
-    expect(modelAuthMocks.hasRuntimeAvailableProviderAuth).toHaveBeenCalledTimes(2);
+    expect(modelAuthMocks.hasRuntimeAvailableProviderAuth).toHaveBeenCalledTimes(1);
+  });
+
+  it("stops sweeping providers when a warm is cancelled mid-flight", async () => {
+    const cfg = {} as OpenClawConfig;
+    let cancelled = false;
+    modelCatalogMocks.loadModelCatalog.mockResolvedValue([
+      { id: "gpt", name: "gpt", provider: "openai" },
+      { id: "claude", name: "claude", provider: "anthropic" },
+      { id: "gemini", name: "gemini", provider: "google" },
+    ]);
+    modelAuthMocks.hasRuntimeAvailableProviderAuth.mockImplementation(() => {
+      cancelled = true;
+      return false;
+    });
+
+    await warmCurrentProviderAuthState(cfg, { isCancelled: () => cancelled });
+    expect(modelAuthMocks.hasRuntimeAvailableProviderAuth).toHaveBeenCalledTimes(1);
+
+    modelAuthMocks.hasRuntimeAvailableProviderAuth.mockClear();
+    modelAuthMocks.hasRuntimeAvailableProviderAuth.mockReturnValue(true);
+    await expect(hasAuthForModelProvider({ provider: "openai", cfg })).resolves.toBe(true);
+    expect(modelAuthMocks.hasRuntimeAvailableProviderAuth).toHaveBeenCalledTimes(1);
  });
 });
--- a/src/agents/model-provider-auth.ts
+++ b/src/agents/model-provider-auth.ts
@@ -1,5 +1,6 @@
 import { hashRuntimeConfigValue } from "../config/runtime-snapshot.js";
 import type { OpenClawConfig } from "../config/types.openclaw.js";
+import type { ProviderAuthEvidence } from "../secrets/provider-env-vars.js";
 import {
  listAgentIds,
  resolveAgentDir,
@@ -14,9 +15,14 @@ import {
  listProfilesForProvider,
  type AuthProfileStore,
 } from "./auth-profiles.js";
+import {
+  resolveProviderEnvApiKeyCandidates,
+  resolveProviderEnvAuthEvidence,
+} from "./model-auth-env-vars.js";
 import { hasRuntimeAvailableProviderAuth } from "./model-auth.js";
 import { loadModelCatalog } from "./model-catalog.js";
 import { normalizeProviderId } from "./model-selection.js";
+import { resolveProviderAuthAliasMap } from "./provider-auth-aliases.js";
 import { resolveDefaultAgentWorkspaceDir } from "./workspace.js";

 // Prepared runtime fact: which providers have available auth given the
@@ -31,6 +37,12 @@ type PreparedProviderAuthState = {
  providers: ReadonlyMap<string, boolean>;
 };

+type ProviderEnvAuthLookup = {
+  aliasMap?: Readonly<Record<string, string>>;
+  candidateMap?: Readonly<Record<string, readonly string[]>>;
+  authEvidenceMap?: Readonly<Record<string, readonly ProviderAuthEvidence[]>>;
+};
+
 // One entry per configured agent, keyed by agentId. Populated by
 // warmCurrentProviderAuthState at gateway startup / on reload; consulted by
 // hasAuthForModelProvider on every model-listing call.
@@ -85,6 +97,7 @@ export async function hasAuthForModelProvider(params: {
  store?: AuthProfileStore;
  allowPluginSyntheticAuth?: boolean;
  discoverExternalCliAuth?: boolean;
+  envAuthLookup?: ProviderEnvAuthLookup;
 }): Promise<boolean> {
  const provider = normalizeProviderId(params.provider);
  // The prepared map is built by warmCurrentProviderAuthState — one entry per
@@ -131,6 +144,7 @@ export async function hasAuthForModelProvider(params: {
      workspaceDir: params.workspaceDir,
      env: params.env,
      allowPluginSyntheticAuth: params.allowPluginSyntheticAuth,
+      envAuthLookup: params.envAuthLookup,
    })
  ) {
    return true;
@@ -189,7 +203,12 @@ export async function warmCurrentProviderAuthState(
  // turns our published state stale.
  currentProviderAuthStateGeneration += 1;
  const ownGeneration = currentProviderAuthStateGeneration;
+  const isWarmStale = () =>
+    options.isCancelled?.() === true || ownGeneration !== currentProviderAuthStateGeneration;
  const catalog = await loadModelCatalog({ config: cfg });
+  if (isWarmStale()) {
+    return;
+  }
  const providers = new Set<string>();
  for (const entry of catalog) {
    providers.add(normalizeProviderId(entry.provider));
@@ -201,8 +220,16 @@ export async function warmCurrentProviderAuthState(
  // any agentId. The catalog above is shared across agents; the per-agent
  // work is the auth-discovery sweep against that agent's store.
  for (const agentId of listAgentIds(cfg)) {
+    if (isWarmStale()) {
+      return;
+    }
    const workspaceDir = resolveAgentWorkspaceDir(cfg, agentId);
    const agentDir = resolveAgentDir(cfg, agentId);
+    const envAuthLookup = {
+      aliasMap: resolveProviderAuthAliasMap({ config: cfg, workspaceDir }),
+      candidateMap: resolveProviderEnvApiKeyCandidates({ config: cfg, workspaceDir }),
+      authEvidenceMap: resolveProviderEnvAuthEvidence({ config: cfg, workspaceDir }),
+    };
    // One AuthProfileStore scoped to every candidate provider; without this
    // the per-provider externalCli discovery rebuilds the store ~N times.
    const store = ensureAuthProfileStore(agentDir, {
@@ -214,12 +241,16 @@ export async function warmCurrentProviderAuthState(
    });
    const state = new Map<string, boolean>();
    for (const provider of providers) {
+      if (isWarmStale()) {
+        return;
+      }
      const value = await hasAuthForModelProvider({
        provider,
        cfg,
        workspaceDir,
        agentId,
        store,
+        envAuthLookup,
      });
      state.set(provider, value);
    }