perf(gateway): delay provider auth warmup

This commit is contained in:
Vincent Koc
2026-06-04 02:13:12 +02:00
parent 009d7335b5
commit eb5d6c7294
6 changed files with 146 additions and 88 deletions

View File

@@ -0,0 +1,11 @@
/**
 * Zero-argument callback fired whenever an auth-profile failure is recorded.
 */
export type AuthProfileFailureHook = () => void;

// Single module-wide slot for the hook; `undefined` means nothing is registered.
let registeredFailureHook: AuthProfileFailureHook | undefined;

/**
 * Installs the auth-profile failure hook, replacing whatever was registered
 * before.
 *
 * @param hook - Callback to run on each notification, or `undefined` to
 *   unregister the current one.
 */
export function setAuthProfileFailureHook(hook: AuthProfileFailureHook | undefined): void {
  registeredFailureHook = hook;
}

/**
 * Runs the registered failure hook, if there is one.
 *
 * Nothing is caught here: an exception thrown by the hook propagates to the
 * caller, which decides whether it is fatal.
 */
export function notifyAuthProfileFailureHook(): void {
  const hook = registeredFailureHook;
  if (hook == null) {
    return;
  }
  hook();
}

View File

@@ -9,6 +9,7 @@ import {
import type { OpenClawConfig } from "../../config/types.openclaw.js";
import { createSubsystemLogger } from "../../logging/subsystem.js";
import { resolveProviderRequestHeaders } from "../provider-request-config.js";
import { notifyAuthProfileFailureHook, setAuthProfileFailureHook } from "./failure-hook.js";
import { logAuthProfileFailureStateChange } from "./state-observation.js";
const authProfileUsageLog = createSubsystemLogger("agent/embedded");
@@ -37,14 +38,7 @@ const authProfileUsageDeps = {
updateAuthProfileStoreWithLock,
};
// Invoked once per recorded auth-profile failure. Gateway startup wires this
// to clearCurrentProviderAuthState so the next model-listing call recomputes
// against the real auth state.
let onAuthProfileFailureHook: (() => void) | undefined;
export function setAuthProfileFailureHook(hook: (() => void) | undefined): void {
onAuthProfileFailureHook = hook;
}
export { setAuthProfileFailureHook };
export const testing = {
setDepsForTest(
@@ -763,7 +757,7 @@ export async function markAuthProfileFailure(params: {
});
}
try {
onAuthProfileFailureHook?.();
notifyAuthProfileFailureHook();
} catch (err) {
// Hook errors must not break failure recording; log and continue.
authProfileUsageLog.warn("auth profile failure hook threw", {
@@ -814,7 +808,7 @@ export async function markAuthProfileFailure(params: {
now,
});
try {
onAuthProfileFailureHook?.();
notifyAuthProfileFailureHook();
} catch (err) {
// Hook errors must not break failure recording; log and continue.
authProfileUsageLog.warn("auth profile failure hook threw", {

View File

@@ -0,0 +1,84 @@
/**
 * Prepared auth availability for a single agent: for each provider key,
 * whether auth was found when the state was built.
 */
export type PreparedProviderAuthState = {
  agentId: string;
  configFingerprint: string;
  providers: ReadonlyMap<string, boolean>;
};

/**
 * Array-based form of the prepared states (entry tuples instead of Maps),
 * as accepted by {@link publishProviderAuthWarmSnapshot}.
 */
export type ProviderAuthWarmSnapshot = {
  agents: Array<{
    agentId: string;
    configFingerprint: string;
    providers: Array<[string, boolean]>;
  }>;
};

// Minimal view of a warm worker: only `terminate` is needed here, which keeps
// this module free of a dependency on the concrete Worker type.
type ProviderAuthWarmWorkerHandle = {
  worker: {
    terminate: () => unknown;
  };
  cancelled: boolean;
};

// Published states, one per agent and keyed by agentId; `null` until a warm
// publishes a snapshot, and again after every clear.
let preparedStatesByAgent: ReadonlyMap<string, PreparedProviderAuthState> | null = null;

// Monotonic counter bumped by every claim/clear, so an in-flight warm can tell
// that a newer warm or a clear has superseded it before it publishes.
let activeGeneration = 0;

// Handle of the warm worker currently registered, if any.
let activeWarmWorker: ProviderAuthWarmWorkerHandle | undefined;

/**
 * Returns the currently published per-agent auth states, or `null` when
 * nothing has been published since the last clear.
 */
export function getCurrentProviderAuthStates(): ReadonlyMap<
  string,
  PreparedProviderAuthState
> | null {
  return preparedStatesByAgent;
}

/**
 * Starts a new generation and returns its number. Any previously claimed
 * generation stops being current.
 */
export function claimCurrentProviderAuthStateGeneration(): number {
  return ++activeGeneration;
}

/**
 * Tells whether `generation` is still the most recently claimed one.
 */
export function isCurrentProviderAuthStateGeneration(generation: number): boolean {
  return activeGeneration === generation;
}

/**
 * Records `handle` as the active warm worker. A previously registered handle
 * is overwritten, not cancelled.
 */
export function setCurrentProviderAuthWarmWorker(handle: ProviderAuthWarmWorkerHandle): void {
  activeWarmWorker = handle;
}

/**
 * Forgets the active warm worker, but only if it is `handle`; a different
 * (newer) registration is left untouched. The worker is not terminated.
 */
export function clearCurrentProviderAuthWarmWorker(handle: ProviderAuthWarmWorkerHandle): void {
  if (activeWarmWorker !== handle) {
    return;
  }
  activeWarmWorker = undefined;
}

/**
 * Cancels the active warm worker, if any: marks its handle as cancelled,
 * unregisters it and asks the worker to terminate.
 */
export function cancelCurrentProviderAuthWarmWorker(): void {
  const active = activeWarmWorker;
  if (!active) {
    return;
  }
  active.cancelled = true;
  activeWarmWorker = undefined;
  // Fire-and-forget: the result of terminate() is intentionally not awaited.
  void active.worker.terminate();
}

/**
 * Drops the published states, invalidates every outstanding generation and
 * cancels the active warm worker.
 */
export function clearCurrentProviderAuthState(): void {
  preparedStatesByAgent = null;
  activeGeneration += 1;
  cancelCurrentProviderAuthWarmWorker();
}

/**
 * Replaces the published states with the contents of `snapshot`, rebuilding
 * each agent's provider entries into a Map. No generation check happens here;
 * callers are expected to verify staleness before publishing.
 */
export function publishProviderAuthWarmSnapshot(snapshot: ProviderAuthWarmSnapshot): void {
  const nextStates = new Map<string, PreparedProviderAuthState>();
  for (const { agentId, configFingerprint, providers } of snapshot.agents) {
    nextStates.set(agentId, {
      agentId,
      configFingerprint,
      providers: new Map(providers),
    });
  }
  // Assign only once fully built, so a throw above leaves the old state intact.
  preparedStatesByAgent = nextStates;
}

View File

@@ -25,28 +25,22 @@ import {
type RuntimeProviderAuthLookup,
} from "./model-auth.js";
import { loadModelCatalog } from "./model-catalog.js";
import {
cancelCurrentProviderAuthWarmWorker,
claimCurrentProviderAuthStateGeneration,
clearCurrentProviderAuthState,
clearCurrentProviderAuthWarmWorker,
getCurrentProviderAuthStates,
isCurrentProviderAuthStateGeneration,
publishProviderAuthWarmSnapshot,
setCurrentProviderAuthWarmWorker,
type PreparedProviderAuthState,
type ProviderAuthWarmSnapshot,
} from "./model-provider-auth-state.js";
import { normalizeProviderId } from "./model-selection.js";
import { resolveDefaultAgentWorkspaceDir } from "./workspace.js";
// Prepared runtime fact: which providers have available auth given the
// current cfg + env. Populated explicitly at gateway startup and on config
// reload; consulted by hasAuthForModelProvider so every model-listing call
// (pickers, /models, status commands, CLI) skips the per-provider plugin
// discovery and external-CLI probing on the hot path.
type PreparedProviderAuthState = {
agentId: string;
configFingerprint: string;
providers: ReadonlyMap<string, boolean>;
};
export type ProviderAuthWarmSnapshot = {
agents: Array<{
agentId: string;
configFingerprint: string;
providers: Array<[string, boolean]>;
}>;
};
export type { ProviderAuthWarmSnapshot } from "./model-provider-auth-state.js";
type ProviderAuthWarmWorkerResult =
| {
@@ -81,35 +75,8 @@ type ProviderAuthWarmWorkerRunner = (params: {
const PROVIDER_AUTH_WARM_WORKER_TIMEOUT_MS = 120_000;
const PROVIDER_AUTH_WARM_CANCEL_POLL_MS = 25;
// One entry per configured agent, keyed by agentId. Populated by the provider
// auth warm path; consulted by hasAuthForModelProvider on every model-listing call.
let currentProviderAuthStates: ReadonlyMap<string, PreparedProviderAuthState> | null = null;
const configFingerprintCache = new WeakMap<OpenClawConfig, string>();
// Generation counter guards against an in-flight warm publishing stale
// state after a subsequent warm or clear has invalidated it.
let currentProviderAuthStateGeneration = 0;
let currentProviderAuthWarmWorker:
| {
worker: Worker;
cancelled: boolean;
}
| undefined;
function cancelCurrentProviderAuthWarmWorker(): void {
const current = currentProviderAuthWarmWorker;
if (!current) {
return;
}
current.cancelled = true;
currentProviderAuthWarmWorker = undefined;
void current.worker.terminate();
}
export function clearCurrentProviderAuthState(): void {
currentProviderAuthStates = null;
currentProviderAuthStateGeneration += 1;
cancelCurrentProviderAuthWarmWorker();
}
export { clearCurrentProviderAuthState };
function resolvePreparedStateForCaller(params: {
states: ReadonlyMap<string, PreparedProviderAuthState> | null;
@@ -163,7 +130,7 @@ export async function hasAuthForModelProvider(params: {
// compute so callers that narrow the scope — e.g. gateway `models.list`
// with `runtimeAuthDiscovery: false`, or callers with a non-warmed
// workspaceDir — get the answer they asked for.
const preparedStates = currentProviderAuthStates;
const preparedStates = getCurrentProviderAuthStates();
const workspaceDir = params.workspaceDir ?? resolveDefaultAgentWorkspaceDir();
const configFingerprint = resolveProviderAuthConfigFingerprint(params.cfg);
const preparedState = resolvePreparedStateForCaller({
@@ -297,19 +264,6 @@ function serializeProviderAuthStates(
};
}
function publishProviderAuthWarmSnapshot(snapshot: ProviderAuthWarmSnapshot): void {
currentProviderAuthStates = new Map(
snapshot.agents.map((state) => [
state.agentId,
{
agentId: state.agentId,
configFingerprint: state.configFingerprint,
providers: new Map(state.providers),
},
]),
);
}
function resolveProviderConfigApi(
cfg: OpenClawConfig | undefined,
provider: string,
@@ -436,17 +390,16 @@ export async function warmCurrentProviderAuthState(
): Promise<void> {
// Claim a fresh generation; any concurrent warm or clear bumps this and
// turns our published state stale.
currentProviderAuthStateGeneration += 1;
const ownGeneration = currentProviderAuthStateGeneration;
const ownGeneration = claimCurrentProviderAuthStateGeneration();
const isWarmStale = () =>
options.isCancelled?.() === true || ownGeneration !== currentProviderAuthStateGeneration;
options.isCancelled?.() === true || !isCurrentProviderAuthStateGeneration(ownGeneration);
const snapshot = await buildCurrentProviderAuthStateSnapshot(cfg, {
isCancelled: isWarmStale,
});
if (isWarmStale()) {
return;
}
if (options.isCancelled?.() || ownGeneration !== currentProviderAuthStateGeneration) {
if (options.isCancelled?.() || !isCurrentProviderAuthStateGeneration(ownGeneration)) {
// A newer warm or clear ran while we were building; skip publication so
// the newer answer wins.
return;
@@ -584,7 +537,7 @@ function runProviderAuthWarmWorker(params: {
worker,
cancelled: false,
};
currentProviderAuthWarmWorker = handle;
setCurrentProviderAuthWarmWorker(handle);
return new Promise<ProviderAuthWarmSnapshot>((resolve, reject) => {
let settled = false;
const finish = (complete: () => void) => {
@@ -592,9 +545,7 @@ function runProviderAuthWarmWorker(params: {
return;
}
settled = true;
if (currentProviderAuthWarmWorker === handle) {
currentProviderAuthWarmWorker = undefined;
}
clearCurrentProviderAuthWarmWorker(handle);
if (timer) {
clearTimeout(timer);
}
@@ -674,11 +625,10 @@ export async function warmCurrentProviderAuthStateOffMainThread(
runWorker?: ProviderAuthWarmWorkerRunner;
} = {},
): Promise<void> {
currentProviderAuthStateGeneration += 1;
const ownGeneration = currentProviderAuthStateGeneration;
const ownGeneration = claimCurrentProviderAuthStateGeneration();
cancelCurrentProviderAuthWarmWorker();
const isWarmStale = () =>
options.isCancelled?.() === true || ownGeneration !== currentProviderAuthStateGeneration;
options.isCancelled?.() === true || !isCurrentProviderAuthStateGeneration(ownGeneration);
if (isWarmStale()) {
return;
}

View File

@@ -189,10 +189,17 @@ vi.mock("../agents/runtime-plugins.js", () => ({
}));
vi.mock("../agents/model-provider-auth.js", () => ({
clearCurrentProviderAuthState: hoisted.clearCurrentProviderAuthState,
warmCurrentProviderAuthStateOffMainThread: hoisted.warmCurrentProviderAuthStateOffMainThread,
}));
vi.mock("../agents/model-provider-auth-state.js", () => ({
clearCurrentProviderAuthState: hoisted.clearCurrentProviderAuthState,
}));
vi.mock("../agents/auth-profiles/failure-hook.js", () => ({
setAuthProfileFailureHook: hoisted.setAuthProfileFailureHook,
}));
vi.mock("../agents/auth-profiles.js", async () => {
const actual = await vi.importActual<typeof import("../agents/auth-profiles.js")>(
"../agents/auth-profiles.js",
@@ -1043,7 +1050,9 @@ describe("startGatewayPostAttachRuntime", () => {
const hook = hoisted.setAuthProfileFailureHook.mock.calls[0]?.[0] as (() => void) | undefined;
hook?.();
expect(hoisted.clearCurrentProviderAuthState).toHaveBeenCalledTimes(1);
await vi.waitFor(() => {
expect(hoisted.clearCurrentProviderAuthState).toHaveBeenCalledTimes(1);
});
expect(hoisted.warmCurrentProviderAuthStateOffMainThread).toHaveBeenCalledTimes(1);
await vi.advanceTimersByTimeAsync(1_000);
@@ -1124,6 +1133,7 @@ describe("startGatewayPostAttachRuntime", () => {
const hook = hoisted.setAuthProfileFailureHook.mock.calls[0]?.[0] as (() => void) | undefined;
hook?.();
await vi.dynamicImportSettled();
expect(hoisted.clearCurrentProviderAuthState).not.toHaveBeenCalled();
expect(hoisted.warmCurrentProviderAuthStateOffMainThread).not.toHaveBeenCalled();
} finally {
@@ -1131,6 +1141,10 @@ describe("startGatewayPostAttachRuntime", () => {
}
});
it("keeps the default provider auth prewarm out of the early post-ready window", async () => {
expect(testing.providerAuthPrewarmStartDelayMs).toBe(5_000);
});
it("uses the current provider auth config when the delayed prewarm fires", async () => {
vi.useFakeTimers();
const startupCfg = { marker: "startup" } as never;

View File

@@ -29,7 +29,7 @@ const ACP_BACKEND_READY_TIMEOUT_MS = 5_000;
const ACP_BACKEND_READY_POLL_MS = 50;
const PRIMARY_MODEL_PREWARM_TIMEOUT_MS = 5_000;
const STARTUP_PROVIDER_DISCOVERY_TIMEOUT_MS = 5_000;
const PROVIDER_AUTH_PREWARM_START_DELAY_MS = 1_000;
const PROVIDER_AUTH_PREWARM_START_DELAY_MS = 5_000;
const PROVIDER_AUTH_REWARM_DELAY_MS = 1_000;
const AGENT_RUNTIME_PLUGIN_PREWARM_START_DELAY_MS = 10_000;
const DEFERRED_SIDECAR_START_DELAY_MS = 100;
@@ -233,9 +233,11 @@ function scheduleProviderAuthStatePrewarm(params: {
const isStopped = () => stopped;
const delayMs = params.delayMs ?? PROVIDER_AUTH_PREWARM_START_DELAY_MS;
void (async () => {
const { clearCurrentProviderAuthState, warmCurrentProviderAuthStateOffMainThread } =
await import("../agents/model-provider-auth.js");
const { setAuthProfileFailureHook } = await import("../agents/auth-profiles.js");
const [{ setAuthProfileFailureHook }, { clearCurrentProviderAuthState }] = await Promise.all([
import("../agents/auth-profiles/failure-hook.js"),
import("../agents/model-provider-auth-state.js"),
]);
const loadProviderAuthWarmModule = () => import("../agents/model-provider-auth.js");
const runRewarm = async (reason: string) => {
if (isStopped()) {
return;
@@ -243,6 +245,7 @@ function scheduleProviderAuthStatePrewarm(params: {
const cfg = params.getConfig();
rewarmInFlight = true;
try {
const { warmCurrentProviderAuthStateOffMainThread } = await loadProviderAuthWarmModule();
const metrics = await measureProviderAuthWarm(() =>
warmCurrentProviderAuthStateOffMainThread(cfg, { isCancelled: isStopped }),
);
@@ -298,6 +301,7 @@ function scheduleProviderAuthStatePrewarm(params: {
return;
}
const cfg = params.getConfig();
const { warmCurrentProviderAuthStateOffMainThread } = await loadProviderAuthWarmModule();
const metrics = await measureProviderAuthWarm(() =>
warmCurrentProviderAuthStateOffMainThread(cfg, { isCancelled: isStopped }),
);
@@ -1446,6 +1450,7 @@ export async function startGatewayPostAttachRuntime(
}
export const testing = {
providerAuthPrewarmStartDelayMs: PROVIDER_AUTH_PREWARM_START_DELAY_MS,
hasRestartSentinelFileFast,
prewarmConfiguredPrimaryModel,
prewarmConfiguredPrimaryModelWithTimeout,