mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-06 05:51:15 +08:00
perf(gateway): delay provider auth warmup
This commit is contained in:
11
src/agents/auth-profiles/failure-hook.ts
Normal file
11
src/agents/auth-profiles/failure-hook.ts
Normal file
@@ -0,0 +1,11 @@
|
||||
/**
 * Callback invoked when an auth-profile failure has been recorded.
 * It receives no arguments and its return value is ignored.
 */
export type AuthProfileFailureHook = () => void;

// Single module-wide hook slot; `undefined` means no hook is registered.
let authProfileFailureHook: AuthProfileFailureHook | undefined;

/**
 * Registers the auth-profile failure hook, replacing any hook registered earlier.
 *
 * @param hook - Callback to register, or `undefined` to unregister the current one.
 */
export function setAuthProfileFailureHook(hook: AuthProfileFailureHook | undefined): void {
  authProfileFailureHook = hook;
}

/**
 * Invokes the registered failure hook, if any; does nothing when none is registered.
 *
 * Exceptions thrown by the hook are NOT caught here and propagate to the caller,
 * so callers that must not fail because of a hook need their own try/catch.
 */
export function notifyAuthProfileFailureHook(): void {
  authProfileFailureHook?.();
}
|
||||
@@ -9,6 +9,7 @@ import {
|
||||
import type { OpenClawConfig } from "../../config/types.openclaw.js";
|
||||
import { createSubsystemLogger } from "../../logging/subsystem.js";
|
||||
import { resolveProviderRequestHeaders } from "../provider-request-config.js";
|
||||
import { notifyAuthProfileFailureHook, setAuthProfileFailureHook } from "./failure-hook.js";
|
||||
import { logAuthProfileFailureStateChange } from "./state-observation.js";
|
||||
|
||||
const authProfileUsageLog = createSubsystemLogger("agent/embedded");
|
||||
@@ -37,14 +38,7 @@ const authProfileUsageDeps = {
|
||||
updateAuthProfileStoreWithLock,
|
||||
};
|
||||
|
||||
// Invoked once per recorded auth-profile failure. Gateway startup wires this
|
||||
// to clearCurrentProviderAuthState so the next model-listing call recomputes
|
||||
// against the real auth state.
|
||||
let onAuthProfileFailureHook: (() => void) | undefined;
|
||||
|
||||
export function setAuthProfileFailureHook(hook: (() => void) | undefined): void {
|
||||
onAuthProfileFailureHook = hook;
|
||||
}
|
||||
export { setAuthProfileFailureHook };
|
||||
|
||||
export const testing = {
|
||||
setDepsForTest(
|
||||
@@ -763,7 +757,7 @@ export async function markAuthProfileFailure(params: {
|
||||
});
|
||||
}
|
||||
try {
|
||||
onAuthProfileFailureHook?.();
|
||||
notifyAuthProfileFailureHook();
|
||||
} catch (err) {
|
||||
// Hook errors must not break failure recording; log and continue.
|
||||
authProfileUsageLog.warn("auth profile failure hook threw", {
|
||||
@@ -814,7 +808,7 @@ export async function markAuthProfileFailure(params: {
|
||||
now,
|
||||
});
|
||||
try {
|
||||
onAuthProfileFailureHook?.();
|
||||
notifyAuthProfileFailureHook();
|
||||
} catch (err) {
|
||||
// Hook errors must not break failure recording; log and continue.
|
||||
authProfileUsageLog.warn("auth profile failure hook threw", {
|
||||
|
||||
84
src/agents/model-provider-auth-state.ts
Normal file
84
src/agents/model-provider-auth-state.ts
Normal file
@@ -0,0 +1,84 @@
|
||||
/**
 * Prepared provider auth state for one agent.
 * `providers` maps a provider key to a boolean flag (presumably "auth is
 * available for this provider" — confirm against the code that builds it).
 */
export type PreparedProviderAuthState = {
  agentId: string;
  configFingerprint: string;
  providers: ReadonlyMap<string, boolean>;
};

/**
 * Plain-array form of the per-agent provider auth state.
 * `publishProviderAuthWarmSnapshot` converts it into the map-based
 * `PreparedProviderAuthState` entries held by this module.
 */
export type ProviderAuthWarmSnapshot = {
  agents: Array<{
    agentId: string;
    configFingerprint: string;
    providers: Array<[string, boolean]>;
  }>;
};

// Minimal view of a running warm worker: only `terminate` is required, and
// `cancelled` is flipped to true by cancelCurrentProviderAuthWarmWorker.
type ProviderAuthWarmWorkerHandle = {
  worker: {
    terminate: () => unknown;
  };
  cancelled: boolean;
};

// One entry per configured agent, keyed by agentId. Populated by the provider
// auth warm path; consulted by hasAuthForModelProvider on every model-listing call.
let currentProviderAuthStates: ReadonlyMap<string, PreparedProviderAuthState> | null = null;

// Generation counter guards against an in-flight warm publishing stale state
// after a subsequent warm or clear has invalidated it.
let currentProviderAuthStateGeneration = 0;

// Handle of the warm worker currently registered with this module, if any.
let currentProviderAuthWarmWorker: ProviderAuthWarmWorkerHandle | undefined;

/**
 * Returns the currently published per-agent auth states.
 *
 * @returns The published map keyed by agentId, or `null` when nothing is published.
 */
export function getCurrentProviderAuthStates(): ReadonlyMap<
  string,
  PreparedProviderAuthState
> | null {
  return currentProviderAuthStates;
}

/**
 * Bumps the generation counter and returns the new value.
 * Every previously claimed generation stops being current after this call.
 *
 * @returns The freshly claimed generation number.
 */
export function claimCurrentProviderAuthStateGeneration(): number {
  currentProviderAuthStateGeneration += 1;
  return currentProviderAuthStateGeneration;
}

/**
 * Reports whether `generation` is still the most recently claimed generation.
 *
 * @param generation - A value previously returned by `claimCurrentProviderAuthStateGeneration`.
 * @returns `true` when no claim or clear has happened since that value was issued.
 */
export function isCurrentProviderAuthStateGeneration(generation: number): boolean {
  return generation === currentProviderAuthStateGeneration;
}

/**
 * Registers `handle` as the current warm worker, overwriting any previous
 * registration without cancelling it.
 *
 * @param handle - Handle of the worker that was just started.
 */
export function setCurrentProviderAuthWarmWorker(handle: ProviderAuthWarmWorkerHandle): void {
  currentProviderAuthWarmWorker = handle;
}

/**
 * Unregisters `handle`, but only if it is still the registered worker.
 * The identity check keeps a finishing older worker from dropping a newer registration.
 *
 * @param handle - Handle of the worker that has finished.
 */
export function clearCurrentProviderAuthWarmWorker(handle: ProviderAuthWarmWorkerHandle): void {
  if (currentProviderAuthWarmWorker === handle) {
    currentProviderAuthWarmWorker = undefined;
  }
}

/**
 * Cancels the registered warm worker, if any: marks its handle as cancelled,
 * unregisters it, then asks the worker to terminate. No-op when none is registered.
 */
export function cancelCurrentProviderAuthWarmWorker(): void {
  const current = currentProviderAuthWarmWorker;
  if (!current) {
    return;
  }
  // Update bookkeeping before terminating so module state is already
  // consistent even if terminate() throws.
  current.cancelled = true;
  currentProviderAuthWarmWorker = undefined;
  // Fire-and-forget: the result of terminate() is intentionally not awaited.
  void current.worker.terminate();
}

/**
 * Drops the published auth states, invalidates every outstanding generation,
 * and cancels the registered warm worker.
 */
export function clearCurrentProviderAuthState(): void {
  currentProviderAuthStates = null;
  claimCurrentProviderAuthStateGeneration();
  cancelCurrentProviderAuthWarmWorker();
}

/**
 * Replaces the published auth states with the contents of `snapshot`.
 *
 * No generation check happens here; callers decide whether the snapshot is
 * still current before publishing. If an agentId occurs more than once, the
 * last occurrence wins.
 *
 * @param snapshot - Per-agent provider auth data in plain-array form.
 */
export function publishProviderAuthWarmSnapshot(snapshot: ProviderAuthWarmSnapshot): void {
  const prepared = new Map<string, PreparedProviderAuthState>();
  for (const agent of snapshot.agents) {
    prepared.set(agent.agentId, {
      agentId: agent.agentId,
      configFingerprint: agent.configFingerprint,
      // Copy the pairs into a fresh map so the published state does not alias the snapshot.
      providers: new Map(agent.providers),
    });
  }
  currentProviderAuthStates = prepared;
}
|
||||
@@ -25,28 +25,22 @@ import {
|
||||
type RuntimeProviderAuthLookup,
|
||||
} from "./model-auth.js";
|
||||
import { loadModelCatalog } from "./model-catalog.js";
|
||||
import {
|
||||
cancelCurrentProviderAuthWarmWorker,
|
||||
claimCurrentProviderAuthStateGeneration,
|
||||
clearCurrentProviderAuthState,
|
||||
clearCurrentProviderAuthWarmWorker,
|
||||
getCurrentProviderAuthStates,
|
||||
isCurrentProviderAuthStateGeneration,
|
||||
publishProviderAuthWarmSnapshot,
|
||||
setCurrentProviderAuthWarmWorker,
|
||||
type PreparedProviderAuthState,
|
||||
type ProviderAuthWarmSnapshot,
|
||||
} from "./model-provider-auth-state.js";
|
||||
import { normalizeProviderId } from "./model-selection.js";
|
||||
import { resolveDefaultAgentWorkspaceDir } from "./workspace.js";
|
||||
|
||||
// Prepared runtime fact: which providers have available auth given the
|
||||
// current cfg + env. Populated explicitly at gateway startup and on config
|
||||
// reload; consulted by hasAuthForModelProvider so every model-listing call
|
||||
// (pickers, /models, status commands, CLI) skips the per-provider plugin
|
||||
// discovery and external-CLI probing on the hot path.
|
||||
|
||||
type PreparedProviderAuthState = {
|
||||
agentId: string;
|
||||
configFingerprint: string;
|
||||
providers: ReadonlyMap<string, boolean>;
|
||||
};
|
||||
|
||||
export type ProviderAuthWarmSnapshot = {
|
||||
agents: Array<{
|
||||
agentId: string;
|
||||
configFingerprint: string;
|
||||
providers: Array<[string, boolean]>;
|
||||
}>;
|
||||
};
|
||||
export type { ProviderAuthWarmSnapshot } from "./model-provider-auth-state.js";
|
||||
|
||||
type ProviderAuthWarmWorkerResult =
|
||||
| {
|
||||
@@ -81,35 +75,8 @@ type ProviderAuthWarmWorkerRunner = (params: {
|
||||
const PROVIDER_AUTH_WARM_WORKER_TIMEOUT_MS = 120_000;
|
||||
const PROVIDER_AUTH_WARM_CANCEL_POLL_MS = 25;
|
||||
|
||||
// One entry per configured agent, keyed by agentId. Populated by the provider
|
||||
// auth warm path; consulted by hasAuthForModelProvider on every model-listing call.
|
||||
let currentProviderAuthStates: ReadonlyMap<string, PreparedProviderAuthState> | null = null;
|
||||
const configFingerprintCache = new WeakMap<OpenClawConfig, string>();
|
||||
// Generation counter guards against an in-flight warm publishing stale
|
||||
// state after a subsequent warm or clear has invalidated it.
|
||||
let currentProviderAuthStateGeneration = 0;
|
||||
let currentProviderAuthWarmWorker:
|
||||
| {
|
||||
worker: Worker;
|
||||
cancelled: boolean;
|
||||
}
|
||||
| undefined;
|
||||
|
||||
function cancelCurrentProviderAuthWarmWorker(): void {
|
||||
const current = currentProviderAuthWarmWorker;
|
||||
if (!current) {
|
||||
return;
|
||||
}
|
||||
current.cancelled = true;
|
||||
currentProviderAuthWarmWorker = undefined;
|
||||
void current.worker.terminate();
|
||||
}
|
||||
|
||||
export function clearCurrentProviderAuthState(): void {
|
||||
currentProviderAuthStates = null;
|
||||
currentProviderAuthStateGeneration += 1;
|
||||
cancelCurrentProviderAuthWarmWorker();
|
||||
}
|
||||
export { clearCurrentProviderAuthState };
|
||||
|
||||
function resolvePreparedStateForCaller(params: {
|
||||
states: ReadonlyMap<string, PreparedProviderAuthState> | null;
|
||||
@@ -163,7 +130,7 @@ export async function hasAuthForModelProvider(params: {
|
||||
// compute so callers that narrow the scope — e.g. gateway `models.list`
|
||||
// with `runtimeAuthDiscovery: false`, or callers with a non-warmed
|
||||
// workspaceDir — get the answer they asked for.
|
||||
const preparedStates = currentProviderAuthStates;
|
||||
const preparedStates = getCurrentProviderAuthStates();
|
||||
const workspaceDir = params.workspaceDir ?? resolveDefaultAgentWorkspaceDir();
|
||||
const configFingerprint = resolveProviderAuthConfigFingerprint(params.cfg);
|
||||
const preparedState = resolvePreparedStateForCaller({
|
||||
@@ -297,19 +264,6 @@ function serializeProviderAuthStates(
|
||||
};
|
||||
}
|
||||
|
||||
function publishProviderAuthWarmSnapshot(snapshot: ProviderAuthWarmSnapshot): void {
|
||||
currentProviderAuthStates = new Map(
|
||||
snapshot.agents.map((state) => [
|
||||
state.agentId,
|
||||
{
|
||||
agentId: state.agentId,
|
||||
configFingerprint: state.configFingerprint,
|
||||
providers: new Map(state.providers),
|
||||
},
|
||||
]),
|
||||
);
|
||||
}
|
||||
|
||||
function resolveProviderConfigApi(
|
||||
cfg: OpenClawConfig | undefined,
|
||||
provider: string,
|
||||
@@ -436,17 +390,16 @@ export async function warmCurrentProviderAuthState(
|
||||
): Promise<void> {
|
||||
// Claim a fresh generation; any concurrent warm or clear bumps this and
|
||||
// turns our published state stale.
|
||||
currentProviderAuthStateGeneration += 1;
|
||||
const ownGeneration = currentProviderAuthStateGeneration;
|
||||
const ownGeneration = claimCurrentProviderAuthStateGeneration();
|
||||
const isWarmStale = () =>
|
||||
options.isCancelled?.() === true || ownGeneration !== currentProviderAuthStateGeneration;
|
||||
options.isCancelled?.() === true || !isCurrentProviderAuthStateGeneration(ownGeneration);
|
||||
const snapshot = await buildCurrentProviderAuthStateSnapshot(cfg, {
|
||||
isCancelled: isWarmStale,
|
||||
});
|
||||
if (isWarmStale()) {
|
||||
return;
|
||||
}
|
||||
if (options.isCancelled?.() || ownGeneration !== currentProviderAuthStateGeneration) {
|
||||
if (options.isCancelled?.() || !isCurrentProviderAuthStateGeneration(ownGeneration)) {
|
||||
// A newer warm or clear ran while we were building; skip publication so
|
||||
// the newer answer wins.
|
||||
return;
|
||||
@@ -584,7 +537,7 @@ function runProviderAuthWarmWorker(params: {
|
||||
worker,
|
||||
cancelled: false,
|
||||
};
|
||||
currentProviderAuthWarmWorker = handle;
|
||||
setCurrentProviderAuthWarmWorker(handle);
|
||||
return new Promise<ProviderAuthWarmSnapshot>((resolve, reject) => {
|
||||
let settled = false;
|
||||
const finish = (complete: () => void) => {
|
||||
@@ -592,9 +545,7 @@ function runProviderAuthWarmWorker(params: {
|
||||
return;
|
||||
}
|
||||
settled = true;
|
||||
if (currentProviderAuthWarmWorker === handle) {
|
||||
currentProviderAuthWarmWorker = undefined;
|
||||
}
|
||||
clearCurrentProviderAuthWarmWorker(handle);
|
||||
if (timer) {
|
||||
clearTimeout(timer);
|
||||
}
|
||||
@@ -674,11 +625,10 @@ export async function warmCurrentProviderAuthStateOffMainThread(
|
||||
runWorker?: ProviderAuthWarmWorkerRunner;
|
||||
} = {},
|
||||
): Promise<void> {
|
||||
currentProviderAuthStateGeneration += 1;
|
||||
const ownGeneration = currentProviderAuthStateGeneration;
|
||||
const ownGeneration = claimCurrentProviderAuthStateGeneration();
|
||||
cancelCurrentProviderAuthWarmWorker();
|
||||
const isWarmStale = () =>
|
||||
options.isCancelled?.() === true || ownGeneration !== currentProviderAuthStateGeneration;
|
||||
options.isCancelled?.() === true || !isCurrentProviderAuthStateGeneration(ownGeneration);
|
||||
if (isWarmStale()) {
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -189,10 +189,17 @@ vi.mock("../agents/runtime-plugins.js", () => ({
|
||||
}));
|
||||
|
||||
vi.mock("../agents/model-provider-auth.js", () => ({
|
||||
clearCurrentProviderAuthState: hoisted.clearCurrentProviderAuthState,
|
||||
warmCurrentProviderAuthStateOffMainThread: hoisted.warmCurrentProviderAuthStateOffMainThread,
|
||||
}));
|
||||
|
||||
vi.mock("../agents/model-provider-auth-state.js", () => ({
|
||||
clearCurrentProviderAuthState: hoisted.clearCurrentProviderAuthState,
|
||||
}));
|
||||
|
||||
vi.mock("../agents/auth-profiles/failure-hook.js", () => ({
|
||||
setAuthProfileFailureHook: hoisted.setAuthProfileFailureHook,
|
||||
}));
|
||||
|
||||
vi.mock("../agents/auth-profiles.js", async () => {
|
||||
const actual = await vi.importActual<typeof import("../agents/auth-profiles.js")>(
|
||||
"../agents/auth-profiles.js",
|
||||
@@ -1043,7 +1050,9 @@ describe("startGatewayPostAttachRuntime", () => {
|
||||
|
||||
const hook = hoisted.setAuthProfileFailureHook.mock.calls[0]?.[0] as (() => void) | undefined;
|
||||
hook?.();
|
||||
expect(hoisted.clearCurrentProviderAuthState).toHaveBeenCalledTimes(1);
|
||||
await vi.waitFor(() => {
|
||||
expect(hoisted.clearCurrentProviderAuthState).toHaveBeenCalledTimes(1);
|
||||
});
|
||||
expect(hoisted.warmCurrentProviderAuthStateOffMainThread).toHaveBeenCalledTimes(1);
|
||||
|
||||
await vi.advanceTimersByTimeAsync(1_000);
|
||||
@@ -1124,6 +1133,7 @@ describe("startGatewayPostAttachRuntime", () => {
|
||||
|
||||
const hook = hoisted.setAuthProfileFailureHook.mock.calls[0]?.[0] as (() => void) | undefined;
|
||||
hook?.();
|
||||
await vi.dynamicImportSettled();
|
||||
expect(hoisted.clearCurrentProviderAuthState).not.toHaveBeenCalled();
|
||||
expect(hoisted.warmCurrentProviderAuthStateOffMainThread).not.toHaveBeenCalled();
|
||||
} finally {
|
||||
@@ -1131,6 +1141,10 @@ describe("startGatewayPostAttachRuntime", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("keeps the default provider auth prewarm out of the early post-ready window", async () => {
|
||||
expect(testing.providerAuthPrewarmStartDelayMs).toBe(5_000);
|
||||
});
|
||||
|
||||
it("uses the current provider auth config when the delayed prewarm fires", async () => {
|
||||
vi.useFakeTimers();
|
||||
const startupCfg = { marker: "startup" } as never;
|
||||
|
||||
@@ -29,7 +29,7 @@ const ACP_BACKEND_READY_TIMEOUT_MS = 5_000;
|
||||
const ACP_BACKEND_READY_POLL_MS = 50;
|
||||
const PRIMARY_MODEL_PREWARM_TIMEOUT_MS = 5_000;
|
||||
const STARTUP_PROVIDER_DISCOVERY_TIMEOUT_MS = 5_000;
|
||||
const PROVIDER_AUTH_PREWARM_START_DELAY_MS = 1_000;
|
||||
const PROVIDER_AUTH_PREWARM_START_DELAY_MS = 5_000;
|
||||
const PROVIDER_AUTH_REWARM_DELAY_MS = 1_000;
|
||||
const AGENT_RUNTIME_PLUGIN_PREWARM_START_DELAY_MS = 10_000;
|
||||
const DEFERRED_SIDECAR_START_DELAY_MS = 100;
|
||||
@@ -233,9 +233,11 @@ function scheduleProviderAuthStatePrewarm(params: {
|
||||
const isStopped = () => stopped;
|
||||
const delayMs = params.delayMs ?? PROVIDER_AUTH_PREWARM_START_DELAY_MS;
|
||||
void (async () => {
|
||||
const { clearCurrentProviderAuthState, warmCurrentProviderAuthStateOffMainThread } =
|
||||
await import("../agents/model-provider-auth.js");
|
||||
const { setAuthProfileFailureHook } = await import("../agents/auth-profiles.js");
|
||||
const [{ setAuthProfileFailureHook }, { clearCurrentProviderAuthState }] = await Promise.all([
|
||||
import("../agents/auth-profiles/failure-hook.js"),
|
||||
import("../agents/model-provider-auth-state.js"),
|
||||
]);
|
||||
const loadProviderAuthWarmModule = () => import("../agents/model-provider-auth.js");
|
||||
const runRewarm = async (reason: string) => {
|
||||
if (isStopped()) {
|
||||
return;
|
||||
@@ -243,6 +245,7 @@ function scheduleProviderAuthStatePrewarm(params: {
|
||||
const cfg = params.getConfig();
|
||||
rewarmInFlight = true;
|
||||
try {
|
||||
const { warmCurrentProviderAuthStateOffMainThread } = await loadProviderAuthWarmModule();
|
||||
const metrics = await measureProviderAuthWarm(() =>
|
||||
warmCurrentProviderAuthStateOffMainThread(cfg, { isCancelled: isStopped }),
|
||||
);
|
||||
@@ -298,6 +301,7 @@ function scheduleProviderAuthStatePrewarm(params: {
|
||||
return;
|
||||
}
|
||||
const cfg = params.getConfig();
|
||||
const { warmCurrentProviderAuthStateOffMainThread } = await loadProviderAuthWarmModule();
|
||||
const metrics = await measureProviderAuthWarm(() =>
|
||||
warmCurrentProviderAuthStateOffMainThread(cfg, { isCancelled: isStopped }),
|
||||
);
|
||||
@@ -1446,6 +1450,7 @@ export async function startGatewayPostAttachRuntime(
|
||||
}
|
||||
|
||||
export const testing = {
|
||||
providerAuthPrewarmStartDelayMs: PROVIDER_AUTH_PREWARM_START_DELAY_MS,
|
||||
hasRestartSentinelFileFast,
|
||||
prewarmConfiguredPrimaryModel,
|
||||
prewarmConfiguredPrimaryModelWithTimeout,
|
||||
|
||||
Reference in New Issue
Block a user