Files
openclaw/extensions/vllm/thinking-policy.ts
2026-06-04 21:02:07 -04:00

67 lines
2.0 KiB
TypeScript

// Vllm plugin module implements thinking policy behavior.
import type {
ProviderDefaultThinkingPolicyContext,
ProviderThinkingProfile,
} from "openclaw/plugin-sdk/plugin-entry";
import { normalizeProviderId } from "openclaw/plugin-sdk/provider-model-shared";
export type VllmQwenThinkingFormat = "chat-template" | "top-level";
const VLLM_BINARY_THINKING_PROFILE = {
levels: [{ id: "off" }, { id: "low", label: "on" }],
defaultLevel: "off",
} satisfies ProviderThinkingProfile;
export function normalizeVllmQwenThinkingFormat(
value: unknown,
): VllmQwenThinkingFormat | undefined {
if (typeof value !== "string") {
return undefined;
}
const normalized = value.trim().toLowerCase().replace(/_/g, "-");
if (
normalized === "chat-template" ||
normalized === "chat-template-kwargs" ||
normalized === "chat-template-kwarg" ||
normalized === "chat-template-arguments" ||
normalized === "qwen-chat-template"
) {
return "chat-template";
}
if (
normalized === "top-level" ||
normalized === "enable-thinking" ||
normalized === "request-body" ||
normalized === "qwen"
) {
return "top-level";
}
return undefined;
}
export function resolveVllmQwenThinkingFormatFromCompat(
compat?: ProviderDefaultThinkingPolicyContext["compat"],
): VllmQwenThinkingFormat | undefined {
return normalizeVllmQwenThinkingFormat(compat?.thinkingFormat);
}
function isVllmNemotronThinkingModel(modelId: string): boolean {
return /\bnemotron-3(?:[-_](?:nano|super|ultra))?\b/i.test(modelId);
}
export function resolveThinkingProfile(
ctx: ProviderDefaultThinkingPolicyContext,
): ProviderThinkingProfile | null {
if (normalizeProviderId(ctx.provider) !== "vllm") {
return null;
}
if (ctx.reasoning === false) {
return null;
}
const qwenFormat = resolveVllmQwenThinkingFormatFromCompat(ctx.compat);
if (qwenFormat || (ctx.reasoning === true && isVllmNemotronThinkingModel(ctx.modelId))) {
return VLLM_BINARY_THINKING_PROFILE;
}
return null;
}