diff --git a/docs/concepts/model-providers.md b/docs/concepts/model-providers.md index 8af22bee223e..e788ee699ae1 100644 --- a/docs/concepts/model-providers.md +++ b/docs/concepts/model-providers.md @@ -306,7 +306,7 @@ See [/providers/kilocode](/providers/kilocode) for setup details. | MiniMax | `minimax` / `minimax-portal` | `MINIMAX_API_KEY` / `MINIMAX_OAUTH_TOKEN` | `minimax/MiniMax-M3` | | Mistral | `mistral` | `MISTRAL_API_KEY` | `mistral/mistral-large-latest` | | Moonshot | `moonshot` | `MOONSHOT_API_KEY` | `moonshot/kimi-k2.6` | -| NVIDIA | `nvidia` | `NVIDIA_API_KEY` | `nvidia/nvidia/nemotron-3-super-120b-a12b` | +| NVIDIA | `nvidia` | `NVIDIA_API_KEY` | `nvidia/nvidia/nemotron-3-ultra-550b-a55b` | | NovitaAI | `novita` | `NOVITA_API_KEY` | `novita/deepseek/deepseek-v3-0324` | | [Ollama Cloud](/providers/ollama-cloud) | `ollama-cloud` | `OLLAMA_API_KEY` | `ollama-cloud/kimi-k2.6` | | OpenRouter | `openrouter` | `OPENROUTER_API_KEY` | `openrouter/auto` | diff --git a/docs/providers/nvidia.md b/docs/providers/nvidia.md index 583007a4a3f6..60a65949bbc5 100644 --- a/docs/providers/nvidia.md +++ b/docs/providers/nvidia.md @@ -3,12 +3,15 @@ summary: "Use NVIDIA's OpenAI-compatible API in OpenClaw" read_when: - You want to use open models in OpenClaw for free - You need NVIDIA_API_KEY setup + - You want to use Nemotron 3 Ultra through NVIDIA title: "NVIDIA" --- NVIDIA provides an OpenAI-compatible API at `https://integrate.api.nvidia.com/v1` for open models for free. Authenticate with an API key from -[build.nvidia.com](https://build.nvidia.com/settings/api-keys). +[build.nvidia.com](https://build.nvidia.com/settings/api-keys). OpenClaw +defaults the NVIDIA provider to Nemotron 3 Ultra, NVIDIA's 550B total / 55B +active reasoning model for long-context agentic work. ## Getting started @@ -24,7 +27,7 @@ open models for free. 
Authenticate with an API key from ```bash - openclaw models set nvidia/nvidia/nemotron-3-super-120b-a12b + openclaw models set nvidia/nvidia/nemotron-3-ultra-550b-a55b ``` @@ -56,7 +59,7 @@ openclaw onboard --auth-choice nvidia-api-key --nvidia-api-key "nvapi-..." }, agents: { defaults: { - model: { primary: "nvidia/nvidia/nemotron-3-super-120b-a12b" }, + model: { primary: "nvidia/nvidia/nemotron-3-ultra-550b-a55b" }, }, }, } @@ -69,22 +72,39 @@ try NVIDIA's public featured-model catalog from `https://assets.ngc.nvidia.com/products/api-catalog/featured-models.json` and caches the ranked result for 24 hours. New featured models from build.nvidia.com therefore appear in setup and model-selection surfaces without waiting for an -OpenClaw release. +OpenClaw release. When the live feed is available, the first returned model is +the default option shown during NVIDIA setup. The fetch uses a fixed HTTPS host policy for `assets.ngc.nvidia.com`. If no NVIDIA API key is configured, or if that public catalog is unavailable or -malformed, OpenClaw falls back to the bundled catalog below. +malformed, OpenClaw falls back to the bundled catalog and bundled default below. + +## Nemotron 3 Ultra + +Nemotron 3 Ultra is the default NVIDIA model in OpenClaw. NVIDIA's build page for +[`nvidia/nemotron-3-ultra-550b-a55b`](https://build.nvidia.com/nvidia/nemotron-3-ultra-550b-a55b) +lists it as an available free endpoint with a 1M-token context specification. +The bundled catalog records a 16,384-token max output to match NVIDIA's current +OpenAI-compatible sample request for the hosted endpoint. + +Use Ultra for the highest-capability NVIDIA default. Keep Super selected when +you want the smaller Nemotron 3 option, or choose one of the third-party models +hosted in NVIDIA's catalog when their context, latency, or behavior fits better. 
+The bundled Ultra row sends `chat_template_kwargs.enable_thinking: false` and +`force_nonempty_content: true` by default so normal chat output stays in the +visible answer instead of exposing reasoning text. ## Bundled fallback catalog -| Model ref | Name | Context | Max output | Notes | -| ------------------------------------------ | ---------------------------- | ------- | ---------- | --------------------------------- | -| `nvidia/nvidia/nemotron-3-super-120b-a12b` | NVIDIA Nemotron 3 Super 120B | 262,144 | 8,192 | Featured fallback | -| `nvidia/moonshotai/kimi-k2.5` | Kimi K2.5 | 262,144 | 8,192 | Featured fallback | -| `nvidia/minimaxai/minimax-m2.7` | Minimax M2.7 | 196,608 | 8,192 | Featured fallback | -| `nvidia/z-ai/glm-5.1` | GLM 5.1 | 202,752 | 8,192 | Featured fallback | -| `nvidia/minimaxai/minimax-m2.5` | MiniMax M2.5 | 196,608 | 8,192 | Deprecated, upgrade compatibility | -| `nvidia/z-ai/glm5` | GLM-5 | 202,752 | 8,192 | Deprecated, upgrade compatibility | +| Model ref | Name | Context | Max output | Notes | +| ------------------------------------------ | ---------------------------- | --------- | ---------- | --------------------------------- | +| `nvidia/nvidia/nemotron-3-ultra-550b-a55b` | NVIDIA Nemotron 3 Ultra 550B | 1,000,000 | 16,384 | Default | +| `nvidia/nvidia/nemotron-3-super-120b-a12b` | NVIDIA Nemotron 3 Super 120B | 262,144 | 8,192 | Featured fallback | +| `nvidia/moonshotai/kimi-k2.5` | Kimi K2.5 | 262,144 | 8,192 | Featured fallback | +| `nvidia/minimaxai/minimax-m2.7` | Minimax M2.7 | 196,608 | 8,192 | Featured fallback | +| `nvidia/z-ai/glm-5.1` | GLM 5.1 | 202,752 | 8,192 | Featured fallback | +| `nvidia/minimaxai/minimax-m2.5` | MiniMax M2.5 | 196,608 | 8,192 | Deprecated, upgrade compatibility | +| `nvidia/z-ai/glm5` | GLM-5 | 202,752 | 8,192 | Deprecated, upgrade compatibility | ## Advanced configuration @@ -97,9 +117,9 @@ malformed, OpenClaw falls back to the bundled catalog below. 
OpenClaw prefers NVIDIA's public featured-model catalog when NVIDIA auth is configured and caches it for 24 hours. The bundled fallback catalog is static - and keeps deprecated shipped refs for upgrade compatibility. Costs default to - `0` in source since NVIDIA currently offers free API access for the listed - models. + and keeps deprecated shipped refs for upgrade compatibility. Costs default + to `0` in source since NVIDIA currently offers free API access for the + listed models. @@ -107,6 +127,36 @@ malformed, OpenClaw falls back to the bundled catalog below. tooling should work out of the box with the NVIDIA base URL. + + NVIDIA's Ultra sample request uses `chat_template_kwargs.enable_thinking` + and `reasoning_budget` for reasoning output. OpenClaw's bundled Ultra row + disables template thinking by default for normal chat use. If you need to + opt into NVIDIA reasoning output or force other NVIDIA-specific request + fields, set per-model params and keep provider-specific overrides scoped to + the NVIDIA model: + + ```json5 + { + agents: { + defaults: { + models: { + "nvidia/nvidia/nemotron-3-ultra-550b-a55b": { + params: { + chat_template_kwargs: { enable_thinking: true }, + extra_body: { reasoning_budget: 16384 }, + }, + }, + }, + }, + }, + } + ``` + + `params.extra_body` is the final OpenAI-compatible request-body override, so + use it only for fields NVIDIA documents for the selected endpoint. + + + Some NVIDIA-hosted custom models can take longer than the default model idle watchdog before they emit a first response chunk. 
For custom NVIDIA provider diff --git a/extensions/nvidia/index.test.ts b/extensions/nvidia/index.test.ts index 94e959133b38..c62427b8ede1 100644 --- a/extensions/nvidia/index.test.ts +++ b/extensions/nvidia/index.test.ts @@ -201,6 +201,7 @@ describe("nvidia provider hooks", () => { const entries = await provider.augmentModelCatalog?.(buildAugmentCatalogContext()); expect(entries?.map((entry) => entry.id)).toEqual([ + "nvidia/nemotron-3-ultra-550b-a55b", "nvidia/nemotron-3-super-120b-a12b", "moonshotai/kimi-k2.5", "minimaxai/minimax-m2.7", @@ -219,6 +220,7 @@ describe("nvidia provider hooks", () => { const entries = await provider.augmentModelCatalog?.(buildAugmentCatalogContext("nvapi-test")); expect(entries?.map((entry) => entry.id)).toEqual([ + "nvidia/nemotron-3-ultra-550b-a55b", "nvidia/nemotron-3-super-120b-a12b", "moonshotai/kimi-k2.5", "minimaxai/minimax-m2.7", @@ -287,6 +289,7 @@ describe("nvidia provider hooks", () => { const staticRows = await catalogProvider?.staticCatalog?.(buildCatalogContext()); expect(staticRows?.map((entry) => `${entry.source}:${entry.provider}/${entry.model}`)).toEqual([ + "static:nvidia/nvidia/nemotron-3-ultra-550b-a55b", "static:nvidia/nvidia/nemotron-3-super-120b-a12b", "static:nvidia/moonshotai/kimi-k2.5", "static:nvidia/minimaxai/minimax-m2.7", diff --git a/extensions/nvidia/onboard.test.ts b/extensions/nvidia/onboard.test.ts index 61075e637e1c..b0523593eea7 100644 --- a/extensions/nvidia/onboard.test.ts +++ b/extensions/nvidia/onboard.test.ts @@ -15,6 +15,7 @@ describe("nvidia onboard", () => { expect(provider.baseUrl).toBe("https://integrate.api.nvidia.com/v1"); expect(provider.api).toBe("openai-completions"); expect(provider.models.map((model) => model.id)).toEqual([ + "nvidia/nemotron-3-ultra-550b-a55b", "nvidia/nemotron-3-super-120b-a12b", "moonshotai/kimi-k2.5", "minimaxai/minimax-m2.7", @@ -26,7 +27,7 @@ describe("nvidia onboard", () => { // form via preserveLiteralProviderPrefix. 
expectProviderOnboardPrimaryModel({ applyConfig: applyNvidiaConfig, - modelRef: "nvidia/nemotron-3-super-120b-a12b", + modelRef: "nvidia/nemotron-3-ultra-550b-a55b", }); }); @@ -42,6 +43,7 @@ describe("nvidia onboard", () => { }); expect(provider?.models.map((model) => model.id)).toEqual([ "nvidia/custom-model", + "nvidia/nemotron-3-ultra-550b-a55b", "nvidia/nemotron-3-super-120b-a12b", "moonshotai/kimi-k2.5", "minimaxai/minimax-m2.7", diff --git a/extensions/nvidia/openclaw.plugin.json b/extensions/nvidia/openclaw.plugin.json index ee8fbd1ffb5e..be01e77f4957 100644 --- a/extensions/nvidia/openclaw.plugin.json +++ b/extensions/nvidia/openclaw.plugin.json @@ -25,6 +25,22 @@ "baseUrl": "https://integrate.api.nvidia.com/v1", "api": "openai-completions", "models": [ + { + "id": "nvidia/nemotron-3-ultra-550b-a55b", + "name": "NVIDIA Nemotron 3 Ultra 550B", + "input": ["text"], + "contextWindow": 1000000, + "maxTokens": 16384, + "cost": { + "input": 0, + "output": 0, + "cacheRead": 0, + "cacheWrite": 0 + }, + "compat": { + "requiresStringContent": true + } + }, { "id": "nvidia/nemotron-3-super-120b-a12b", "name": "NVIDIA Nemotron 3 Super 120B", diff --git a/extensions/nvidia/provider-catalog.test.ts b/extensions/nvidia/provider-catalog.test.ts index a1a79a8b7338..42b80fcdbf9b 100644 --- a/extensions/nvidia/provider-catalog.test.ts +++ b/extensions/nvidia/provider-catalog.test.ts @@ -40,6 +40,7 @@ describe("nvidia provider catalog", () => { expect(provider.api).toBe("openai-completions"); expect(provider.apiKey).toBe("NVIDIA_API_KEY"); expect(provider.models.map((model) => model.id)).toEqual([ + "nvidia/nemotron-3-ultra-550b-a55b", "nvidia/nemotron-3-super-120b-a12b", "moonshotai/kimi-k2.5", "minimaxai/minimax-m2.7", @@ -50,6 +51,16 @@ describe("nvidia provider catalog", () => { expect(provider.models.filter((model) => model.compat?.requiresStringContent !== true)).toEqual( [], ); + expect(provider.models[0]).toMatchObject({ + contextWindow: 1_000_000, + maxTokens: 
16_384, + params: { + chat_template_kwargs: { + enable_thinking: false, + force_nonempty_content: true, + }, + }, + }); }); it("promotes ranked models from NVIDIA's featured catalog", async () => { @@ -99,6 +110,7 @@ describe("nvidia provider catalog", () => { const provider = await buildLiveNvidiaProvider(); expect(provider.models.map((model) => model.id)).toEqual([ + "nvidia/nemotron-3-ultra-550b-a55b", "nvidia/nemotron-3-super-120b-a12b", "moonshotai/kimi-k2.5", "minimaxai/minimax-m2.7", @@ -227,4 +239,41 @@ describe("nvidia provider catalog", () => { expect(second.models.map((model) => model.id)).toEqual(["z-ai/glm-5.1"]); expect(ssrfRuntimeMocks.fetchWithSsrFGuard).toHaveBeenCalledTimes(2); }); + + it("applies bundled Ultra defaults when featured catalog returns Ultra", async () => { + mockFeaturedCatalogResponse({ + "featured-models": [ + { + model: "nemotron-3-ultra-550b-a55b", + "model-name": "Nemotron 3 Ultra 550B", + context: 1000000, + "max-output": 16384, + }, + { + model: "minimaxai/minimax-m2.7", + "model-name": "Minimax M2.7", + context: 196608, + "max-output": 8192, + }, + ], + }); + + const provider = await buildLiveNvidiaProvider(); + + expect(provider.models.map((model) => model.id)).toEqual([ + "nvidia/nemotron-3-ultra-550b-a55b", + "minimaxai/minimax-m2.7", + ]); + expect(provider.models[0]).toMatchObject({ + name: "Nemotron 3 Ultra 550B", + contextWindow: 1_000_000, + maxTokens: 16_384, + params: { + chat_template_kwargs: { + enable_thinking: false, + force_nonempty_content: true, + }, + }, + }); + }); }); diff --git a/extensions/nvidia/provider-catalog.ts b/extensions/nvidia/provider-catalog.ts index 4fe9cdcb9a98..473b9be33cb3 100644 --- a/extensions/nvidia/provider-catalog.ts +++ b/extensions/nvidia/provider-catalog.ts @@ -15,7 +15,7 @@ import { } from "openclaw/plugin-sdk/ssrf-runtime"; import manifest from "./openclaw.plugin.json" with { type: "json" }; -export const NVIDIA_DEFAULT_MODEL_ID = "nvidia/nemotron-3-super-120b-a12b"; +export 
const NVIDIA_DEFAULT_MODEL_ID = "nvidia/nemotron-3-ultra-550b-a55b"; export const NVIDIA_FEATURED_MODELS_URL = "https://assets.ngc.nvidia.com/products/api-catalog/featured-models.json"; @@ -32,6 +32,12 @@ const FEATURED_MODEL_COST = { cacheRead: 0, cacheWrite: 0, } as const; +const NVIDIA_ULTRA_DEFAULT_PARAMS = { + chat_template_kwargs: { + enable_thinking: false, + force_nonempty_content: true, + }, +} as const; type NvidiaFeaturedModel = { model: string; @@ -67,13 +73,17 @@ const lookupNvidiaFeaturedModelHostname = (async ( }) as LookupFn; export function buildNvidiaProvider(): ModelProviderConfig { - return { + const provider = { ...buildManifestModelProviderConfig({ providerId: "nvidia", catalog: manifest.modelCatalog.providers.nvidia, }), apiKey: "NVIDIA_API_KEY", }; + return { + ...provider, + models: applyNvidiaModelDefaults(provider.models ?? []), + }; } export async function buildLiveNvidiaProvider(): Promise { @@ -84,7 +94,7 @@ export async function buildLiveNvidiaProvider(): Promise { } return { ...provider, - models: featuredModels, + models: applyNvidiaModelDefaults(featuredModels), }; } @@ -99,7 +109,7 @@ export async function buildSelectableLiveNvidiaProvider(): Promise 0 ? models : null; } +function applyNvidiaModelDefaults(models: ModelDefinitionConfig[]): ModelDefinitionConfig[] { + return models.map((model) => + model.id === NVIDIA_DEFAULT_MODEL_ID + ? { + ...model, + params: { + ...model.params, + chat_template_kwargs: { + ...NVIDIA_ULTRA_DEFAULT_PARAMS.chat_template_kwargs, + ...(isRecord(model.params?.chat_template_kwargs) + ? 
model.params.chat_template_kwargs + : {}), + }, + }, + } + : model, + ); +} + +function isRecord(value: unknown): value is Record<string, unknown> { + return Boolean(value) && typeof value === "object" && !Array.isArray(value); +} + function parseNvidiaFeaturedModel(row: unknown): ModelDefinitionConfig | null { if (!row || typeof row !== "object") { return null; diff --git a/src/commands/model-picker.test.ts b/src/commands/model-picker.test.ts index 0cbdf97bd88c..bb1ac41e687b 100644 --- a/src/commands/model-picker.test.ts +++ b/src/commands/model-picker.test.ts @@ -901,6 +901,52 @@ describe("promptDefaultModel", () => { ]); }); + it("preselects the first live provider row when keep-current is disabled", async () => { + loadPreferredProviderPickerCatalog.mockResolvedValue([ + { + provider: "nvidia", + id: "z-ai/glm-5.1", + name: "GLM 5.1", + }, + { + provider: "nvidia", + id: "nvidia/nemotron-3-super-120b-a12b", + name: "NVIDIA Nemotron 3 Super 120B", + }, + ]); + const select = vi.fn(async (params) => params.initialValue as never); + const prompter = makePrompter({ select }); + const config = { + agents: { + defaults: { + model: "nvidia/nemotron-3-ultra-550b-a55b", + }, + }, + } as OpenClawConfig; + + const result = await promptDefaultModel({ + config, + prompter, + allowKeep: false, + includeManual: false, + ignoreAllowlist: true, + preferredProvider: "nvidia", + browseCatalogOnDemand: true, + }); + + expect(result.model).toBe("nvidia/z-ai/glm-5.1"); + expect(pickerParams(select as MockCallSource).initialValue).toBe("nvidia/z-ai/glm-5.1"); + expect(optionValues(pickerOptions(select as MockCallSource))).toEqual([ + "nvidia/z-ai/glm-5.1", + "nvidia/nemotron-3-super-120b-a12b", + "nvidia/nemotron-3-ultra-550b-a55b", + ]); + expect( + requireOption(pickerOptions(select as MockCallSource), "nvidia/nemotron-3-ultra-550b-a55b") + .hint, + ).toBe("current (not in catalog)"); + }); + it("keeps on-demand NVIDIA vendor labels single-prefixed after browsing", async () => { 
loadPreferredProviderPickerCatalog.mockResolvedValue([ { diff --git a/src/flows/model-picker.ts b/src/flows/model-picker.ts index 319a86bb6fb9..9bcdb28c28b3 100644 --- a/src/flows/model-picker.ts +++ b/src/flows/model-picker.ts @@ -293,6 +293,11 @@ async function resolveLiteralPrefixProviderIds(params: { return ids; } +function modelCatalogEntryKey(entry: { provider: string; id: string }): string { + const normalizedRef = normalizeModelRef(entry.provider, entry.id); + return modelKey(normalizedRef.provider, normalizedRef.model); +} + async function addModelSelectOption(params: { entry: { provider: string; @@ -309,7 +314,7 @@ async function addModelSelectOption(params: { isVisibleProvider: (provider: string) => boolean; }) { const normalizedRef = normalizeModelRef(params.entry.provider, params.entry.id); - const key = modelKey(normalizedRef.provider, normalizedRef.model); + const key = modelCatalogEntryKey(params.entry); if ( params.seen.has(key) || HIDDEN_ROUTER_MODELS.has(key) || @@ -917,17 +922,23 @@ export async function promptDefaultModel( }); } + const firstPreferredModel = + preferredProvider && hasPreferredProvider + ? filteredModels.find((entry) => matchesPreferredProvider?.(entry.provider)) + : undefined; + const firstPreferredModelKey = firstPreferredModel + ? modelCatalogEntryKey(firstPreferredModel) + : undefined; let initialValue: string | undefined = allowKeep ? KEEP_VALUE : configuredKey || undefined; - if ( + if (!allowKeep && firstPreferredModelKey) { + initialValue = firstPreferredModelKey; + } else if ( allowKeep && - hasPreferredProvider && + firstPreferredModelKey && preferredProvider && !matchesPreferredProvider?.(resolved.provider) ) { - const firstModel = filteredModels[0]; - if (firstModel) { - initialValue = modelKey(firstModel.provider, firstModel.id); - } + initialValue = firstPreferredModelKey; } const selection = await params.prompter.select({