feat(nvidia): default to nemotron ultra

Add NVIDIA Nemotron 3 Ultra to the bundled NVIDIA provider catalog and make it the bundled fallback default. Keep successful NVIDIA featured-model feeds authoritative, while treating the first live feed row as the setup default option. Update NVIDIA provider docs and focused provider/model-picker tests for the new Ultra behavior.

Verification:

- node scripts/run-vitest.mjs src/commands/model-picker.test.ts extensions/nvidia/provider-catalog.test.ts extensions/nvidia/index.test.ts extensions/nvidia/onboard.test.ts
- pnpm exec oxfmt --check src/flows/model-picker.ts src/commands/model-picker.test.ts
- pnpm format:docs:check
- pnpm docs:check-mdx
- git diff --check upstream/main...HEAD
- .agents/skills/autoreview/scripts/autoreview --mode branch --base upstream/main --parallel-tests "node scripts/run-vitest.mjs src/commands/model-picker.test.ts extensions/nvidia/provider-catalog.test.ts extensions/nvidia/index.test.ts extensions/nvidia/onboard.test.ts"
2026-06-06 05:51:15 +08:00 · 2026-06-04 20:13:06 +01:00
parent 8048ceca71
commit 829847292e
9 changed files with 239 additions and 29 deletions
--- a/docs/concepts/model-providers.md
+++ b/docs/concepts/model-providers.md
@@ -306,7 +306,7 @@ See [/providers/kilocode](/providers/kilocode) for setup details.
 | MiniMax                                 | `minimax` / `minimax-portal`     | `MINIMAX_API_KEY` / `MINIMAX_OAUTH_TOKEN`                    | `minimax/MiniMax-M3`                                       |
 | Mistral                                 | `mistral`                        | `MISTRAL_API_KEY`                                            | `mistral/mistral-large-latest`                             |
 | Moonshot                                | `moonshot`                       | `MOONSHOT_API_KEY`                                           | `moonshot/kimi-k2.6`                                       |
-| NVIDIA                                  | `nvidia`                         | `NVIDIA_API_KEY`                                             | `nvidia/nvidia/nemotron-3-super-120b-a12b`                 |
+| NVIDIA                                  | `nvidia`                         | `NVIDIA_API_KEY`                                             | `nvidia/nvidia/nemotron-3-ultra-550b-a55b`                 |
 | NovitaAI                                | `novita`                         | `NOVITA_API_KEY`                                             | `novita/deepseek/deepseek-v3-0324`                         |
 | [Ollama Cloud](/providers/ollama-cloud) | `ollama-cloud`                   | `OLLAMA_API_KEY`                                             | `ollama-cloud/kimi-k2.6`                                   |
 | OpenRouter                              | `openrouter`                     | `OPENROUTER_API_KEY`                                         | `openrouter/auto`                                          |
--- a/docs/providers/nvidia.md
+++ b/docs/providers/nvidia.md
@@ -3,12 +3,15 @@ summary: "Use NVIDIA's OpenAI-compatible API in OpenClaw"
 read_when:
  - You want to use open models in OpenClaw for free
  - You need NVIDIA_API_KEY setup
+  - You want to use Nemotron 3 Ultra through NVIDIA
 title: "NVIDIA"
 ---

 NVIDIA provides an OpenAI-compatible API at `https://integrate.api.nvidia.com/v1` for
 open models for free. Authenticate with an API key from
-[build.nvidia.com](https://build.nvidia.com/settings/api-keys).
+[build.nvidia.com](https://build.nvidia.com/settings/api-keys). OpenClaw
+defaults the NVIDIA provider to Nemotron 3 Ultra, NVIDIA's 550B total / 55B
+active reasoning model for long-context agentic work.

 ## Getting started

@@ -24,7 +27,7 @@ open models for free. Authenticate with an API key from
  </Step>
  <Step title="Set an NVIDIA model">
    ```bash
-    openclaw models set nvidia/nvidia/nemotron-3-super-120b-a12b
+    openclaw models set nvidia/nvidia/nemotron-3-ultra-550b-a55b
    ```
  </Step>
 </Steps>
@@ -56,7 +59,7 @@ openclaw onboard --auth-choice nvidia-api-key --nvidia-api-key "nvapi-..."
  },
  agents: {
    defaults: {
-      model: { primary: "nvidia/nvidia/nemotron-3-super-120b-a12b" },
+      model: { primary: "nvidia/nvidia/nemotron-3-ultra-550b-a55b" },
    },
  },
 }
@@ -69,22 +72,39 @@ try NVIDIA's public featured-model catalog from
 `https://assets.ngc.nvidia.com/products/api-catalog/featured-models.json` and
 caches the ranked result for 24 hours. New featured models from build.nvidia.com
 therefore appear in setup and model-selection surfaces without waiting for an
-OpenClaw release.
+OpenClaw release. When the live feed is available, the first returned model is
+the default option shown during NVIDIA setup.

 The fetch uses a fixed HTTPS host policy for `assets.ngc.nvidia.com`. If no
 NVIDIA API key is configured, or if that public catalog is unavailable or
-malformed, OpenClaw falls back to the bundled catalog below.
+malformed, OpenClaw falls back to the bundled catalog and bundled default below.
+
+## Nemotron 3 Ultra
+
+Nemotron 3 Ultra is the default NVIDIA model in OpenClaw. NVIDIA's build page for
+[`nvidia/nemotron-3-ultra-550b-a55b`](https://build.nvidia.com/nvidia/nemotron-3-ultra-550b-a55b)
+lists it as an available free endpoint with a 1M-token context specification.
+The bundled catalog records a 16,384-token max output to match NVIDIA's current
+OpenAI-compatible sample request for the hosted endpoint.
+
+Use Ultra for the highest-capability NVIDIA default. Keep Super selected when
+you want the smaller Nemotron 3 option, or choose one of the third-party models
+hosted in NVIDIA's catalog when their context, latency, or behavior fits better.
+The bundled Ultra row sets `chat_template_kwargs.enable_thinking: false` and
+`chat_template_kwargs.force_nonempty_content: true` by default so normal chat
+output stays in the visible answer instead of exposing reasoning text.

 ## Bundled fallback catalog

-| Model ref                                  | Name                         | Context | Max output | Notes                             |
-| ------------------------------------------ | ---------------------------- | ------- | ---------- | --------------------------------- |
-| `nvidia/nvidia/nemotron-3-super-120b-a12b` | NVIDIA Nemotron 3 Super 120B | 262,144 | 8,192      | Featured fallback                 |
-| `nvidia/moonshotai/kimi-k2.5`              | Kimi K2.5                    | 262,144 | 8,192      | Featured fallback                 |
-| `nvidia/minimaxai/minimax-m2.7`            | Minimax M2.7                 | 196,608 | 8,192      | Featured fallback                 |
-| `nvidia/z-ai/glm-5.1`                      | GLM 5.1                      | 202,752 | 8,192      | Featured fallback                 |
-| `nvidia/minimaxai/minimax-m2.5`            | MiniMax M2.5                 | 196,608 | 8,192      | Deprecated, upgrade compatibility |
-| `nvidia/z-ai/glm5`                         | GLM-5                        | 202,752 | 8,192      | Deprecated, upgrade compatibility |
+| Model ref                                  | Name                         | Context   | Max output | Notes                             |
+| ------------------------------------------ | ---------------------------- | --------- | ---------- | --------------------------------- |
+| `nvidia/nvidia/nemotron-3-ultra-550b-a55b` | NVIDIA Nemotron 3 Ultra 550B | 1,000,000 | 16,384     | Default                           |
+| `nvidia/nvidia/nemotron-3-super-120b-a12b` | NVIDIA Nemotron 3 Super 120B | 262,144   | 8,192      | Featured fallback                 |
+| `nvidia/moonshotai/kimi-k2.5`              | Kimi K2.5                    | 262,144   | 8,192      | Featured fallback                 |
+| `nvidia/minimaxai/minimax-m2.7`            | Minimax M2.7                 | 196,608   | 8,192      | Featured fallback                 |
+| `nvidia/z-ai/glm-5.1`                      | GLM 5.1                      | 202,752   | 8,192      | Featured fallback                 |
+| `nvidia/minimaxai/minimax-m2.5`            | MiniMax M2.5                 | 196,608   | 8,192      | Deprecated, upgrade compatibility |
+| `nvidia/z-ai/glm5`                         | GLM-5                        | 202,752   | 8,192      | Deprecated, upgrade compatibility |

 ## Advanced configuration

@@ -97,9 +117,9 @@ malformed, OpenClaw falls back to the bundled catalog below.
  <Accordion title="Catalog and pricing">
    OpenClaw prefers NVIDIA's public featured-model catalog when NVIDIA auth is
    configured and caches it for 24 hours. The bundled fallback catalog is static
-    and keeps deprecated shipped refs for upgrade compatibility. Costs default to
-    `0` in source since NVIDIA currently offers free API access for the listed
-    models.
+    and keeps deprecated shipped refs for upgrade compatibility. Costs default
+    to `0` in source since NVIDIA currently offers free API access for the
+    listed models.
  </Accordion>

  <Accordion title="OpenAI-compatible endpoint">
@@ -107,6 +127,36 @@ malformed, OpenClaw falls back to the bundled catalog below.
    tooling should work out of the box with the NVIDIA base URL.
  </Accordion>

+  <Accordion title="Nemotron 3 Ultra reasoning params">
+    NVIDIA's Ultra sample request uses `chat_template_kwargs.enable_thinking`
+    and `reasoning_budget` for reasoning output. OpenClaw's bundled Ultra row
+    disables template thinking by default for normal chat use. If you need to
+    opt into NVIDIA reasoning output or force other NVIDIA-specific request
+    fields, set per-model params and keep provider-specific overrides scoped to
+    the NVIDIA model:
+
+    ```json5
+    {
+      agents: {
+        defaults: {
+          models: {
+            "nvidia/nvidia/nemotron-3-ultra-550b-a55b": {
+              params: {
+                chat_template_kwargs: { enable_thinking: true },
+                extra_body: { reasoning_budget: 16384 },
+              },
+            },
+          },
+        },
+      },
+    }
+    ```
+
+    `params.extra_body` is the final OpenAI-compatible request-body override, so
+    use it only for fields NVIDIA documents for the selected endpoint.
+
+  </Accordion>
+
  <Accordion title="Slow custom provider responses">
    Some NVIDIA-hosted custom models can take longer than the default model idle
    watchdog before they emit a first response chunk. For custom NVIDIA provider
--- a/extensions/nvidia/index.test.ts
+++ b/extensions/nvidia/index.test.ts
@@ -201,6 +201,7 @@ describe("nvidia provider hooks", () => {
    const entries = await provider.augmentModelCatalog?.(buildAugmentCatalogContext());

    expect(entries?.map((entry) => entry.id)).toEqual([
+      "nvidia/nemotron-3-ultra-550b-a55b",
      "nvidia/nemotron-3-super-120b-a12b",
      "moonshotai/kimi-k2.5",
      "minimaxai/minimax-m2.7",
@@ -219,6 +220,7 @@ describe("nvidia provider hooks", () => {
    const entries = await provider.augmentModelCatalog?.(buildAugmentCatalogContext("nvapi-test"));

    expect(entries?.map((entry) => entry.id)).toEqual([
+      "nvidia/nemotron-3-ultra-550b-a55b",
      "nvidia/nemotron-3-super-120b-a12b",
      "moonshotai/kimi-k2.5",
      "minimaxai/minimax-m2.7",
@@ -287,6 +289,7 @@ describe("nvidia provider hooks", () => {

    const staticRows = await catalogProvider?.staticCatalog?.(buildCatalogContext());
    expect(staticRows?.map((entry) => `${entry.source}:${entry.provider}/${entry.model}`)).toEqual([
+      "static:nvidia/nvidia/nemotron-3-ultra-550b-a55b",
      "static:nvidia/nvidia/nemotron-3-super-120b-a12b",
      "static:nvidia/moonshotai/kimi-k2.5",
      "static:nvidia/minimaxai/minimax-m2.7",
--- a/extensions/nvidia/onboard.test.ts
+++ b/extensions/nvidia/onboard.test.ts
@@ -15,6 +15,7 @@ describe("nvidia onboard", () => {
    expect(provider.baseUrl).toBe("https://integrate.api.nvidia.com/v1");
    expect(provider.api).toBe("openai-completions");
    expect(provider.models.map((model) => model.id)).toEqual([
+      "nvidia/nemotron-3-ultra-550b-a55b",
      "nvidia/nemotron-3-super-120b-a12b",
      "moonshotai/kimi-k2.5",
      "minimaxai/minimax-m2.7",
@@ -26,7 +27,7 @@ describe("nvidia onboard", () => {
    // form via preserveLiteralProviderPrefix.
    expectProviderOnboardPrimaryModel({
      applyConfig: applyNvidiaConfig,
-      modelRef: "nvidia/nemotron-3-super-120b-a12b",
+      modelRef: "nvidia/nemotron-3-ultra-550b-a55b",
    });
  });

@@ -42,6 +43,7 @@ describe("nvidia onboard", () => {
    });
    expect(provider?.models.map((model) => model.id)).toEqual([
      "nvidia/custom-model",
+      "nvidia/nemotron-3-ultra-550b-a55b",
      "nvidia/nemotron-3-super-120b-a12b",
      "moonshotai/kimi-k2.5",
      "minimaxai/minimax-m2.7",
--- a/extensions/nvidia/openclaw.plugin.json
+++ b/extensions/nvidia/openclaw.plugin.json
@@ -25,6 +25,22 @@
        "baseUrl": "https://integrate.api.nvidia.com/v1",
        "api": "openai-completions",
        "models": [
+          {
+            "id": "nvidia/nemotron-3-ultra-550b-a55b",
+            "name": "NVIDIA Nemotron 3 Ultra 550B",
+            "input": ["text"],
+            "contextWindow": 1000000,
+            "maxTokens": 16384,
+            "cost": {
+              "input": 0,
+              "output": 0,
+              "cacheRead": 0,
+              "cacheWrite": 0
+            },
+            "compat": {
+              "requiresStringContent": true
+            }
+          },
          {
            "id": "nvidia/nemotron-3-super-120b-a12b",
            "name": "NVIDIA Nemotron 3 Super 120B",
--- a/extensions/nvidia/provider-catalog.test.ts
+++ b/extensions/nvidia/provider-catalog.test.ts
@@ -40,6 +40,7 @@ describe("nvidia provider catalog", () => {
    expect(provider.api).toBe("openai-completions");
    expect(provider.apiKey).toBe("NVIDIA_API_KEY");
    expect(provider.models.map((model) => model.id)).toEqual([
+      "nvidia/nemotron-3-ultra-550b-a55b",
      "nvidia/nemotron-3-super-120b-a12b",
      "moonshotai/kimi-k2.5",
      "minimaxai/minimax-m2.7",
@@ -50,6 +51,16 @@ describe("nvidia provider catalog", () => {
    expect(provider.models.filter((model) => model.compat?.requiresStringContent !== true)).toEqual(
      [],
    );
+    expect(provider.models[0]).toMatchObject({
+      contextWindow: 1_000_000,
+      maxTokens: 16_384,
+      params: {
+        chat_template_kwargs: {
+          enable_thinking: false,
+          force_nonempty_content: true,
+        },
+      },
+    });
  });

  it("promotes ranked models from NVIDIA's featured catalog", async () => {
@@ -99,6 +110,7 @@ describe("nvidia provider catalog", () => {
    const provider = await buildLiveNvidiaProvider();

    expect(provider.models.map((model) => model.id)).toEqual([
+      "nvidia/nemotron-3-ultra-550b-a55b",
      "nvidia/nemotron-3-super-120b-a12b",
      "moonshotai/kimi-k2.5",
      "minimaxai/minimax-m2.7",
@@ -227,4 +239,41 @@ describe("nvidia provider catalog", () => {
    expect(second.models.map((model) => model.id)).toEqual(["z-ai/glm-5.1"]);
    expect(ssrfRuntimeMocks.fetchWithSsrFGuard).toHaveBeenCalledTimes(2);
  });
+
+  it("applies bundled Ultra defaults when featured catalog returns Ultra", async () => {
+    mockFeaturedCatalogResponse({
+      "featured-models": [
+        {
+          model: "nemotron-3-ultra-550b-a55b",
+          "model-name": "Nemotron 3 Ultra 550B",
+          context: 1000000,
+          "max-output": 16384,
+        },
+        {
+          model: "minimaxai/minimax-m2.7",
+          "model-name": "Minimax M2.7",
+          context: 196608,
+          "max-output": 8192,
+        },
+      ],
+    });
+
+    const provider = await buildLiveNvidiaProvider();
+
+    expect(provider.models.map((model) => model.id)).toEqual([
+      "nvidia/nemotron-3-ultra-550b-a55b",
+      "minimaxai/minimax-m2.7",
+    ]);
+    expect(provider.models[0]).toMatchObject({
+      name: "Nemotron 3 Ultra 550B",
+      contextWindow: 1_000_000,
+      maxTokens: 16_384,
+      params: {
+        chat_template_kwargs: {
+          enable_thinking: false,
+          force_nonempty_content: true,
+        },
+      },
+    });
+  });
 });
--- a/extensions/nvidia/provider-catalog.ts
+++ b/extensions/nvidia/provider-catalog.ts
@@ -15,7 +15,7 @@ import {
 } from "openclaw/plugin-sdk/ssrf-runtime";
 import manifest from "./openclaw.plugin.json" with { type: "json" };

-export const NVIDIA_DEFAULT_MODEL_ID = "nvidia/nemotron-3-super-120b-a12b";
+export const NVIDIA_DEFAULT_MODEL_ID = "nvidia/nemotron-3-ultra-550b-a55b";
 export const NVIDIA_FEATURED_MODELS_URL =
  "https://assets.ngc.nvidia.com/products/api-catalog/featured-models.json";

@@ -32,6 +32,12 @@ const FEATURED_MODEL_COST = {
  cacheRead: 0,
  cacheWrite: 0,
 } as const;
+const NVIDIA_ULTRA_DEFAULT_PARAMS = {
+  chat_template_kwargs: {
+    enable_thinking: false,
+    force_nonempty_content: true,
+  },
+} as const;

 type NvidiaFeaturedModel = {
  model: string;
@@ -67,13 +73,17 @@ const lookupNvidiaFeaturedModelHostname = (async (
 }) as LookupFn;

 export function buildNvidiaProvider(): ModelProviderConfig {
-  return {
+  const provider = {
    ...buildManifestModelProviderConfig({
      providerId: "nvidia",
      catalog: manifest.modelCatalog.providers.nvidia,
    }),
    apiKey: "NVIDIA_API_KEY",
  };
+  return {
+    ...provider,
+    models: applyNvidiaModelDefaults(provider.models ?? []),
+  };
 }

 export async function buildLiveNvidiaProvider(): Promise<ModelProviderConfig> {
@@ -84,7 +94,7 @@ export async function buildLiveNvidiaProvider(): Promise<ModelProviderConfig> {
  }
  return {
    ...provider,
-    models: featuredModels,
+    models: applyNvidiaModelDefaults(featuredModels),
  };
 }

@@ -99,7 +109,7 @@ export async function buildSelectableLiveNvidiaProvider(): Promise<ModelProvider
  }
  return {
    ...provider,
-    models: featuredModels,
+    models: applyNvidiaModelDefaults(featuredModels),
  };
 }

@@ -178,6 +188,29 @@ function parseNvidiaFeaturedModels(payload: unknown): ModelDefinitionConfig[] |
  return models.length > 0 ? models : null;
 }

+function applyNvidiaModelDefaults(models: ModelDefinitionConfig[]): ModelDefinitionConfig[] {
+  return models.map((model) =>
+    model.id === NVIDIA_DEFAULT_MODEL_ID
+      ? {
+          ...model,
+          params: {
+            ...model.params,
+            chat_template_kwargs: {
+              ...NVIDIA_ULTRA_DEFAULT_PARAMS.chat_template_kwargs,
+              ...(isRecord(model.params?.chat_template_kwargs)
+                ? model.params.chat_template_kwargs
+                : {}),
+            },
+          },
+        }
+      : model,
+  );
+}
+
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return Boolean(value) && typeof value === "object" && !Array.isArray(value);
+}
+
 function parseNvidiaFeaturedModel(row: unknown): ModelDefinitionConfig | null {
  if (!row || typeof row !== "object") {
    return null;
--- a/src/commands/model-picker.test.ts
+++ b/src/commands/model-picker.test.ts
@@ -901,6 +901,52 @@ describe("promptDefaultModel", () => {
    ]);
  });

+  it("preselects the first live provider row when keep-current is disabled", async () => {
+    loadPreferredProviderPickerCatalog.mockResolvedValue([
+      {
+        provider: "nvidia",
+        id: "z-ai/glm-5.1",
+        name: "GLM 5.1",
+      },
+      {
+        provider: "nvidia",
+        id: "nvidia/nemotron-3-super-120b-a12b",
+        name: "NVIDIA Nemotron 3 Super 120B",
+      },
+    ]);
+    const select = vi.fn(async (params) => params.initialValue as never);
+    const prompter = makePrompter({ select });
+    const config = {
+      agents: {
+        defaults: {
+          model: "nvidia/nemotron-3-ultra-550b-a55b",
+        },
+      },
+    } as OpenClawConfig;
+
+    const result = await promptDefaultModel({
+      config,
+      prompter,
+      allowKeep: false,
+      includeManual: false,
+      ignoreAllowlist: true,
+      preferredProvider: "nvidia",
+      browseCatalogOnDemand: true,
+    });
+
+    expect(result.model).toBe("nvidia/z-ai/glm-5.1");
+    expect(pickerParams(select as MockCallSource).initialValue).toBe("nvidia/z-ai/glm-5.1");
+    expect(optionValues(pickerOptions(select as MockCallSource))).toEqual([
+      "nvidia/z-ai/glm-5.1",
+      "nvidia/nemotron-3-super-120b-a12b",
+      "nvidia/nemotron-3-ultra-550b-a55b",
+    ]);
+    expect(
+      requireOption(pickerOptions(select as MockCallSource), "nvidia/nemotron-3-ultra-550b-a55b")
+        .hint,
+    ).toBe("current (not in catalog)");
+  });
+
  it("keeps on-demand NVIDIA vendor labels single-prefixed after browsing", async () => {
    loadPreferredProviderPickerCatalog.mockResolvedValue([
      {
--- a/src/flows/model-picker.ts
+++ b/src/flows/model-picker.ts
@@ -293,6 +293,11 @@ async function resolveLiteralPrefixProviderIds(params: {
  return ids;
 }

+function modelCatalogEntryKey(entry: { provider: string; id: string }): string {
+  const normalizedRef = normalizeModelRef(entry.provider, entry.id);
+  return modelKey(normalizedRef.provider, normalizedRef.model);
+}
+
 async function addModelSelectOption(params: {
  entry: {
    provider: string;
@@ -309,7 +314,7 @@ async function addModelSelectOption(params: {
  isVisibleProvider: (provider: string) => boolean;
 }) {
  const normalizedRef = normalizeModelRef(params.entry.provider, params.entry.id);
-  const key = modelKey(normalizedRef.provider, normalizedRef.model);
+  const key = modelCatalogEntryKey(params.entry);
  if (
    params.seen.has(key) ||
    HIDDEN_ROUTER_MODELS.has(key) ||
@@ -917,17 +922,23 @@ export async function promptDefaultModel(
    });
  }

+  const firstPreferredModel =
+    preferredProvider && hasPreferredProvider
+      ? filteredModels.find((entry) => matchesPreferredProvider?.(entry.provider))
+      : undefined;
+  const firstPreferredModelKey = firstPreferredModel
+    ? modelCatalogEntryKey(firstPreferredModel)
+    : undefined;
  let initialValue: string | undefined = allowKeep ? KEEP_VALUE : configuredKey || undefined;
-  if (
+  if (!allowKeep && firstPreferredModelKey) {
+    initialValue = firstPreferredModelKey;
+  } else if (
    allowKeep &&
-    hasPreferredProvider &&
+    firstPreferredModelKey &&
    preferredProvider &&
    !matchesPreferredProvider?.(resolved.provider)
  ) {
-    const firstModel = filteredModels[0];
-    if (firstModel) {
-      initialValue = modelKey(firstModel.provider, firstModel.id);
-    }
+    initialValue = firstPreferredModelKey;
  }

  const selection = await params.prompter.select({