fix(memory): move local embeddings to llama.cpp provider

2026-06-06 05:51:15 +08:00 · 2026-06-04 16:57:17 +08:00
parent ec47d1cdd5
commit dcf6f66d56
33 changed files with 3432 additions and 179 deletions
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -301,6 +301,12 @@
  - changed-files:
      - any-glob-to-any-file:
          - "extensions/memory-lancedb/**"
+"extensions: llama-cpp":
+  - changed-files:
+      - any-glob-to-any-file:
+          - "extensions/llama-cpp/**"
+          - "docs/plugins/llama-cpp.md"
+          - "docs/plugins/reference/llama-cpp.md"
 "extensions: memory-wiki":
  - changed-files:
      - any-glob-to-any-file:
--- a/docs/concepts/memory-builtin.md
+++ b/docs/concepts/memory-builtin.md
@@ -39,9 +39,12 @@ To set a provider explicitly:

 Without an embedding provider, only keyword search is available.

-To force the built-in local embedding provider, install the optional
-`node-llama-cpp` runtime package next to OpenClaw, then point `local.modelPath`
-at a GGUF file:
+To force local GGUF embeddings, install the official llama.cpp provider plugin,
+then point `local.modelPath` at a GGUF file:
+
+```bash
+openclaw plugins install @openclaw/llama-cpp-provider
+```

 ```json5
 {
@@ -67,7 +70,7 @@ at a GGUF file:
 | DeepInfra         | `deepinfra`         | Default: `BAAI/bge-m3`              |
 | Gemini            | `gemini`            | Supports multimodal (image + audio) |
 | GitHub Copilot    | `github-copilot`    | Uses Copilot subscription           |
-| Local             | `local`             | Optional `node-llama-cpp` runtime   |
+| Local             | `local`             | `@openclaw/llama-cpp-provider`      |
 | Mistral           | `mistral`           |                                     |
 | Ollama            | `ollama`            | Local/self-hosted                   |
 | OpenAI            | `openai`            | Default: `text-embedding-3-small`   |
--- a/docs/concepts/memory-qmd.md
+++ b/docs/concepts/memory-qmd.md
@@ -15,7 +15,7 @@ binary, and can index content beyond your workspace memory files.
 - **Reranking and query expansion** for better recall.
 - **Index extra directories** -- project docs, team notes, anything on disk.
 - **Index session transcripts** -- recall earlier conversations.
-- **Fully local** -- runs with the optional node-llama-cpp runtime package and
+- **Fully local** -- runs with the official llama.cpp provider plugin and
  auto-downloads GGUF models.
 - **Automatic fallback** -- if QMD is unavailable, OpenClaw falls back to the
  builtin engine seamlessly.
--- a/docs/concepts/memory-search.md
+++ b/docs/concepts/memory-search.md
@@ -32,8 +32,9 @@ For multi-endpoint setups with memory-specific providers, `provider` can also
 be a custom `models.providers.<id>` entry, such as `ollama-5080`, when that
 provider sets `api: "ollama"` or another memory embedding adapter owner.

-For local embeddings with no API key, set `provider: "local"`. Source checkouts
-may still require native build approval: `pnpm approve-builds` then
+For local embeddings with no API key, install
+`@openclaw/llama-cpp-provider` and set `provider: "local"`. Source checkouts may
+still require native build approval: `pnpm approve-builds` then
 `pnpm rebuild node-llama-cpp`.

 Some OpenAI-compatible embedding endpoints require asymmetric labels such as
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -1242,6 +1242,7 @@
                  "plugins/voice-call",
                  "plugins/memory-wiki",
                  "plugins/memory-lancedb",
+                  "plugins/llama-cpp",
                  "plugins/oc-path",
                  "plugins/zalouser"
                ]
--- a/docs/plugins/llama-cpp.md
+++ b/docs/plugins/llama-cpp.md
@@ -0,0 +1,58 @@
+---
+summary: "Install the official llama.cpp provider for local GGUF memory embeddings"
+read_when:
+  - You want memory search embeddings from a local GGUF model
+  - You are configuring memorySearch.provider = "local"
+  - You need the node-llama-cpp runtime dependency
+title: "llama.cpp Provider"
+sidebarTitle: "llama.cpp Provider"
+---
+
+`llama-cpp` is the official external provider plugin for local GGUF embeddings.
+It owns the `node-llama-cpp` runtime dependency used by `memorySearch.provider:
+"local"`.
+
+Install it before using local memory embeddings:
+
+```bash
+openclaw plugins install @openclaw/llama-cpp-provider
+```
+
+The main `openclaw` npm package does not include `node-llama-cpp`. Keeping the
+native dependency in this plugin prevents normal OpenClaw npm updates from
+deleting a manually installed runtime inside the OpenClaw package directory.
+
+## Configuration
+
+Set the memory search provider to `local`:
+
+```json5
+{
+  agents: {
+    defaults: {
+      memorySearch: {
+        provider: "local",
+        local: {
+          modelPath: "hf:ggml-org/embeddinggemma-300m-qat-q8_0-GGUF/embeddinggemma-300m-qat-Q8_0.gguf",
+        },
+      },
+    },
+  },
+}
+```
+
+The default model is `embeddinggemma-300m-qat-Q8_0.gguf`. You can also point
+`local.modelPath` at a local `.gguf` file.
+
+## Native Runtime
+
+Use Node 24 for the smoothest native install path. Source checkouts using pnpm
+may need to approve and rebuild the native dependency:
+
+```bash
+pnpm approve-builds
+pnpm rebuild node-llama-cpp
+```
+
+For lower-friction local embeddings, use a local service provider such as
+Ollama or LM Studio instead.
--- a/docs/plugins/plugin-inventory.md
+++ b/docs/plugins/plugin-inventory.md
@@ -93,7 +93,7 @@ commands.
 | [llm-task](/plugins/reference/llm-task)                           | Generic JSON-only LLM tool for structured tasks callable from workflows.                                                                                             | `@openclaw/llm-task`<br />included in OpenClaw                       | contracts: tools                                                                                                                                                                                                                                                 |
 | [lmstudio](/plugins/reference/lmstudio)                           | Adds LM Studio model provider support to OpenClaw.                                                                                                                   | `@openclaw/lmstudio-provider`<br />included in OpenClaw              | providers: lmstudio; contracts: memoryEmbeddingProviders                                                                                                                                                                                                         |
 | [mattermost](/plugins/reference/mattermost)                       | Adds the Mattermost channel surface for sending and receiving OpenClaw messages.                                                                                     | `@openclaw/mattermost`<br />included in OpenClaw                     | channels: mattermost                                                                                                                                                                                                                                             |
-| [memory-core](/plugins/reference/memory-core)                     | Adds memory embedding provider support. Adds agent-callable tools.                                                                                                   | `@openclaw/memory-core`<br />included in OpenClaw                    | contracts: memoryEmbeddingProviders, tools                                                                                                                                                                                                                       |
+| [memory-core](/plugins/reference/memory-core)                     | Adds agent-callable tools.                                                                                                                                           | `@openclaw/memory-core`<br />included in OpenClaw                    | contracts: tools                                                                                                                                                                                                                                                 |
 | [memory-wiki](/plugins/reference/memory-wiki)                     | Persistent wiki compiler and Obsidian-friendly knowledge vault for OpenClaw.                                                                                         | `@openclaw/memory-wiki`<br />included in OpenClaw                    | contracts: tools; skills                                                                                                                                                                                                                                         |
 | [microsoft](/plugins/reference/microsoft)                         | Adds text-to-speech provider support.                                                                                                                                | `@openclaw/microsoft-speech`<br />included in OpenClaw               | contracts: speechProviders                                                                                                                                                                                                                                       |
 | [microsoft-foundry](/plugins/reference/microsoft-foundry)         | Adds Microsoft Foundry model provider support to OpenClaw.                                                                                                           | `@openclaw/microsoft-foundry`<br />included in OpenClaw              | providers: microsoft-foundry                                                                                                                                                                                                                                     |
@@ -161,6 +161,7 @@ commands.
 | [google-meet](/plugins/reference/google-meet)                       | OpenClaw Google Meet participant plugin for joining calls through Chrome or Twilio transports.                  | `@openclaw/google-meet`<br />npm; ClawHub                                                        | contracts: tools                                                             |
 | [googlechat](/plugins/reference/googlechat)                         | OpenClaw Google Chat channel plugin for spaces and direct messages.                                             | `@openclaw/googlechat`<br />npm; ClawHub                                                         | channels: googlechat                                                         |
 | [line](/plugins/reference/line)                                     | OpenClaw LINE channel plugin for LINE Bot API chats.                                                            | `@openclaw/line`<br />npm; ClawHub                                                               | channels: line                                                               |
+| [llama-cpp](/plugins/reference/llama-cpp)                           | OpenClaw llama.cpp embedding provider plugin.                                                                   | `@openclaw/llama-cpp-provider`<br />npm; ClawHub                                                 | contracts: embeddingProviders, memoryEmbeddingProviders                      |
 | [lobster](/plugins/reference/lobster)                               | Lobster workflow tool plugin for typed pipelines and resumable approvals.                                       | `@openclaw/lobster`<br />npm; ClawHub                                                            | contracts: tools                                                             |
 | [matrix](/plugins/reference/matrix)                                 | OpenClaw Matrix channel plugin for rooms and direct messages.                                                   | `@openclaw/matrix`<br />ClawHub: `clawhub:@openclaw/matrix`; npm                                 | channels: matrix                                                             |
 | [memory-lancedb](/plugins/reference/memory-lancedb)                 | OpenClaw LanceDB-backed long-term memory plugin with auto-recall, auto-capture, and vector search.              | `@openclaw/memory-lancedb`<br />npm; ClawHub                                                     | contracts: tools                                                             |
--- a/docs/plugins/reference.md
+++ b/docs/plugins/reference.md
@@ -74,10 +74,11 @@ pnpm plugins:inventory:gen
 | [litellm](/plugins/reference/litellm)                               | Adds LiteLLM model provider support to OpenClaw.                                                                                                                     | `@openclaw/litellm-provider`<br />included in OpenClaw                                           | providers: litellm; contracts: imageGenerationProviders                                                                                                                                                                                                          |
 | [llm-task](/plugins/reference/llm-task)                             | Generic JSON-only LLM tool for structured tasks callable from workflows.                                                                                             | `@openclaw/llm-task`<br />included in OpenClaw                                                   | contracts: tools                                                                                                                                                                                                                                                 |
 | [lmstudio](/plugins/reference/lmstudio)                             | Adds LM Studio model provider support to OpenClaw.                                                                                                                   | `@openclaw/lmstudio-provider`<br />included in OpenClaw                                          | providers: lmstudio; contracts: memoryEmbeddingProviders                                                                                                                                                                                                         |
+| [llama-cpp](/plugins/reference/llama-cpp)                           | OpenClaw llama.cpp embedding provider plugin.                                                                                                                        | `@openclaw/llama-cpp-provider`<br />npm; ClawHub                                                 | contracts: embeddingProviders, memoryEmbeddingProviders                                                                                                                                                                                                          |
 | [lobster](/plugins/reference/lobster)                               | Lobster workflow tool plugin for typed pipelines and resumable approvals.                                                                                            | `@openclaw/lobster`<br />npm; ClawHub                                                            | contracts: tools                                                                                                                                                                                                                                                 |
 | [matrix](/plugins/reference/matrix)                                 | OpenClaw Matrix channel plugin for rooms and direct messages.                                                                                                        | `@openclaw/matrix`<br />ClawHub: `clawhub:@openclaw/matrix`; npm                                 | channels: matrix                                                                                                                                                                                                                                                 |
 | [mattermost](/plugins/reference/mattermost)                         | Adds the Mattermost channel surface for sending and receiving OpenClaw messages.                                                                                     | `@openclaw/mattermost`<br />included in OpenClaw                                                 | channels: mattermost                                                                                                                                                                                                                                             |
-| [memory-core](/plugins/reference/memory-core)                       | Adds memory embedding provider support. Adds agent-callable tools.                                                                                                   | `@openclaw/memory-core`<br />included in OpenClaw                                                | contracts: memoryEmbeddingProviders, tools                                                                                                                                                                                                                       |
+| [memory-core](/plugins/reference/memory-core)                       | Adds agent-callable tools.                                                                                                                                           | `@openclaw/memory-core`<br />included in OpenClaw                                                | contracts: tools                                                                                                                                                                                                                                                 |
 | [memory-lancedb](/plugins/reference/memory-lancedb)                 | OpenClaw LanceDB-backed long-term memory plugin with auto-recall, auto-capture, and vector search.                                                                   | `@openclaw/memory-lancedb`<br />npm; ClawHub                                                     | contracts: tools                                                                                                                                                                                                                                                 |
 | [memory-wiki](/plugins/reference/memory-wiki)                       | Persistent wiki compiler and Obsidian-friendly knowledge vault for OpenClaw.                                                                                         | `@openclaw/memory-wiki`<br />included in OpenClaw                                                | contracts: tools; skills                                                                                                                                                                                                                                         |
 | [microsoft](/plugins/reference/microsoft)                           | Adds text-to-speech provider support.                                                                                                                                | `@openclaw/microsoft-speech`<br />included in OpenClaw                                           | contracts: speechProviders                                                                                                                                                                                                                                       |
--- a/docs/plugins/reference/llama-cpp.md
+++ b/docs/plugins/reference/llama-cpp.md
@@ -0,0 +1,23 @@
+---
+summary: "OpenClaw llama.cpp embedding provider plugin."
+read_when:
+  - You are installing, configuring, or auditing the llama-cpp plugin
+title: "llama-cpp plugin"
+---
+
+# llama-cpp plugin
+
+OpenClaw llama.cpp embedding provider plugin.
+
+## Distribution
+
+- Package: `@openclaw/llama-cpp-provider`
+- Install route: npm; ClawHub
+
+## Surface
+
+contracts: embeddingProviders, memoryEmbeddingProviders
+
+## Related docs
+
+- [llama.cpp Provider](/plugins/llama-cpp)
--- a/docs/plugins/reference/memory-core.md
+++ b/docs/plugins/reference/memory-core.md
@@ -1,5 +1,5 @@
 ---
-summary: "Adds memory embedding provider support. Adds agent-callable tools."
+summary: "Adds agent-callable tools."
 read_when:
  - You are installing, configuring, or auditing the memory-core plugin
 title: "Memory Core plugin"
@@ -7,7 +7,7 @@ title: "Memory Core plugin"

 # Memory Core plugin

-Adds memory embedding provider support. Adds agent-callable tools.
+Adds agent-callable tools.

 ## Distribution

@@ -16,4 +16,4 @@ Adds memory embedding provider support. Adds agent-callable tools.

 ## Surface

-contracts: memoryEmbeddingProviders, tools
+contracts: tools
--- a/docs/reference/memory-config.md
+++ b/docs/reference/memory-config.md
@@ -267,13 +267,14 @@ Use `provider: "openai-compatible"` for a generic OpenAI-compatible
    ```

  </Accordion>
-  <Accordion title="Local (GGUF + node-llama-cpp)">
+  <Accordion title="Local (GGUF + llama.cpp)">
    | Key                   | Type               | Default                | Description                                                                                                                                                                                                                                                                                                          |
    | --------------------- | ------------------ | ---------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
    | `local.modelPath`     | `string`           | auto-downloaded        | Path to GGUF model file                                                                                                                                                                                                                                                                                              |
    | `local.modelCacheDir` | `string`           | node-llama-cpp default | Cache dir for downloaded models                                                                                                                                                                                                                                                                                      |
    | `local.contextSize`   | `number \| "auto"` | `4096`                 | Context window size for the embedding context. 4096 covers typical chunks (128–512 tokens) while bounding non-weight VRAM. Lower to 1024–2048 on constrained hosts. `"auto"` uses the model's trained maximum — not recommended for 8B+ models (Qwen3-Embedding-8B: 40 960 tokens → ~32 GB VRAM vs ~8.8 GB at 4096). |

+    Install the official llama.cpp provider first: `openclaw plugins install @openclaw/llama-cpp-provider`.
    Default model: `embeddinggemma-300m-qat-Q8_0.gguf` (~0.6 GB, auto-downloaded). Source checkouts still require native build approval: `pnpm approve-builds` then `pnpm rebuild node-llama-cpp`.

    Use the standalone CLI to verify the same provider path the Gateway uses:
--- a/extensions/llama-cpp/index.test.ts
+++ b/extensions/llama-cpp/index.test.ts
@@ -0,0 +1,129 @@
+import {
+  createPluginRegistryFixture,
+  registerVirtualTestPlugin,
+} from "openclaw/plugin-sdk/plugin-test-contracts";
+import {
+  clearEmbeddingProviders,
+  clearMemoryEmbeddingProviders,
+  getRegisteredEmbeddingProvider,
+  listRegisteredMemoryEmbeddingProviders,
+} from "openclaw/plugin-sdk/plugin-test-runtime";
+import { afterEach, describe, expect, it, vi } from "vitest";
+
+const memoryHostEmbeddingMocks = vi.hoisted(() => ({
+  createLocalEmbeddingProvider: vi.fn(),
+}));
+
+vi.mock("openclaw/plugin-sdk/memory-core-host-engine-embeddings", () => ({
+  createLocalEmbeddingProvider: memoryHostEmbeddingMocks.createLocalEmbeddingProvider,
+}));
+
+import llamaCppPlugin from "./index.js";
+import {
+  DEFAULT_LLAMA_CPP_EMBEDDING_MODEL,
+  createLlamaCppEmbeddingProvider,
+  formatLlamaCppSetupError,
+} from "./src/embedding-provider.js";
+
+afterEach(() => {
+  clearEmbeddingProviders();
+  clearMemoryEmbeddingProviders();
+  memoryHostEmbeddingMocks.createLocalEmbeddingProvider.mockReset();
+});
+
+describe("llama.cpp provider plugin", () => {
+  it("registers the local embedding provider through the generic SDK contract", () => {
+    const { config, registry } = createPluginRegistryFixture();
+
+    registerVirtualTestPlugin({
+      registry,
+      config,
+      id: "llama-cpp",
+      name: "llama.cpp Provider",
+      contracts: {
+        embeddingProviders: ["local"],
+        memoryEmbeddingProviders: ["local"],
+      },
+      register: llamaCppPlugin.register,
+    });
+
+    const provider = getRegisteredEmbeddingProvider("local");
+    expect(provider?.ownerPluginId).toBe("llama-cpp");
+    expect(provider?.adapter).toMatchObject({
+      id: "local",
+      defaultModel: DEFAULT_LLAMA_CPP_EMBEDDING_MODEL,
+      transport: "local",
+    });
+    const memoryProvider = listRegisteredMemoryEmbeddingProviders().find(
+      (entry) => entry.adapter.id === "local",
+    );
+    expect(memoryProvider?.ownerPluginId).toBe("llama-cpp");
+    expect(memoryProvider?.adapter).toMatchObject({
+      id: "local",
+      defaultModel: DEFAULT_LLAMA_CPP_EMBEDDING_MODEL,
+      transport: "local",
+    });
+  });
+
+  it("adapts the worker-backed local embedding provider", async () => {
+    const close = vi.fn();
+    memoryHostEmbeddingMocks.createLocalEmbeddingProvider.mockResolvedValue({
+      id: "local",
+      model: DEFAULT_LLAMA_CPP_EMBEDDING_MODEL,
+      maxInputTokens: 2048,
+      embedQuery: vi.fn(async () => [0.6, 0.8]),
+      embedBatchInputs: vi.fn(async () => [[0.3, 0.4]]),
+      embedBatch: vi.fn(async () => [[1, 0]]),
+      close,
+    });
+    const abortController = new AbortController();
+
+    const provider = await createLlamaCppEmbeddingProvider(
+      {
+        config: {},
+        provider: "local",
+        model: "text-embedding-3-small",
+      },
+      { nodeLlamaCppImportUrl: "file:///plugin/node-llama-cpp.js" },
+    );
+
+    await expect(provider.embed("hello")).resolves.toEqual([0.6, 0.8]);
+    await expect(
+      provider.embedBatch([{ text: "doc" }], { signal: abortController.signal }),
+    ).resolves.toEqual([[0.3, 0.4]]);
+    await provider.close?.();
+
+    expect(provider.model).toBe(DEFAULT_LLAMA_CPP_EMBEDDING_MODEL);
+    expect(provider.maxInputTokens).toBe(2048);
+    expect(close).toHaveBeenCalledTimes(1);
+    expect(memoryHostEmbeddingMocks.createLocalEmbeddingProvider).toHaveBeenCalledWith(
+      {
+        config: {},
+        provider: "local",
+        fallback: "none",
+        model: DEFAULT_LLAMA_CPP_EMBEDDING_MODEL,
+        local: {
+          modelPath: DEFAULT_LLAMA_CPP_EMBEDDING_MODEL,
+        },
+      },
+      {
+        nodeLlamaCppImportUrl: "file:///plugin/node-llama-cpp.js",
+      },
+    );
+    const workerProvider =
+      await memoryHostEmbeddingMocks.createLocalEmbeddingProvider.mock.results[0].value;
+    expect(workerProvider.embedBatchInputs).toHaveBeenCalledWith([{ text: "doc" }], {
+      signal: abortController.signal,
+    });
+  });
+
+  it("formats missing runtime errors with the plugin install command", () => {
+    const err = Object.assign(new Error("Cannot find package 'node-llama-cpp'"), {
+      code: "ERR_MODULE_NOT_FOUND",
+    });
+
+    expect(formatLlamaCppSetupError(err)).toContain(
+      "openclaw plugins install @openclaw/llama-cpp-provider",
+    );
+  });
+});
--- a/extensions/llama-cpp/index.ts
+++ b/extensions/llama-cpp/index.ts
@@ -0,0 +1,15 @@
+import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
+import {
+  llamaCppEmbeddingProviderAdapter,
+  llamaCppMemoryEmbeddingProviderAdapter,
+} from "./src/embedding-provider.js";
+
+export default definePluginEntry({
+  id: "llama-cpp",
+  name: "llama.cpp Provider",
+  description: "Local GGUF embeddings through node-llama-cpp",
+  register(api) {
+    api.registerEmbeddingProvider(llamaCppEmbeddingProviderAdapter);
+    api.registerMemoryEmbeddingProvider(llamaCppMemoryEmbeddingProviderAdapter);
+  },
+});
--- a/extensions/llama-cpp/npm-shrinkwrap.json
+++ b/extensions/llama-cpp/npm-shrinkwrap.json
--- a/extensions/llama-cpp/openclaw.plugin.json
+++ b/extensions/llama-cpp/openclaw.plugin.json
@@ -0,0 +1,18 @@
+{
+  "id": "llama-cpp",
+  "name": "llama.cpp Provider",
+  "description": "Local GGUF embeddings through node-llama-cpp.",
+  "activation": {
+    "onStartup": false
+  },
+  "enabledByDefault": true,
+  "contracts": {
+    "embeddingProviders": ["local"],
+    "memoryEmbeddingProviders": ["local"]
+  },
+  "configSchema": {
+    "type": "object",
+    "additionalProperties": false,
+    "properties": {}
+  }
+}
--- a/extensions/llama-cpp/package.json
+++ b/extensions/llama-cpp/package.json
@@ -0,0 +1,37 @@
+{
+  "name": "@openclaw/llama-cpp-provider",
+  "version": "2026.6.2",
+  "description": "OpenClaw llama.cpp embedding provider plugin",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/openclaw/openclaw"
+  },
+  "type": "module",
+  "dependencies": {
+    "node-llama-cpp": "3.18.1"
+  },
+  "devDependencies": {
+    "@openclaw/plugin-sdk": "workspace:*"
+  },
+  "openclaw": {
+    "extensions": [
+      "./index.ts"
+    ],
+    "install": {
+      "npmSpec": "@openclaw/llama-cpp-provider",
+      "defaultChoice": "npm",
+      "minHostVersion": ">=2026.6.2"
+    },
+    "compat": {
+      "pluginApi": ">=2026.6.2"
+    },
+    "build": {
+      "openclawVersion": "2026.6.2"
+    },
+    "release": {
+      "bundleRuntimeDependencies": false,
+      "publishToClawHub": true,
+      "publishToNpm": true
+    }
+  }
+}
--- a/extensions/llama-cpp/src/embedding-provider.ts
+++ b/extensions/llama-cpp/src/embedding-provider.ts
@@ -0,0 +1,199 @@
+import { createRequire } from "node:module";
+import { pathToFileURL } from "node:url";
+import type {
+  EmbeddingInput,
+  EmbeddingProvider,
+  EmbeddingProviderAdapter,
+  EmbeddingProviderCreateOptions,
+} from "openclaw/plugin-sdk/embedding-providers";
+import {
+  createLocalEmbeddingProvider,
+  type EmbeddingInput as MemoryEmbeddingInput,
+  type MemoryEmbeddingProvider,
+  type MemoryEmbeddingProviderAdapter,
+  type MemoryEmbeddingProviderCreateOptions,
+  type MemoryEmbeddingProviderCreateResult,
+} from "openclaw/plugin-sdk/memory-core-host-engine-embeddings";
+
+/**
+ * Shape this module reads from the `local` section of the provider create options.
+ * NOTE(review): the meaning of `modelCacheDir` and `contextSize` is decided by the host SDK that
+ * receives them; this file only forwards them.
+ */
+type LlamaCppLocalOptions = {
+  // Model reference; when blank, DEFAULT_LLAMA_CPP_EMBEDDING_MODEL is substituted.
+  modelPath?: string;
+  modelCacheDir?: string;
+  contextSize?: number | "auto";
+};
+
+/** Optional overrides accepted by the provider factories in this module. */
+export type LlamaCppEmbeddingProviderRuntimeOptions = {
+  // When set, used instead of the URL resolved from this plugin's own node-llama-cpp install.
+  nodeLlamaCppImportUrl?: string;
+};
+
+/** Provider id used for every adapter, provider and runtime object created in this module. */
+export const LLAMA_CPP_EMBEDDING_PROVIDER_ID = "local";
+/** Model reference used when no non-blank `local.modelPath` is supplied. */
+export const DEFAULT_LLAMA_CPP_EMBEDDING_MODEL =
+  "hf:ggml-org/embeddinggemma-300m-qat-q8_0-GGUF/embeddinggemma-300m-qat-Q8_0.gguf";
+
+/** Returns the trimmed text when `value` is a string with non-whitespace content, else `undefined`. */
+function normalizeOptionalString(value: unknown): string | undefined {
+  if (typeof value !== "string") {
+    return undefined;
+  }
+  const trimmed = value.trim();
+  return trimmed ? trimmed : undefined;
+}
+
+/**
+ * Extracts the `local` option bag from provider create options.
+ *
+ * `local` arrives typed as `unknown`, so anything that is not a plain object (missing, `null`,
+ * a primitive, or an array) is treated as "no local options" instead of being cast blindly;
+ * the result is later spread into the forwarded config, where a stray string or array would
+ * otherwise leak index keys.
+ *
+ * @param options - Create options that may carry a `local` section.
+ * @returns The `local` section, or an empty object when it is absent or malformed.
+ */
+function readLocalOptions(options: { local?: unknown }): LlamaCppLocalOptions {
+  const { local } = options;
+  if (typeof local !== "object" || local === null || Array.isArray(local)) {
+    return {};
+  }
+  return local as LlamaCppLocalOptions;
+}
+
+/** Reduces an embedding input to its text: strings pass through, objects yield their `text`. */
+function textFromEmbeddingInput(input: EmbeddingInput): string {
+  if (typeof input === "string") {
+    return input;
+  }
+  return input.text;
+}
+
+/** Converts an embedding input to the memory-host shape, wrapping bare strings as `{ text }`. */
+function toMemoryEmbeddingInput(input: EmbeddingInput): MemoryEmbeddingInput {
+  if (typeof input === "string") {
+    return { text: input };
+  }
+  return input;
+}
+
+/**
+ * Reports whether `err` is a module-resolution failure for the `node-llama-cpp` package.
+ *
+ * Two error codes are accepted because the package can fail to resolve on two paths:
+ * - `ERR_MODULE_NOT_FOUND` is raised by the ESM loader (dynamic `import()`).
+ * - `MODULE_NOT_FOUND` is raised by the CommonJS resolver, which is what
+ *   `createRequire(...).resolve("node-llama-cpp")` in this file throws when the package is absent.
+ *
+ * @param err - Any thrown value.
+ * @returns `true` only for an `Error` carrying one of those codes whose message names the package.
+ */
+function isNodeLlamaCppMissing(err: unknown): boolean {
+  if (!(err instanceof Error)) {
+    return false;
+  }
+  const code = (err as Error & { code?: unknown }).code;
+  const isResolutionFailure = code === "ERR_MODULE_NOT_FOUND" || code === "MODULE_NOT_FOUND";
+  return isResolutionFailure && err.message.includes("node-llama-cpp");
+}
+
+/** Produces display text for a thrown value: an `Error`'s message, otherwise `String(err)`. */
+function formatErrorMessage(err: unknown): string {
+  return err instanceof Error ? err.message : String(err);
+}
+
+/**
+ * Builds the multi-line, user-facing message shown when local llama.cpp embeddings cannot start.
+ *
+ * The message opens with a fixed headline, then a reason line (plus a detail line when the
+ * package itself is missing and the error carried text), then fixed remediation steps.
+ *
+ * @param err - The failure raised while setting up the provider.
+ * @returns Newline-joined message text.
+ */
+export function formatLlamaCppSetupError(err: unknown): string {
+  const detail = formatErrorMessage(err);
+  const lines: string[] = ["Local llama.cpp embeddings unavailable."];
+
+  if (isNodeLlamaCppMissing(err)) {
+    lines.push("Reason: node-llama-cpp is missing or failed to install.");
+    if (detail) {
+      lines.push(`Detail: ${detail}`);
+    }
+  } else if (detail) {
+    lines.push(`Reason: ${detail}`);
+  }
+
+  lines.push(
+    "To enable local GGUF embeddings:",
+    "1) Install the official provider plugin: openclaw plugins install @openclaw/llama-cpp-provider",
+    "2) Use Node 24 for native installs/updates.",
+    "3) If you use pnpm from source: pnpm approve-builds, then pnpm rebuild node-llama-cpp.",
+    'Or set agents.defaults.memorySearch.provider to a remote embedding provider such as "openai", "ollama", "lmstudio", or "voyage".',
+  );
+  return lines.join("\n");
+}
+
+// CommonJS-style resolver anchored at this module's own URL.
+const requireFromPlugin = createRequire(import.meta.url);
+
+/**
+ * Resolves `node-llama-cpp` relative to this module and returns the result as a `file:` URL
+ * string. Propagates the resolver's error when the package cannot be found.
+ */
+export function resolveNodeLlamaCppImportUrl(): string {
+  const resolvedEntryPath = requireFromPlugin.resolve("node-llama-cpp");
+  const entryUrl = pathToFileURL(resolvedEntryPath);
+  return entryUrl.href;
+}
+
+/**
+ * Wraps a memory embedding provider so it can be used through the generic `EmbeddingProvider`
+ * interface.
+ *
+ * - `embed` forwards only the text of the input to `embedQuery`.
+ * - `embedBatch` uses `embedBatchInputs` when the wrapped provider exposes it, and otherwise
+ *   falls back to the text-only `embedBatch`.
+ * - The caller's abort `signal` is passed through on every call.
+ *
+ * @param provider - The memory embedding provider to wrap.
+ * @returns An adapter reporting this plugin's provider id and the wrapped provider's model.
+ */
+function adaptMemoryEmbeddingProvider(provider: MemoryEmbeddingProvider): EmbeddingProvider {
+  return {
+    id: LLAMA_CPP_EMBEDDING_PROVIDER_ID,
+    model: provider.model,
+    maxInputTokens: provider.maxInputTokens,
+    embed: async (input, callOptions) =>
+      await provider.embedQuery(textFromEmbeddingInput(input), {
+        signal: callOptions?.signal,
+      }),
+    embedBatch: async (inputs, callOptions) => {
+      if (provider.embedBatchInputs) {
+        return await provider.embedBatchInputs(inputs.map(toMemoryEmbeddingInput), {
+          signal: callOptions?.signal,
+        });
+      }
+      return await provider.embedBatch(inputs.map(textFromEmbeddingInput), {
+        signal: callOptions?.signal,
+      });
+    },
+    // Bound to the wrapped provider: a bare `provider.close` reference would run with the wrong
+    // `this` when called through the adapter, breaking any implementation that reads its own state.
+    close: provider.close?.bind(provider),
+  };
+}
+
+/**
+ * Creates the llama.cpp provider for the generic embedding-provider contract by building the
+ * memory provider and wrapping it.
+ *
+ * @param options - Generic create options; `options.dimensions` is forwarded as the output
+ *   dimensionality.
+ * @param runtimeOptions - Optional overrides passed on to the memory provider factory.
+ * @throws Error when the memory factory yields no provider.
+ */
+export async function createLlamaCppEmbeddingProvider(
+  options: EmbeddingProviderCreateOptions,
+  runtimeOptions: LlamaCppEmbeddingProviderRuntimeOptions = {},
+): Promise<EmbeddingProvider> {
+  const memoryCreateOptions = buildMemoryCreateOptions(options, options.dimensions);
+  const { provider: memoryProvider } = await createLlamaCppMemoryEmbeddingProvider(
+    memoryCreateOptions,
+    runtimeOptions,
+  );
+  if (memoryProvider) {
+    return adaptMemoryEmbeddingProvider(memoryProvider);
+  }
+  throw new Error("llama.cpp local embedding provider was unavailable");
+}
+
+/**
+ * Creates the llama.cpp provider for the memory embedding-provider contract.
+ *
+ * The node-llama-cpp import URL comes from `runtimeOptions` when supplied; otherwise it is
+ * resolved from this plugin's own install.
+ *
+ * @param options - Memory create options; `options.outputDimensionality` is forwarded unchanged.
+ * @param runtimeOptions - Optional overrides.
+ * @returns The created provider together with its runtime descriptor.
+ */
+export async function createLlamaCppMemoryEmbeddingProvider(
+  options: MemoryEmbeddingProviderCreateOptions,
+  runtimeOptions: LlamaCppEmbeddingProviderRuntimeOptions = {},
+): Promise<MemoryEmbeddingProviderCreateResult> {
+  const normalizedOptions = buildMemoryCreateOptions(options, options.outputDimensionality);
+  const nodeLlamaCppImportUrl =
+    runtimeOptions.nodeLlamaCppImportUrl ?? resolveNodeLlamaCppImportUrl();
+  const provider = await createLocalEmbeddingProvider(normalizedOptions, {
+    nodeLlamaCppImportUrl,
+  });
+  const runtime = createLlamaCppEmbeddingProviderRuntime(provider);
+  return { provider, runtime };
+}
+
+/**
+ * Normalizes either flavour of create options into memory create options pinned to this
+ * provider: the provider id is fixed, fallback is `"none"`, and the model path defaults to
+ * DEFAULT_LLAMA_CPP_EMBEDDING_MODEL when `local.modelPath` is blank. The resolved path is written
+ * to both `model` and `local.modelPath`.
+ *
+ * @param options - Generic or memory create options.
+ * @param outputDimensionality - Requested embedding size, passed through as-is.
+ */
+function buildMemoryCreateOptions(
+  options: MemoryEmbeddingProviderCreateOptions | EmbeddingProviderCreateOptions,
+  outputDimensionality: number | undefined,
+): MemoryEmbeddingProviderCreateOptions {
+  const localOptions = readLocalOptions(options);
+  const resolvedModelPath =
+    normalizeOptionalString(localOptions.modelPath) || DEFAULT_LLAMA_CPP_EMBEDDING_MODEL;
+  const local = { ...localOptions, modelPath: resolvedModelPath };
+  return {
+    config: options.config,
+    agentDir: options.agentDir,
+    provider: LLAMA_CPP_EMBEDDING_PROVIDER_ID,
+    fallback: "none",
+    remote: options.remote,
+    model: resolvedModelPath,
+    inputType: options.inputType,
+    queryInputType: options.queryInputType,
+    documentInputType: options.documentInputType,
+    local,
+    outputDimensionality,
+  };
+}
+
+/**
+ * Builds the runtime descriptor returned alongside a provider: this plugin's provider id, inline
+ * timeouts of 5 minutes (query) and 10 minutes (batch), and cache-key data made of the provider
+ * id and model.
+ */
+function createLlamaCppEmbeddingProviderRuntime(provider: { model: string }) {
+  const minuteMs = 60_000;
+  const cacheKeyData = {
+    provider: LLAMA_CPP_EMBEDDING_PROVIDER_ID,
+    model: provider.model,
+  };
+  return {
+    id: LLAMA_CPP_EMBEDDING_PROVIDER_ID,
+    inlineQueryTimeoutMs: 5 * minuteMs,
+    inlineBatchTimeoutMs: 10 * minuteMs,
+    cacheKeyData,
+  };
+}
+
+/** Adapter registered for the generic embedding-provider contract. */
+export const llamaCppEmbeddingProviderAdapter: EmbeddingProviderAdapter = {
+  id: LLAMA_CPP_EMBEDDING_PROVIDER_ID,
+  defaultModel: DEFAULT_LLAMA_CPP_EMBEDDING_MODEL,
+  transport: "local",
+  formatSetupError: formatLlamaCppSetupError,
+  create: async (options) => {
+    const embeddingProvider = await createLlamaCppEmbeddingProvider(options);
+    const runtime = createLlamaCppEmbeddingProviderRuntime(embeddingProvider);
+    return { provider: embeddingProvider, runtime };
+  },
+};
+
+/**
+ * Adapter registered for the memory embedding-provider contract.
+ * NOTE(review): the built-in adapter removed from memory-core in this change declared
+ * `autoSelectPriority: 10`; this one declares none — confirm that dropping `local` from
+ * auto-selection is intended.
+ */
+export const llamaCppMemoryEmbeddingProviderAdapter: MemoryEmbeddingProviderAdapter = {
+  id: LLAMA_CPP_EMBEDDING_PROVIDER_ID,
+  defaultModel: DEFAULT_LLAMA_CPP_EMBEDDING_MODEL,
+  transport: "local",
+  formatSetupError: formatLlamaCppSetupError,
+  shouldContinueAutoSelection: () => true,
+  create: async (options) => {
+    const created = await createLlamaCppMemoryEmbeddingProvider(options);
+    return created;
+  },
+};
--- a/extensions/memory-core/openclaw.plugin.json
+++ b/extensions/memory-core/openclaw.plugin.json
@@ -5,7 +5,6 @@
  },
  "kind": "memory",
  "contracts": {
-    "memoryEmbeddingProviders": ["local"],
    "tools": ["memory_get", "memory_search"]
  },
  "commandAliases": [
--- a/extensions/memory-core/runtime-api.ts
+++ b/extensions/memory-core/runtime-api.ts
@@ -1,7 +1,6 @@
 export { getMemorySearchManager, MemoryIndexManager } from "./src/memory/index.js";
 export { memoryRuntime } from "./src/runtime-provider.js";
 export {
-  DEFAULT_LOCAL_MODEL,
  getBuiltinMemoryEmbeddingProviderDoctorMetadata,
  listBuiltinAutoSelectMemoryEmbeddingProviderDoctorMetadata,
  registerBuiltInMemoryEmbeddingProviders,
--- a/extensions/memory-core/src/memory/index.test.ts
+++ b/extensions/memory-core/src/memory/index.test.ts
@@ -2,11 +2,7 @@ import { mkdirSync, rmSync } from "node:fs";
 import fs from "node:fs/promises";
 import os from "node:os";
 import path from "node:path";
-import {
-  clearMemoryEmbeddingProviders as clearRegistry,
-  listRegisteredMemoryEmbeddingProviderAdapters as listRegisteredAdapters,
-  registerMemoryEmbeddingProvider as registerAdapter,
-} from "openclaw/plugin-sdk/memory-core-host-engine-embeddings";
+import { clearMemoryEmbeddingProviders as clearRegistry } from "openclaw/plugin-sdk/memory-core-host-engine-embeddings";
 import { resolveSessionTranscriptsDirForAgent } from "openclaw/plugin-sdk/memory-core-host-runtime-core";
 import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, it, vi } from "vitest";
 import "./test-runtime-mocks.js";
@@ -15,10 +11,7 @@ import { closeAllMemorySearchManagers, getMemorySearchManager } from "./index.js
 import { LOCAL_EMBEDDING_WORKER_ERROR_CODES } from "./manager-local-worker-errors.js";
 import type { MemoryIndexMeta } from "./manager-reindex-state.js";
 import { closeMemoryIndexManagersForAgent, EMBEDDING_PROBE_CACHE_TTL_MS } from "./manager.js";
-import {
-  DEFAULT_LOCAL_MODEL,
-  registerBuiltInMemoryEmbeddingProviders,
-} from "./provider-adapters.js";
+import { registerBuiltInMemoryEmbeddingProviders } from "./provider-adapters.js";

 // This suite performs real sqlite/media indexing and can exceed the global
 // timeout when it shares a packed CI extension shard.
@@ -170,20 +163,14 @@ describe("memory embedding provider registration", () => {
    clearRegistry();
  });

-  it("registers the builtin local embedding provider", () => {
+  it("does not register local embeddings from memory-core", () => {
    clearRegistry();
-    registerBuiltInMemoryEmbeddingProviders({ registerMemoryEmbeddingProvider: registerAdapter });
+    const registered: string[] = [];
+    registerBuiltInMemoryEmbeddingProviders({
+      registerMemoryEmbeddingProvider: (adapter) => registered.push(adapter.id),
+    });

-    const adapter = listRegisteredAdapters().find((entry) => entry.id === "local");
-
-    if (!adapter) {
-      throw new Error("expected local embedding provider adapter to be registered");
-    }
-    expect(adapter.id).toBe("local");
-    expect(adapter.defaultModel).toBe(DEFAULT_LOCAL_MODEL);
-    expect(adapter.transport).toBe("local");
-    expect(adapter.authProviderId).toBeUndefined();
-    expect(adapter.autoSelectPriority).toBe(10);
+    expect(registered).toEqual([]);
  });
 });

@@ -225,7 +212,6 @@ describe("memory index", () => {
    // Keep atomic reindex tests on the safe path.
    vi.stubEnv("OPENCLAW_TEST_MEMORY_UNSAFE_REINDEX", "1");
    clearRegistry();
-    registerBuiltInMemoryEmbeddingProviders({ registerMemoryEmbeddingProvider: registerAdapter });
    embedBatchCalls = 0;
    embedBatchInputCalls = 0;
    providerCloseCalls = 0;
--- a/extensions/memory-core/src/memory/provider-adapter-registration.test.ts
+++ b/extensions/memory-core/src/memory/provider-adapter-registration.test.ts
@@ -4,18 +4,11 @@ import { filterUnregisteredMemoryEmbeddingProviderAdapters } from "./provider-ad
 describe("filterUnregisteredMemoryEmbeddingProviderAdapters", () => {
  it("keeps builtin adapters that are not already registered", () => {
    const adapters = filterUnregisteredMemoryEmbeddingProviderAdapters({
-      builtinAdapters: [
-        { id: "local" },
-        { id: "openai" },
-        { id: "gemini" },
-        { id: "voyage" },
-        { id: "mistral" },
-      ],
+      builtinAdapters: [{ id: "openai" }, { id: "gemini" }, { id: "voyage" }, { id: "mistral" }],
      registeredAdapters: [],
    });

    expect(adapters.map((adapter) => adapter.id)).toEqual([
-      "local",
      "openai",
      "gemini",
      "voyage",
@@ -25,14 +18,8 @@ describe("filterUnregisteredMemoryEmbeddingProviderAdapters", () => {

  it("skips builtin adapters that are already registered", () => {
    const adapters = filterUnregisteredMemoryEmbeddingProviderAdapters({
-      builtinAdapters: [
-        { id: "local" },
-        { id: "openai" },
-        { id: "gemini" },
-        { id: "voyage" },
-        { id: "mistral" },
-      ],
-      registeredAdapters: [{ id: "local" }, { id: "gemini" }],
+      builtinAdapters: [{ id: "openai" }, { id: "gemini" }, { id: "voyage" }, { id: "mistral" }],
+      registeredAdapters: [{ id: "gemini" }],
    });

    expect(adapters.map((adapter) => adapter.id)).toEqual(["openai", "voyage", "mistral"]);
--- a/extensions/memory-core/src/memory/provider-adapters.ts
+++ b/extensions/memory-core/src/memory/provider-adapters.ts
@@ -1,15 +1,11 @@
 import {
-  DEFAULT_LOCAL_MODEL,
  listMemoryEmbeddingProviders,
  listRegisteredMemoryEmbeddingProviderAdapters,
  type MemoryEmbeddingProviderAdapter,
 } from "openclaw/plugin-sdk/memory-core-host-embedding-registry";
 import { getProviderEnvVars } from "openclaw/plugin-sdk/provider-env-vars";
-import { formatErrorMessage } from "../dreaming-shared.js";
 import { filterUnregisteredMemoryEmbeddingProviderAdapters } from "./provider-adapter-registration.js";

-const NODE_LLAMA_CPP_RUNTIME_PACKAGE = "node-llama-cpp";
-
 export type BuiltinMemoryEmbeddingProviderDoctorMetadata = {
  providerId: string;
  authProviderId: string;
@@ -18,84 +14,7 @@ export type BuiltinMemoryEmbeddingProviderDoctorMetadata = {
  autoSelectPriority?: number;
 };

-function isNodeLlamaCppMissing(err: unknown): boolean {
-  if (!(err instanceof Error)) {
-    return false;
-  }
-  const code = (err as Error & { code?: unknown }).code;
-  return code === "ERR_MODULE_NOT_FOUND" && err.message.includes(NODE_LLAMA_CPP_RUNTIME_PACKAGE);
-}
-
-function listRemoteEmbeddingSetupHints(): string[] {
-  try {
-    return listMemoryEmbeddingProviders()
-      .filter(
-        (adapter) =>
-          adapter.transport === "remote" && typeof adapter.autoSelectPriority === "number",
-      )
-      .toSorted((a, b) => (a.autoSelectPriority ?? 0) - (b.autoSelectPriority ?? 0))
-      .map((adapter) => `Or set agents.defaults.memorySearch.provider = "${adapter.id}" (remote).`);
-  } catch {
-    return [];
-  }
-}
-
-function formatLocalSetupError(err: unknown): string {
-  const detail = formatErrorMessage(err);
-  const missing = isNodeLlamaCppMissing(err);
-  return [
-    "Local embeddings unavailable.",
-    missing
-      ? "Reason: optional dependency node-llama-cpp is missing (or failed to install)."
-      : detail
-        ? `Reason: ${detail}`
-        : undefined,
-    missing && detail ? `Detail: ${detail}` : null,
-    "To enable local embeddings:",
-    "1) Use Node 24 (recommended for installs/updates; Node 22 LTS, currently 22.19+, remains supported)",
-    missing
-      ? `2) Install ${NODE_LLAMA_CPP_RUNTIME_PACKAGE} next to the OpenClaw package or source checkout`
-      : null,
-    `3) If you use pnpm: pnpm approve-builds (select ${NODE_LLAMA_CPP_RUNTIME_PACKAGE}), then pnpm rebuild ${NODE_LLAMA_CPP_RUNTIME_PACKAGE}`,
-    ...listRemoteEmbeddingSetupHints(),
-  ]
-    .filter(Boolean)
-    .join("\n");
-}
-
-const localAdapter: MemoryEmbeddingProviderAdapter = {
-  id: "local",
-  defaultModel: DEFAULT_LOCAL_MODEL,
-  transport: "local",
-  autoSelectPriority: 10,
-  formatSetupError: formatLocalSetupError,
-  shouldContinueAutoSelection: () => true,
-  create: async (options) => {
-    const { createLocalEmbeddingProvider } =
-      await import("openclaw/plugin-sdk/memory-core-host-engine-embeddings");
-    const provider = await createLocalEmbeddingProvider({
-      ...options,
-      provider: "local",
-      fallback: "none",
-    });
-    return {
-      provider,
-      runtime: {
-        id: "local",
-        inlineQueryTimeoutMs: 5 * 60_000,
-        inlineBatchTimeoutMs: 10 * 60_000,
-        cacheKeyData: {
-          provider: "local",
-          model: provider.model,
-        },
-      },
-    };
-  },
-};
-
-const builtinMemoryEmbeddingProviderAdapters = [localAdapter] as const;
-
-export { DEFAULT_LOCAL_MODEL };
+// Typed explicitly: `[] as const` infers `readonly []`, whose element type is `never`, so
+// callbacks iterating this list could not read adapter fields such as `id`.
+const builtinMemoryEmbeddingProviderAdapters: readonly MemoryEmbeddingProviderAdapter[] = [];

 function getBuiltinMemoryEmbeddingProviderAdapter(
  id: string,
--- a/package.json
+++ b/package.json
@@ -91,6 +91,7 @@
    "!dist/extensions/google-meet/**",
    "!dist/extensions/googlechat/**",
    "!dist/extensions/line/**",
+    "!dist/extensions/llama-cpp/**",
    "!dist/extensions/lobster/**",
    "!dist/extensions/memory-lancedb/**",
    "!dist/extensions/matrix/**",
--- a/packages/memory-host-sdk/src/host/embeddings-worker.ts
+++ b/packages/memory-host-sdk/src/host/embeddings-worker.ts
@@ -73,13 +73,19 @@ function resolveDefaultWorkerScriptPath(): string {

 function serializeLocalEmbeddingOptions(
  options: EmbeddingProviderOptions,
+  runtimeOptions?: LocalEmbeddingProviderRuntimeOptions,
 ): EmbeddingProviderOptions {
  return {
    config: {},
    provider: "local",
    model: options.model,
    fallback: "none",
-    local: options.local,
+    local: {
+      ...options.local,
+      ...(runtimeOptions?.nodeLlamaCppImportUrl
+        ? { nodeLlamaCppImportUrl: runtimeOptions.nodeLlamaCppImportUrl }
+        : {}),
+    } as EmbeddingProviderOptions["local"],
  };
 }

@@ -329,7 +335,7 @@ export async function createLocalEmbeddingWorkerProvider(
  runtimeOptions?: LocalEmbeddingProviderRuntimeOptions,
 ): Promise<EmbeddingProvider> {
  const modelPath = normalizeOptionalString(options.local?.modelPath) || DEFAULT_LOCAL_MODEL;
-  const workerOptions = serializeLocalEmbeddingOptions(options);
+  const workerOptions = serializeLocalEmbeddingOptions(options, runtimeOptions);
  const client = new LocalEmbeddingWorkerClient(
    runtimeOptions?.workerScriptPath ?? resolveDefaultWorkerScriptPath(),
  );
--- a/packages/memory-host-sdk/src/host/embeddings.ts
+++ b/packages/memory-host-sdk/src/host/embeddings.ts
@@ -27,6 +27,7 @@ export { DEFAULT_LOCAL_MODEL } from "./embedding-defaults.js";

 export type LocalEmbeddingProviderRuntimeOptions = {
  workerScriptPath?: string;
+  nodeLlamaCppImportUrl?: string;
 };

 async function disposeResources(
@@ -47,8 +48,9 @@ async function disposeResources(

 export async function createLocalEmbeddingProvider(
  options: EmbeddingProviderOptions,
+  runtimeOptions?: LocalEmbeddingProviderRuntimeOptions,
 ): Promise<EmbeddingProvider> {
-  return await createLocalEmbeddingWorkerProvider(options);
+  return await createLocalEmbeddingWorkerProvider(options, runtimeOptions);
 }

 export async function createLocalEmbeddingProviderInProcess(
@@ -56,10 +58,15 @@ export async function createLocalEmbeddingProviderInProcess(
 ): Promise<EmbeddingProvider> {
  const modelPath = normalizeOptionalString(options.local?.modelPath) || DEFAULT_LOCAL_MODEL;
  const modelCacheDir = normalizeOptionalString(options.local?.modelCacheDir);
+  const nodeLlamaCppImportUrl = normalizeOptionalString(
+    (options.local as EmbeddingProviderOptions["local"] & { nodeLlamaCppImportUrl?: string })
+      ?.nodeLlamaCppImportUrl,
+  );
  const contextSize: number | "auto" = options.local?.contextSize ?? 4096;

  // Lazy-load node-llama-cpp to keep startup light unless local is enabled.
-  const { getLlama, resolveModelFile, LlamaLogLevel } = await importNodeLlamaCpp();
+  const { getLlama, resolveModelFile, LlamaLogLevel } =
+    await importNodeLlamaCpp(nodeLlamaCppImportUrl);

  let llama: Llama | null = null;
  let embeddingModel: LlamaModel | null = null;
--- a/packages/memory-host-sdk/src/host/node-llama.ts
+++ b/packages/memory-host-sdk/src/host/node-llama.ts
@@ -38,6 +38,6 @@ export type NodeLlamaCppModule = {

 const NODE_LLAMA_CPP_MODULE = "node-llama-cpp";

-export async function importNodeLlamaCpp() {
-  return import(NODE_LLAMA_CPP_MODULE) as Promise<NodeLlamaCppModule>;
+export async function importNodeLlamaCpp(moduleSpecifier = NODE_LLAMA_CPP_MODULE) {
+  return import(moduleSpecifier) as Promise<NodeLlamaCppModule>;
 }
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
--- a/scripts/lib/official-external-plugin-catalog.json
+++ b/scripts/lib/official-external-plugin-catalog.json
@@ -190,6 +190,27 @@
        }
      }
    },
+    {
+      "name": "@openclaw/llama-cpp-provider",
+      "description": "OpenClaw llama.cpp embedding provider plugin",
+      "source": "official",
+      "kind": "plugin",
+      "openclaw": {
+        "plugin": {
+          "id": "llama-cpp",
+          "label": "llama.cpp Provider"
+        },
+        "contracts": {
+          "embeddingProviders": ["local"],
+          "memoryEmbeddingProviders": ["local"]
+        },
+        "install": {
+          "npmSpec": "@openclaw/llama-cpp-provider",
+          "defaultChoice": "npm",
+          "minHostVersion": ">=2026.6.2"
+        }
+      }
+    },
    {
      "name": "@openclaw/openshell-sandbox",
      "description": "OpenClaw OpenShell sandbox backend",
--- a/src/commands/doctor-memory-search.test.ts
+++ b/src/commands/doctor-memory-search.test.ts
@@ -192,7 +192,7 @@ describe("noteMemorySearchHealth", () => {
    resetMemoryRecallMocks();
  });

-  it("does not warn when local provider is set with no explicit modelPath (default model fallback)", async () => {
+  it("warns when local provider is set but readiness was not confirmed", async () => {
    resolveMemorySearchConfig.mockReturnValue({
      provider: "local",
      local: {},
@@ -201,7 +201,10 @@ describe("noteMemorySearchHealth", () => {

    await noteMemorySearchHealth(cfg, {});

-    expect(note).not.toHaveBeenCalled();
+    expect(note).toHaveBeenCalledTimes(1);
+    const message = firstNoteMessage();
+    expect(message).toContain('Memory search provider is set to "local"');
+    expect(message).toContain("openclaw plugins install @openclaw/llama-cpp-provider");
  });

  it("warns when local provider with default model but gateway probe reports not ready", async () => {
@@ -217,7 +220,7 @@ describe("noteMemorySearchHealth", () => {

    expect(note).toHaveBeenCalledTimes(1);
    const message = firstNoteMessage();
-    expect(message).toContain("gateway reports local embeddings are not ready");
+    expect(message).toContain("local embeddings are not confirmed ready");
    expect(message).toContain("node-llama-cpp not installed");
  });

@@ -235,7 +238,7 @@ describe("noteMemorySearchHealth", () => {
    expect(note).not.toHaveBeenCalled();
  });

-  it("does not treat an inconclusive gateway timeout as local embeddings not ready", async () => {
+  it("warns when local provider readiness probe is inconclusive", async () => {
    resolveMemorySearchConfig.mockReturnValue({
      provider: "local",
      local: {},
@@ -250,10 +253,13 @@ describe("noteMemorySearchHealth", () => {
      },
    });

-    expect(note).not.toHaveBeenCalled();
+    expect(note).toHaveBeenCalledTimes(1);
+    const message = firstNoteMessage();
+    expect(message).toContain("local embeddings are not confirmed ready");
+    expect(message).toContain("gateway timeout after 8000ms");
  });

-  it("does not warn when local provider has an explicit hf: modelPath", async () => {
+  it("warns when local provider has an explicit hf: modelPath but readiness was not confirmed", async () => {
    resolveMemorySearchConfig.mockReturnValue({
      provider: "local",
      local: { modelPath: "hf:some-org/some-model-GGUF/model.gguf" },
@@ -262,7 +268,8 @@ describe("noteMemorySearchHealth", () => {

    await noteMemorySearchHealth(cfg, {});

-    expect(note).not.toHaveBeenCalled();
+    expect(note).toHaveBeenCalledTimes(1);
+    expect(firstNoteMessage()).toContain("a local model path is configured");
  });

  it("does not emit provider guidance when no memory runtime is active", async () => {
--- a/src/commands/doctor-memory-search.ts
+++ b/src/commands/doctor-memory-search.ts
@@ -24,7 +24,6 @@ import {
  checkQmdBinaryAvailability,
  resolveQmdBinaryUnavailableReason,
 } from "../memory-host-sdk/engine-qmd.js";
-import { DEFAULT_LOCAL_MODEL } from "../memory-host-sdk/host/embedding-defaults.js";
 import { hasConfiguredMemorySecretInput } from "../memory-host-sdk/secret.js";
 import {
  auditDreamingArtifacts,
@@ -470,39 +469,29 @@ export async function noteMemorySearchHealth(

  if (provider === "local") {
    const suggestedRemoteProvider = resolveSuggestedRemoteMemoryProvider();
-    if (hasLocalEmbeddings(resolved.local, true)) {
-      // Model path looks valid (explicit file, hf: URL, or default model).
-      // If a gateway probe is available and reports not-ready, warn anyway —
-      // the model download or node-llama-cpp setup may have failed at runtime.
-      if (opts?.gatewayMemoryProbe?.checked && !opts.gatewayMemoryProbe.ready) {
-        const detail = opts.gatewayMemoryProbe.error?.trim();
-        note(
-          [
-            'Memory search provider is set to "local" and a model path is configured,',
-            "but the gateway reports local embeddings are not ready.",
-            detail ? `Gateway probe: ${detail}` : null,
-            "",
-            `Verify: ${formatCliCommand("openclaw memory status --deep")}`,
-          ]
-            .filter(Boolean)
-            .join("\n"),
-          "Memory search",
-        );
-      }
+    if (opts?.gatewayMemoryProbe?.checked && opts.gatewayMemoryProbe.ready) {
      return;
    }
+    const hasExplicitLocalModel = hasLocalEmbeddings(resolved.local);
+    const detail = opts?.gatewayMemoryProbe?.error?.trim();
    note(
      [
-        'Memory search provider is set to "local" but no local model file was found.',
+        hasExplicitLocalModel
+          ? 'Memory search provider is set to "local" and a local model path is configured, but local embeddings are not confirmed ready.'
+          : 'Memory search provider is set to "local", but local embeddings are not confirmed ready.',
+        detail ? `Gateway probe: ${detail}` : null,
        "",
        "Fix (pick one):",
-        `- Install node-llama-cpp and set a local model path in config`,
+        `- Install the llama.cpp provider plugin: ${formatCliCommand("openclaw plugins install @openclaw/llama-cpp-provider")}`,
+        `- Set a local GGUF model path in config`,
        suggestedRemoteProvider
          ? `- Switch to a remote provider: ${formatCliCommand(`openclaw config set agents.defaults.memorySearch.provider ${suggestedRemoteProvider}`)}`
          : `- Switch to a remote embedding provider in config`,
        "",
        `Verify: ${formatCliCommand("openclaw memory status --deep")}`,
-      ].join("\n"),
+      ]
+        .filter(Boolean)
+        .join("\n"),
      "Memory search",
    );
    return;
@@ -612,15 +601,9 @@ export async function noteMemorySearchHealth(
 /**
 * Check whether local embeddings are available.
 *
- * When `useDefaultFallback` is true (explicit `provider: "local"`), an empty
- * modelPath is treated as available because the runtime falls back to
- * DEFAULT_LOCAL_MODEL (an auto-downloaded HuggingFace model).
- *
 */
-function hasLocalEmbeddings(local: { modelPath?: string }, useDefaultFallback = false): boolean {
-  const modelPath =
-    normalizeOptionalString(local.modelPath) ||
-    (useDefaultFallback ? DEFAULT_LOCAL_MODEL : undefined);
+function hasLocalEmbeddings(local: { modelPath?: string }): boolean {
+  const modelPath = normalizeOptionalString(local.modelPath);
  if (!modelPath) {
    return false;
  }
--- a/src/plugins/channel-plugin-ids.test.ts
+++ b/src/plugins/channel-plugin-ids.test.ts
@@ -202,6 +202,29 @@ function createManifestRegistryFixture(): PluginManifestRegistry {
        providers: ["amazon-bedrock"],
        cliBackends: [],
      },
+      {
+        id: "llama-cpp",
+        channels: [],
+        origin: "global",
+        enabledByDefault: true,
+        providers: [],
+        cliBackends: [],
+        contracts: {
+          embeddingProviders: ["local"],
+          memoryEmbeddingProviders: ["local"],
+        },
+      },
+      {
+        id: "legacy-memory-embedding",
+        channels: [],
+        origin: "bundled",
+        enabledByDefault: true,
+        providers: [],
+        cliBackends: [],
+        contracts: {
+          memoryEmbeddingProviders: ["legacy-memory"],
+        },
+      },
      {
        id: "brave",
        channels: [],
@@ -927,6 +950,20 @@ describe("resolveGatewayStartupPluginIds", () => {
      } as OpenClawConfig,
      ["browser", "memory-core"],
    ],
+    [
+      "includes legacy memory embedding providers configured by memory search defaults",
+      {
+        channels: {},
+        agents: {
+          defaults: {
+            memorySearch: {
+              provider: "legacy-memory",
+            },
+          },
+        },
+      } as OpenClawConfig,
+      ["browser", "legacy-memory-embedding", "memory-core"],
+    ],
    [
      "includes explicitly selected external web search providers at startup",
      {
@@ -1095,6 +1132,119 @@ describe("resolveGatewayStartupPluginIds", () => {
    });
  });

+  it("includes auto-enabled memory embedding providers at startup", () => {
+    const rawConfig = {
+      agents: {
+        defaults: {
+          memorySearch: {
+            provider: "local",
+          },
+        },
+      },
+      channels: {},
+      plugins: {
+        allow: ["browser"],
+        slots: {
+          memory: "memory-core",
+        },
+      },
+    } as OpenClawConfig;
+    const effectiveConfig = {
+      ...rawConfig,
+      plugins: {
+        allow: ["browser", "llama-cpp"],
+        entries: {
+          "llama-cpp": {
+            enabled: true,
+          },
+        },
+        slots: {
+          memory: "memory-core",
+        },
+      },
+    } as OpenClawConfig;
+
+    expectStartupPluginIdsCase({
+      config: effectiveConfig,
+      activationSourceConfig: rawConfig,
+      expected: ["browser", "llama-cpp", "memory-core"],
+    });
+  });
+
+  it("includes memory embedding provider alias owners at startup", () => {
+    const rawConfig = {
+      agents: {
+        defaults: {
+          memorySearch: {
+            provider: "local-gpu",
+          },
+        },
+      },
+      channels: {},
+      models: {
+        providers: {
+          "local-gpu": {
+            api: "local",
+            models: [],
+          },
+        },
+      },
+      plugins: {
+        allow: ["browser"],
+        slots: {
+          memory: "memory-core",
+        },
+      },
+    } as OpenClawConfig;
+    const effectiveConfig = {
+      ...rawConfig,
+      plugins: {
+        allow: ["browser", "llama-cpp"],
+        entries: {
+          "llama-cpp": {
+            enabled: true,
+          },
+        },
+        slots: {
+          memory: "memory-core",
+        },
+      },
+    } as OpenClawConfig;
+
+    expectStartupPluginIdsCase({
+      config: effectiveConfig,
+      activationSourceConfig: rawConfig,
+      expected: ["browser", "llama-cpp", "memory-core"],
+    });
+  });
+
+  it("honors explicit plugin disablement for configured memory embedding providers", () => {
+    expectStartupPluginIdsCase({
+      config: {
+        agents: {
+          defaults: {
+            memorySearch: {
+              provider: "local",
+            },
+          },
+        },
+        channels: {},
+        plugins: {
+          allow: ["browser", "llama-cpp"],
+          entries: {
+            "llama-cpp": {
+              enabled: false,
+            },
+          },
+          slots: {
+            memory: "memory-core",
+          },
+        },
+      } as OpenClawConfig,
+      expected: ["browser", "memory-core"],
+    });
+  });
+
  it("does not let runtime-default plugin entries bypass the authored startup allowlist", () => {
    const activationSourceConfig = {
      channels: {},
@@ -1543,6 +1693,34 @@ describe("resolveGatewayStartupPluginIds", () => {
    ).toEqual(["amazon-bedrock", "browser"]);
  });

+  it("keeps configured memory embedding providers in restrictive startup metadata scopes", () => { // exercises resolveGatewayStartupMetadataPluginIds rather than the startup plan helper
+    const registry = createManifestRegistryFixture();
+    const index = createInstalledPluginIndexFixture(registry); // installed-plugin index built from the manifest fixture
+
+    expect(
+      resolveGatewayStartupMetadataPluginIds({
+        config: {
+          agents: {
+            defaults: {
+              memorySearch: {
+                provider: "local", // the configured memory embedding provider under test
+              },
+            },
+          },
+          channels: {},
+          plugins: {
+            allow: ["browser"], // llama-cpp is not in the allowlist
+            slots: {
+              memory: "memory-core",
+            },
+          },
+        } as OpenClawConfig,
+        env: createPluginPlanningTestEnv(),
+        index,
+      }),
+    ).toEqual(["browser", "llama-cpp", "memory-core"]); // llama-cpp is expected in the metadata scope despite the allowlist above
+  });
+
  it("uses installed-index model support for restrictive startup shorthand model scopes", () => {
    const registry = createManifestRegistryFixture();
    const index = createInstalledPluginIndexFixture(registry);
--- a/src/plugins/gateway-startup-plugin-ids.ts
+++ b/src/plugins/gateway-startup-plugin-ids.ts
@@ -306,6 +306,22 @@ function manifestOwnsConfiguredWebSearchProvider(params: {
  });
 }

+function manifestOwnsConfiguredMemoryEmbeddingProvider(params: { // Returns true when one of the manifest's embedding contract ids is in the configured set.
+  manifest: PluginManifestRecord | undefined; // an undefined manifest contributes no contract ids
+  configuredMemoryEmbeddingProviderIds: ReadonlySet<string>; // contract ids are looked up here after normalizeOptionalLowercaseString
+}): boolean {
+  if (params.configuredMemoryEmbeddingProviderIds.size === 0) { // nothing configured, so no manifest can match
+    return false;
+  }
+  return [
+    ...(params.manifest?.contracts?.embeddingProviders ?? []), // both contract lists are searched as one combined list
+    ...(params.manifest?.contracts?.memoryEmbeddingProviders ?? []),
+  ].some((providerId) => {
+    const normalized = normalizeOptionalLowercaseString(providerId);
+    return normalized ? params.configuredMemoryEmbeddingProviderIds.has(normalized) : false; // ids that normalize to a falsy value never match
+  });
+}
+
 function listModelProviderRefs(value: unknown): string[] {
  if (typeof value === "string") {
    return [value];
@@ -591,6 +607,52 @@ function collectConfiguredProviderIds(config: OpenClawConfig): string[] {
  ]);
 }

+function collectConfiguredMemoryEmbeddingProviderIds(config: OpenClawConfig): string[] { // Gathers memorySearch provider/fallback ids from agent defaults and each listed agent, plus resolved aliases.
+  const providerIds: string[] = [];
+  const pushProviderId = (value: unknown) => {
+    if (typeof value !== "string") { // unset or non-string settings are ignored
+      return;
+    }
+    const normalized = normalizeOptionalLowercaseString(value);
+    if (normalized && normalized !== "auto" && normalized !== "none") { // NOTE(review): "auto"/"none" are presumably modes, not provider ids - confirm
+      providerIds.push(normalized);
+      const aliasedProviderId = resolveConfiguredMemoryEmbeddingProviderAliasId(normalized, config);
+      if (aliasedProviderId) { // record the alias in addition to the configured id, not instead of it
+        providerIds.push(aliasedProviderId);
+      }
+    }
+  };
+  pushProviderId(config.agents?.defaults?.memorySearch?.provider);
+  pushProviderId(config.agents?.defaults?.memorySearch?.fallback);
+  for (const agent of config.agents?.list ?? []) { // per-agent settings are collected alongside the defaults
+    pushProviderId(agent?.memorySearch?.provider);
+    pushProviderId(agent?.memorySearch?.fallback);
+  }
+  return sortUniquePluginIds(providerIds); // presumably sorted and de-duplicated by the helper - confirm
+}
+
+function resolveConfiguredMemoryEmbeddingProviderAliasId( // Returns the normalized `api` of the matching models.providers entry when it differs from the provider id, else undefined.
+  providerId: string,
+  config: OpenClawConfig,
+): string | undefined {
+  const providers = config.models?.providers;
+  if (!providers) { // no models.providers section means no alias can exist
+    return undefined;
+  }
+  const normalizedProviderId = normalizeProviderId(providerId);
+  const providerConfig =
+    providers[providerId] ?? // exact key match is tried first
+    Object.entries(providers).find(
+      ([candidateId]) => normalizeProviderId(candidateId) === normalizedProviderId, // otherwise the first key whose normalized form matches
+    )?.[1];
+  const api = providerConfig?.api?.trim();
+  if (!api) { // covers no matching entry, no `api`, and a whitespace-only `api`
+    return undefined;
+  }
+  const normalizedApi = normalizeOptionalLowercaseString(api);
+  return normalizedApi && normalizedApi !== normalizedProviderId ? normalizedApi : undefined; // an api equal to the provider id is not reported as an alias
+}
+
 function collectValidationConfiguredProviderIds(config: OpenClawConfig): string[] {
  const providerIds: string[] = [];
  const pushProviderId = (value: unknown) => {
@@ -744,6 +806,15 @@ export function resolveGatewayStartupMetadataPluginIds(params: {
  }
  lookup.addDirectProviderOwners(scope, configuredProviderIds);

+  const configuredMemoryEmbeddingProviderIds = sortUniquePluginIds([
+    ...collectConfiguredMemoryEmbeddingProviderIds(params.config),
+    ...collectConfiguredMemoryEmbeddingProviderIds(activationSourceConfig),
+  ]);
+  if (!lookup.hasProviderContributionOwners(configuredMemoryEmbeddingProviderIds)) {
+    return undefined;
+  }
+  lookup.addProviderContributionOwners(scope, configuredMemoryEmbeddingProviderIds);
+
  const configuredShorthandModelIds = sortUniquePluginIds([
    ...collectValidationConfiguredShorthandModelIds(params.config),
    ...collectValidationConfiguredShorthandModelIds(activationSourceConfig),
@@ -891,6 +962,14 @@ export function resolveConfigValidationMetadataPluginIds(params: {
  }
  lookup.addProviderContributionOwners(scope, configuredProviderIds);

+  const configuredMemoryEmbeddingProviderIds = collectConfiguredMemoryEmbeddingProviderIds(
+    params.config,
+  );
+  if (!lookup.hasProviderContributionOwners(configuredMemoryEmbeddingProviderIds)) {
+    return undefined;
+  }
+  lookup.addProviderContributionOwners(scope, configuredMemoryEmbeddingProviderIds);
+
  const configuredShorthandModelIds = collectValidationConfiguredShorthandModelIds(params.config);
  if (!lookup.hasShorthandModelOwners(configuredShorthandModelIds)) {
    return undefined;
@@ -1309,6 +1388,52 @@ function canStartConfiguredWebSearchProviderPlugin(params: {
  return activationState.enabled;
 }

+function canStartConfiguredMemoryEmbeddingProviderPlugin(params: { // Decides whether a plugin may start because its manifest owns a configured memory embedding provider.
+  plugin: InstalledPluginIndexRecord;
+  manifest: PluginManifestRecord | undefined;
+  config: OpenClawConfig; // forwarded as rootConfig to resolveEffectivePluginActivationState
+  pluginsConfig: ReturnType<typeof normalizePluginsConfigWithRegistry>;
+  activationSource: {
+    plugins: ReturnType<typeof normalizePluginsConfigWithRegistry>;
+    rootConfig?: OpenClawConfig;
+  };
+  configuredMemoryEmbeddingProviderIds: ReadonlySet<string>;
+  platform?: NodeJS.Platform; // only used for the enabled-by-default lookup
+}): boolean {
+  if ( // gate 1: the manifest must own one of the configured provider ids
+    !manifestOwnsConfiguredMemoryEmbeddingProvider({
+      manifest: params.manifest,
+      configuredMemoryEmbeddingProviderIds: params.configuredMemoryEmbeddingProviderIds,
+    })
+  ) {
+    return false;
+  }
+  if (!params.pluginsConfig.enabled || !params.activationSource.plugins.enabled) { // gate 2: plugins must be enabled in both pluginsConfig and activationSource.plugins
+    return false;
+  }
+  if ( // gate 3: a deny entry in either config blocks startup
+    params.pluginsConfig.deny.includes(params.plugin.pluginId) ||
+    params.activationSource.plugins.deny.includes(params.plugin.pluginId)
+  ) {
+    return false;
+  }
+  if ( // gate 4: an explicit `enabled: false` entry in either config blocks startup
+    params.pluginsConfig.entries[params.plugin.pluginId]?.enabled === false ||
+    params.activationSource.plugins.entries[params.plugin.pluginId]?.enabled === false
+  ) {
+    return false;
+  }
+  const activationState = resolveEffectivePluginActivationState({ // all gates passed: the activation resolver makes the final decision
+    id: params.plugin.pluginId,
+    origin: params.plugin.origin,
+    config: params.pluginsConfig,
+    rootConfig: params.config,
+    enabledByDefault: isPluginEnabledByDefaultForPlatform(params.plugin, params.platform),
+    activationSource: params.activationSource,
+  });
+  return activationState.enabled;
+}
+
 function canStartConfiguredRootPlugin(params: {
  plugin: InstalledPluginIndexRecord;
  manifest: PluginManifestRecord | undefined;
@@ -1596,6 +1721,9 @@ export function resolveGatewayStartupPluginPlanFromRegistry(params: {
  const configuredGenerationProviderIds =
    collectConfiguredGenerationProviderIds(activationSourceConfig);
  const configuredVoiceProviderIds = collectConfiguredVoiceProviderIds(activationSourceConfig);
+  const configuredMemoryEmbeddingProviderIds = new Set(
+    collectConfiguredMemoryEmbeddingProviderIds(activationSourceConfig),
+  );
  const normalizePluginId = createPluginRegistryIdNormalizer(params.index, {
    manifestRegistry: params.manifestRegistry,
  });
@@ -1688,6 +1816,20 @@ export function resolveGatewayStartupPluginPlanFromRegistry(params: {
      pluginIds.push(plugin.pluginId);
      continue;
    }
+    if (
+      canStartConfiguredMemoryEmbeddingProviderPlugin({
+        plugin,
+        manifest,
+        config: params.config,
+        pluginsConfig,
+        activationSource,
+        configuredMemoryEmbeddingProviderIds,
+        platform: params.platform,
+      })
+    ) {
+      pluginIds.push(plugin.pluginId);
+      continue;
+    }
    if (
      canStartConfiguredModelProviderPlugin({
        plugin,
--- a/src/plugins/official-external-plugin-catalog.test.ts
+++ b/src/plugins/official-external-plugin-catalog.test.ts
@@ -50,6 +50,9 @@ describe("official external plugin catalog", () => {
        minHostVersion: ">=2026.5.27",
      },
    );
+    expect(resolveOfficialExternalPluginInstall(expectCatalogEntry("llama-cpp"))?.npmSpec).toBe(
+      "@openclaw/llama-cpp-provider",
+    );
  });

  it("allows invalid-config recovery for externalized stock plugins", () => {