fix(memory): validate memory index identity

* docs: add memory index identity plan

* fix(memory): validate memory index identity

* fix(memory): align status index identity with vector probe

* fix(memory): fail closed on stale fts-only search

* fix(memory): clear sessions-only identity reindex dirty state

* fix(memory): gate targeted session sync by index identity

* fix(memory): clear resolved index identity dirtiness

* fix(memory): block search on missing index identity

* fix(memory): preserve dirty events during identity reindex

* fix(memory): resolve provider aliases for index identity

* fix(memory): report missing identity states accurately

* fix(memory): mark missing session index identity dirty

* test(memory): expose provider alias resolver in mocks

* chore(memory): remove scratch implementation plan

* fix(memory): avoid automatic full reindex on provider cutover

* docs(memory): plan no-schema cutover repair

* fix(memory): pause vector search on index identity mismatch

* fix(memory): freeze dirty identity sync writes

* fix(memory): skip paused-index search retry

* test(memory): keep retry tests on same provider identity

* fix(memory): surface paused index recall

* chore(memory): remove scratch plan from pr

* fix(memory): preserve paused session dirtiness

* fix(memory): make paused recall warning explicit

* docs(memory): document explicit index repair
This commit is contained in:
Onur Solmaz
2026-06-02 14:22:25 +08:00
committed by GitHub
parent 5be282e459
commit a4b4fed412
22 changed files with 1227 additions and 200 deletions

View File

@@ -58,6 +58,15 @@ explicitly to use Gemini, Voyage, Mistral, DeepInfra, Bedrock, GitHub Copilot,
Ollama, a local GGUF model, or an OpenAI-compatible `/v1/embeddings` endpoint.
Legacy configs that still say `provider: "auto"` resolve to `openai`.
<Warning>
Changing the embedding provider, model, provider settings, sources, scope,
chunking, or tokenizer can make the existing SQLite vector index incompatible.
OpenClaw pauses vector search and reports an index identity warning instead of
automatically re-embedding everything. Rebuild when you are ready with
`openclaw memory status --index --agent <id>` or
`openclaw memory index --force --agent <id>`.
</Warning>
If OpenAI embeddings are unreachable from your network, memory recall fails open
instead of blocking the turn. Set the existing `memorySearch.provider` field to a
reachable local, Ollama, regional, or OpenAI-compatible provider to restore
@@ -155,7 +164,8 @@ Use `provider: "openai-compatible"` for a generic OpenAI-compatible
| `outputDimensionality` | `number` | `3072` | For Embedding 2: 768, 1536, or 3072 |
<Warning>
Changing model or `outputDimensionality` triggers an automatic full reindex.
Changing model or `outputDimensionality` changes the index identity. OpenClaw
pauses vector search until you explicitly rebuild the memory index.
</Warning>
</Accordion>

View File

@@ -71,6 +71,28 @@ type MemoryManagerPurpose = Parameters<typeof getMemorySearchManager>[0]["purpos
type MemorySourceName = "memory" | "sessions";
/**
 * Builds the CLI warning for an unusable memory index identity.
 *
 * Reads `status.custom.indexIdentity` and produces a warning only when its
 * `status` is `"mismatched"` or `"missing"` and it carries a non-empty string
 * `reason`. Any other shape yields `null`.
 *
 * @param status - Memory manager status snapshot to inspect.
 * @param agentId - Agent id interpolated into the suggested repair command.
 * @returns The reported reason plus a repair hint, or `null` when there is nothing to warn about.
 */
function formatMemoryIndexIdentityWarning(
  status: ReturnType<MemoryManager["status"]>,
  agentId: string,
): {
  reason: string;
  fix: string;
} | null {
  const identity = asRecord(asRecord(status.custom)?.indexIdentity);
  const identityStatus = identity?.status;
  // Only the two non-valid identity states produce a warning.
  if (identityStatus !== "mismatched" && identityStatus !== "missing") {
    return null;
  }
  const reason = identity?.reason;
  // A missing, non-string, or empty reason is treated as "nothing to report".
  if (typeof reason !== "string" || reason.length === 0) {
    return null;
  }
  return { reason, fix: `Run: openclaw memory status --index --agent ${agentId}` };
}
type SourceScan = {
source: MemorySourceName;
totalFiles: number | null;
@@ -868,6 +890,12 @@ export async function runMemoryStatus(opts: MemoryCommandOptions) {
lines.push(`${label("Embeddings error")} ${warn(embeddingProbe.error)}`);
}
}
const identityWarning = formatMemoryIndexIdentityWarning(status, agentId);
if (identityWarning) {
lines.push(`${label("Index identity")} ${warn(identityWarning.reason)}`);
lines.push(`${label("Vector search")} ${warn("paused until memory is rebuilt")}`);
lines.push(`${label("Fix")} ${muted(identityWarning.fix)}`);
}
if (status.sourceCounts?.length) {
lines.push(label("By source"));
for (const entry of status.sourceCounts) {
@@ -1256,6 +1284,15 @@ export async function runMemorySearch(
defaultRuntime.writeJson({ results });
return;
}
const identityWarning =
typeof manager.status === "function"
? formatMemoryIndexIdentityWarning(manager.status(), agentId)
: null;
if (identityWarning) {
defaultRuntime.error(
`Memory index warning: ${identityWarning.reason}. Vector memory search is paused until the index is rebuilt. ${identityWarning.fix}`,
);
}
if (results.length === 0) {
defaultRuntime.log("No matches.");
return;

View File

@@ -415,6 +415,36 @@ describe("memory cli", () => {
expect(close).toHaveBeenCalled();
});
// Verifies that `memory status` surfaces an index identity mismatch: the
// reason text, the paused vector search notice, and the suggested repair command.
it("prints index identity mismatch reasons", async () => {
const close = vi.fn(async () => {});
mockManager({
status: () =>
makeMemoryStatus({
dirty: true,
provider: "ollama",
model: "nomic-embed-text",
requestedProvider: "ollama",
// Identity state exposed under `status.custom`, which the CLI reads.
custom: {
indexIdentity: {
status: "mismatched",
reason: "index was built for provider openai, expected ollama",
},
},
}),
close,
});
const log = spyRuntimeLogs(defaultRuntime);
await runMemoryCli(["status"]);
expectLogged(log, "Provider: ollama (requested: ollama)");
expectLogged(log, "Dirty: yes");
// The three lines below are the identity-specific output under test.
expectLogged(log, "Index identity: index was built for provider openai, expected ollama");
expectLogged(log, "Vector search: paused until memory is rebuilt");
expectLogged(log, "Fix: Run: openclaw memory status --index --agent main");
// The manager must still be closed after status output.
expect(close).toHaveBeenCalled();
});
it("keeps plain status from probing vector or embeddings", async () => {
const close = vi.fn(async () => {});
const probeVectorAvailability = vi.fn(async () => {

View File

@@ -86,6 +86,10 @@ export function setMemoryWorkspaceDir(next: string): void {
workspaceDir = next;
}
/**
 * Test hook: replaces the module-level `customStatus` value.
 *
 * @param next - New value to store; `undefined` clears it.
 *   NOTE(review): presumably surfaced as `status().custom` by the stub manager — confirm.
 */
export function setMemoryCustomStatus(next: Record<string, unknown> | undefined): void {
customStatus = next;
}
export function setMemorySearchImpl(next: SearchImpl): void {
searchImpl = next;
}
@@ -130,6 +134,10 @@ export function getMemorySearchManagerMockCalls(): number {
return getMemorySearchManagerMock.mock.calls.length;
}
/** Returns the number of calls recorded so far on the stub manager's `sync` mock. */
export function getMemorySyncMockCalls(): number {
return stubManager.sync.mock.calls.length;
}
export function getMemorySearchManagerMockConfigs(): unknown[] {
return getMemorySearchManagerMock.mock.calls.map(([params]) => params.cfg);
}

View File

@@ -26,6 +26,7 @@ export function resetEmbeddingMocks(): void {
}
vi.mock("./embeddings.js", () => ({
resolveEmbeddingProviderAdapterId: (providerId: string) => providerId,
createEmbeddingProvider: async () => ({
requestedProvider: "openai",
provider: {

View File

@@ -146,6 +146,17 @@ export function resolveEmbeddingProviderFallbackModel(
return adapter?.defaultModel ?? fallbackSourceModel;
}
/**
 * Resolves a configured provider id to the id of the adapter that serves it.
 *
 * @param providerId - Provider id to look up.
 * @param config - Optional configuration forwarded to the adapter lookup.
 * @returns The adapter's `id`, or `undefined` when the lookup throws.
 */
export function resolveEmbeddingProviderAdapterId(
  providerId: string,
  config?: MemoryEmbeddingProviderCreateOptions["config"],
): string | undefined {
  let adapterId: string | undefined;
  try {
    const adapter = getAdapter(providerId, config);
    adapterId = adapter.id;
  } catch {
    // Best-effort lookup: any failure is reported as "no adapter id".
    adapterId = undefined;
  }
  return adapterId;
}
async function createWithAdapter(
adapter: MemoryEmbeddingProviderAdapter,
options: CreateEmbeddingProviderOptions,

View File

@@ -13,6 +13,7 @@ import "./test-runtime-mocks.js";
import type { MemoryIndexManager } from "./index.js";
import { closeAllMemorySearchManagers, getMemorySearchManager } from "./index.js";
import { LOCAL_EMBEDDING_WORKER_ERROR_CODES } from "./manager-local-worker-errors.js";
import type { MemoryIndexMeta } from "./manager-reindex-state.js";
import { closeMemoryIndexManagersForAgent, EMBEDDING_PROBE_CACHE_TTL_MS } from "./manager.js";
import {
DEFAULT_LOCAL_MODEL,
@@ -58,6 +59,14 @@ vi.mock("./embeddings.js", () => {
providerId === "gemini" || providerId === "fallback-provider"
? `${providerId}-embed`
: fallbackSourceModel,
resolveEmbeddingProviderAdapterId: (
providerId: string,
config?: {
models?: {
providers?: Record<string, { api?: string; baseUrl?: string; models?: unknown[] }>;
};
},
) => config?.models?.providers?.[providerId]?.api ?? providerId,
createEmbeddingProvider: async (options: {
provider?: string;
model?: string;
@@ -77,7 +86,9 @@ vi.mock("./embeddings.js", () => {
};
}
const providerId =
options.provider === "gemini" || options.provider === "fallback-provider"
options.provider === "gemini" ||
options.provider === "fallback-provider" ||
options.provider === "ollama"
? options.provider
: "mock";
const model = options.model ?? "mock-embed";
@@ -261,8 +272,9 @@ describe("memory index", () => {
extraPaths?: string[];
sources?: Array<"memory" | "sessions">;
sessionMemory?: boolean;
provider?: "openai" | "gemini" | "fallback-provider";
provider?: string;
fallback?: "none" | "gemini" | "fallback-provider";
providerAliases?: NonNullable<NonNullable<TestCfg["models"]>["providers"]>;
model?: string;
outputDimensionality?: number;
multimodal?: {
@@ -302,6 +314,7 @@ describe("memory index", () => {
},
list: [{ id: "main", default: true }],
},
models: params.providerAliases ? { providers: params.providerAliases } : undefined,
};
}
@@ -323,9 +336,12 @@ describe("memory index", () => {
return manager;
}
async function getFreshManager(cfg: TestCfg): Promise<MemoryIndexManager> {
async function getFreshManager(
cfg: TestCfg,
purpose?: "default" | "status" | "cli",
): Promise<MemoryIndexManager> {
const { getRequiredMemoryIndexManager } = await import("./test-manager-helpers.js");
return await getRequiredMemoryIndexManager({ cfg, agentId: "main" });
return await getRequiredMemoryIndexManager({ cfg, agentId: "main", purpose });
}
async function expectHybridKeywordSearchFindsMemory(cfg: TestCfg) {
@@ -389,6 +405,406 @@ describe("memory index", () => {
}
});
// Verifies that reopening an existing index under a different provider/model
// neither re-embeds on search nor on a watch-triggered sync; the index stays
// dirty and reports a mismatched identity throughout.
it("does not full-reindex on search when existing metadata belongs to another provider", async () => {
const dbPath = path.join(workspaceDir, "index-provider-cutover.sqlite");
// Step 1: build the index with the "old" model and close the manager.
const oldCfg = createCfg({
storePath: dbPath,
model: "old-embed",
hybrid: { enabled: true, vectorWeight: 0.5, textWeight: 0.5 },
});
const oldManager = await getFreshManager(oldCfg);
await oldManager.sync({ reason: "test", force: true });
await oldManager.close?.();
// Step 2: reopen the same store path with a different provider and model.
const nextCfg = createCfg({
storePath: dbPath,
provider: "gemini",
model: "new-embed",
hybrid: { enabled: true, vectorWeight: 0.5, textWeight: 0.5 },
});
const nextManager = await getFreshManager(nextCfg);
try {
expect(nextManager.status().dirty).toBe(true);
expect(nextManager.status().custom?.indexIdentity).toEqual({
status: "mismatched",
reason: "index was built for model old-embed, expected new-embed",
});
// Reset the batch counter so only embedding work caused by the search is counted.
embedBatchCalls = 0;
const results = await nextManager.search("alpha");
expect(results).toStrictEqual([]);
expect(embedBatchCalls).toBe(0);
expect(nextManager.status().dirty).toBe(true);
// Step 3: change a memory file and run a watch-reason sync; still no embedding calls expected.
await fs.writeFile(
path.join(memoryDir, "2026-01-12.md"),
"# Log\nAlpha memory line changed.\nZebra memory line.",
);
await nextManager.sync({ reason: "watch" });
expect(embedBatchCalls).toBe(0);
const stillPausedResults = await nextManager.search("alpha");
expect(stillPausedResults).toStrictEqual([]);
expect(nextManager.status().dirty).toBe(true);
expect(nextManager.status().custom?.indexIdentity).toEqual({
status: "mismatched",
reason: "index was built for model old-embed, expected new-embed",
});
} finally {
await nextManager.close?.();
}
});
// Verifies that a configured provider alias whose `api` matches the provider the
// index was built with is reported as a valid, clean identity by a status manager.
it("keeps status clean when configured provider alias resolves to indexed adapter", async () => {
const dbPath = path.join(workspaceDir, "index-provider-alias-status.sqlite");
// Build the index directly with provider "ollama".
const oldCfg = createCfg({
storePath: dbPath,
provider: "ollama",
model: "ollama-embed",
hybrid: { enabled: true, vectorWeight: 0.5, textWeight: 0.5 },
});
const oldManager = await getFreshManager(oldCfg);
await oldManager.sync({ reason: "test", force: true });
await oldManager.close?.();
// Reopen with provider "ollama-west", declared as an alias whose `api` is "ollama".
const aliasCfg = createCfg({
storePath: dbPath,
provider: "ollama-west",
providerAliases: {
"ollama-west": {
api: "ollama",
baseUrl: "http://127.0.0.1:11434",
models: [],
},
},
model: "ollama-embed",
hybrid: { enabled: true, vectorWeight: 0.5, textWeight: 0.5 },
});
// Uses the "status" purpose so the check runs through the status code path.
const statusManager = await getFreshManager(aliasCfg, "status");
try {
const status = statusManager.status();
expect(status.dirty).toBe(false);
expect(status.custom?.indexIdentity).toEqual({ status: "valid" });
} finally {
await statusManager.close?.();
}
});
// Verifies that search returns nothing and the index is reported dirty with a
// "missing" identity when the stored index metadata row has been removed.
it("does not search stale rows when index metadata is missing", async () => {
const dbPath = path.join(workspaceDir, "index-missing-meta-cutover.sqlite");
const cfg = createCfg({
storePath: dbPath,
hybrid: { enabled: true, vectorWeight: 0.5, textWeight: 0.5 },
});
const oldManager = await getFreshManager(cfg);
await oldManager.sync({ reason: "test", force: true });
await oldManager.close?.();
// Remove the source file after indexing, so rows for it remain only in the index.
await fs.rm(path.join(memoryDir, "2026-01-12.md"));
const nextManager = await getFreshManager(cfg);
try {
// Reach into the manager's `db` handle to delete the persisted metadata row.
(
nextManager as unknown as {
db: { exec: (sql: string) => void };
}
).db.exec(`DELETE FROM meta WHERE key = 'memory_index_meta_v1'`);
expect(nextManager.status().custom?.indexIdentity).toEqual({
status: "missing",
reason: "index metadata is missing",
});
const results = await nextManager.search("alpha");
expect(results).toStrictEqual([]);
// Searching must not change the reported identity state.
expect(nextManager.status().dirty).toBe(true);
expect(nextManager.status().custom?.indexIdentity).toEqual({
status: "missing",
reason: "index metadata is missing",
});
} finally {
await nextManager.close?.();
}
});
// Verifies that an index built with a working provider is not searched once the
// same configuration is reopened while `forceNoProvider` is set.
it("does not search stale provider rows after embeddings become unavailable", async () => {
const dbPath = path.join(workspaceDir, "index-provider-unavailable-cutover.sqlite");
const oldCfg = createCfg({
storePath: dbPath,
model: "semantic-embed",
hybrid: { enabled: true, vectorWeight: 0.5, textWeight: 0.5 },
});
const oldManager = await getFreshManager(oldCfg);
await oldManager.sync({ reason: "test", force: true });
await oldManager.close?.();
// NOTE(review): `forceNoProvider` is declared outside this block; presumably it makes
// the mocked embeddings module yield no provider and is reset by suite hooks — confirm.
forceNoProvider = true;
const nextManager = await getFreshManager(oldCfg);
try {
const results = await nextManager.search("alpha");
expect(results).toStrictEqual([]);
expect(nextManager.status().dirty).toBe(true);
// Only the status is pinned here; the exact reason text is left unchecked.
expect(nextManager.status().custom?.indexIdentity).toMatchObject({
status: "mismatched",
});
} finally {
await nextManager.close?.();
}
});
// Verifies that, for a sessions-only index, a forced sync after a provider/model
// change leaves the index clean with a valid identity.
it("clears dirty after sessions-only identity reindex", async () => {
try {
// Stub the state directory before resolving the sessions directory.
// NOTE(review): presumably the resolver reads OPENCLAW_STATE_DIR — confirm.
vi.stubEnv("OPENCLAW_STATE_DIR", path.join(workspaceDir, ".state-sessions-only-reindex"));
const sessionsDir = resolveSessionTranscriptsDirForAgent("main");
await fs.mkdir(sessionsDir, { recursive: true });
// Write a two-line JSONL transcript: a session header and one assistant message.
await fs.writeFile(
path.join(sessionsDir, "session-identity.jsonl"),
[
JSON.stringify({
type: "session",
id: "session-identity",
timestamp: "2026-04-07T15:24:04.113Z",
}),
JSON.stringify({
type: "message",
message: {
role: "assistant",
timestamp: "2026-04-07T15:25:04.113Z",
content: [{ type: "text", text: "Session-only identity marker." }],
},
}),
].join("\n") + "\n",
"utf8",
);
const dbPath = path.join(workspaceDir, "index-sessions-only-cutover.sqlite");
// Build the index with the old model, sessions source only.
const oldCfg = createCfg({
storePath: dbPath,
sources: ["sessions"],
sessionMemory: true,
model: "old-embed",
});
const oldManager = await getFreshManager(oldCfg);
await oldManager.sync({ reason: "test", force: true });
await oldManager.close?.();
// Reopen with a different provider and model against the same store.
const nextCfg = createCfg({
storePath: dbPath,
sources: ["sessions"],
sessionMemory: true,
provider: "gemini",
model: "new-embed",
});
const nextManager = await getFreshManager(nextCfg);
try {
expect(nextManager.status().dirty).toBe(true);
// A forced sync is the explicit repair; afterwards the identity must be valid.
await nextManager.sync({ reason: "test", force: true });
expect(nextManager.status().dirty).toBe(false);
expect(nextManager.status().custom?.indexIdentity).toEqual({ status: "valid" });
} finally {
await nextManager.close?.();
}
} finally {
// Always undo the environment stub, even when an assertion fails.
vi.unstubAllEnvs();
}
});
// Verifies that a sessions-only index whose metadata row was deleted is
// reported dirty with a "missing" identity.
it("marks sessions-only indexes dirty when metadata is missing but chunks exist", async () => {
try {
// Stub the state directory before resolving the sessions directory.
// NOTE(review): presumably the resolver reads OPENCLAW_STATE_DIR — confirm.
vi.stubEnv("OPENCLAW_STATE_DIR", path.join(workspaceDir, ".state-sessions-missing-meta"));
const sessionsDir = resolveSessionTranscriptsDirForAgent("main");
await fs.mkdir(sessionsDir, { recursive: true });
// Write a two-line JSONL transcript: a session header and one assistant message.
await fs.writeFile(
path.join(sessionsDir, "session-missing-meta.jsonl"),
[
JSON.stringify({
type: "session",
id: "session-missing-meta",
timestamp: "2026-04-07T15:24:04.113Z",
}),
JSON.stringify({
type: "message",
message: {
role: "assistant",
timestamp: "2026-04-07T15:25:04.113Z",
content: [{ type: "text", text: "Sessions missing metadata marker." }],
},
}),
].join("\n") + "\n",
"utf8",
);
const dbPath = path.join(workspaceDir, "index-sessions-missing-meta.sqlite");
const cfg = createCfg({
storePath: dbPath,
sources: ["sessions"],
sessionMemory: true,
});
// Index the transcript with a forced sync, then close.
const oldManager = await getFreshManager(cfg);
await oldManager.sync({ reason: "test", force: true });
await oldManager.close?.();
const nextManager = await getFreshManager(cfg);
try {
// Reach into the manager's `db` handle to delete only the persisted metadata row.
(
nextManager as unknown as {
db: { exec: (sql: string) => void };
}
).db.exec(`DELETE FROM meta WHERE key = 'memory_index_meta_v1'`);
const status = nextManager.status();
expect(status.dirty).toBe(true);
expect(status.custom?.indexIdentity).toEqual({
status: "missing",
reason: "index metadata is missing",
});
} finally {
await nextManager.close?.();
}
} finally {
// Always undo the environment stub, even when an assertion fails.
vi.unstubAllEnvs();
}
});
// Verifies that a targeted sync of specific session files does not embed
// anything or clear the mismatch while the index identity is mismatched.
it("keeps provider cutover vector search paused during targeted session sync", async () => {
try {
// Stub the state directory before resolving the sessions directory.
// NOTE(review): presumably the resolver reads OPENCLAW_STATE_DIR — confirm.
vi.stubEnv("OPENCLAW_STATE_DIR", path.join(workspaceDir, ".state-targeted-cutover"));
const sessionsDir = resolveSessionTranscriptsDirForAgent("main");
await fs.mkdir(sessionsDir, { recursive: true });
// Keep the path: it is passed to the targeted sync below.
const sessionFile = path.join(sessionsDir, "session-targeted-cutover.jsonl");
await fs.writeFile(
sessionFile,
[
JSON.stringify({
type: "session",
id: "session-targeted-cutover",
timestamp: "2026-04-07T15:24:04.113Z",
}),
JSON.stringify({
type: "message",
message: {
role: "assistant",
timestamp: "2026-04-07T15:25:04.113Z",
content: [{ type: "text", text: "Targeted cutover marker." }],
},
}),
].join("\n") + "\n",
"utf8",
);
const dbPath = path.join(workspaceDir, "index-targeted-session-cutover.sqlite");
// Build the index with the old model over both sources.
const oldCfg = createCfg({
storePath: dbPath,
sources: ["memory", "sessions"],
sessionMemory: true,
model: "old-embed",
});
const oldManager = await getFreshManager(oldCfg);
await oldManager.sync({ reason: "test", force: true });
await oldManager.close?.();
// Reopen with a different provider and model against the same store.
const nextCfg = createCfg({
storePath: dbPath,
sources: ["memory", "sessions"],
sessionMemory: true,
provider: "gemini",
model: "new-embed",
});
const nextManager = await getFreshManager(nextCfg);
try {
expect(nextManager.status().dirty).toBe(true);
// Reset the batch counter so only work caused by the targeted sync is counted.
embedBatchCalls = 0;
await nextManager.sync({ reason: "test", sessionFiles: [sessionFile] });
expect(embedBatchCalls).toBe(0);
expect(nextManager.status().dirty).toBe(true);
expect(nextManager.status().custom?.indexIdentity).toEqual({
status: "mismatched",
reason: "index was built for model old-embed, expected new-embed",
});
// Search must still return nothing after the targeted sync.
const results = await nextManager.search("alpha");
expect(results).toStrictEqual([]);
} finally {
await nextManager.close?.();
}
} finally {
// Always undo the environment stub, even when an assertion fails.
vi.unstubAllEnvs();
}
});
// Verifies that a dirty flag raised while the session part of a forced reindex
// is running survives the reindex, even though the identity becomes valid.
it("preserves memory dirty events raised during session identity reindex", async () => {
try {
// Stub the state directory before resolving the sessions directory.
// NOTE(review): presumably the resolver reads OPENCLAW_STATE_DIR — confirm.
vi.stubEnv("OPENCLAW_STATE_DIR", path.join(workspaceDir, ".state-dirty-during-session"));
const sessionsDir = resolveSessionTranscriptsDirForAgent("main");
await fs.mkdir(sessionsDir, { recursive: true });
// Write a two-line JSONL transcript: a session header and one assistant message.
await fs.writeFile(
path.join(sessionsDir, "session-dirty-during-reindex.jsonl"),
[
JSON.stringify({
type: "session",
id: "session-dirty-during-reindex",
timestamp: "2026-04-07T15:24:04.113Z",
}),
JSON.stringify({
type: "message",
message: {
role: "assistant",
timestamp: "2026-04-07T15:25:04.113Z",
content: [{ type: "text", text: "Dirty during session marker." }],
},
}),
].join("\n") + "\n",
"utf8",
);
const dbPath = path.join(workspaceDir, "index-dirty-during-session.sqlite");
// Build the index with the old model over both sources.
const oldCfg = createCfg({
storePath: dbPath,
sources: ["memory", "sessions"],
sessionMemory: true,
model: "old-embed",
});
const oldManager = await getFreshManager(oldCfg);
await oldManager.sync({ reason: "test", force: true });
await oldManager.close?.();
// Reopen with a different provider and model against the same store.
const nextCfg = createCfg({
storePath: dbPath,
sources: ["memory", "sessions"],
sessionMemory: true,
provider: "gemini",
model: "new-embed",
});
const nextManager = await getFreshManager(nextCfg);
try {
// Expose the manager's `dirty` flag and `syncSessionFiles` method for patching.
const fields = nextManager as unknown as {
dirty: boolean;
syncSessionFiles: (params: unknown) => Promise<void>;
};
const syncSessionFiles = fields.syncSessionFiles.bind(nextManager);
// Wrap the session sync so it sets `dirty` before delegating, simulating a
// dirty event arriving during the reindex.
fields.syncSessionFiles = async (params) => {
fields.dirty = true;
await syncSessionFiles(params);
};
await nextManager.sync({ reason: "test", force: true });
// Identity is repaired, but the dirty flag raised mid-run must not be cleared.
expect(nextManager.status().dirty).toBe(true);
expect(nextManager.status().custom?.indexIdentity).toEqual({ status: "valid" });
} finally {
await nextManager.close?.();
}
} finally {
// Always undo the environment stub, even when an assertion fails.
vi.unstubAllEnvs();
}
});
it("closes embedding providers when memory index managers close", async () => {
const cfg = createCfg({
storePath: indexMainPath,
@@ -593,7 +1009,7 @@ describe("memory index", () => {
waitForEmbeddingRetry: (delayMs: number, action: string) => Promise<void>;
}
).provider = {
id: "openai",
id: "mock",
model: "mock-embed",
embedQuery: async () => {
queryCalls += 1;
@@ -637,7 +1053,7 @@ describe("memory index", () => {
};
}
).provider = {
id: "openai",
id: "mock",
model: "mock-embed",
embedQuery: async () => {
queryCalls += 1;
@@ -696,6 +1112,76 @@ describe("memory index", () => {
expect(status.vector?.available).toBeUndefined();
});
// Verifies that an index built with vectors disabled is reported as mismatched
// ("index vector dimensions are missing") once it is reopened with vectors
// enabled and the vector store has been probed.
it("marks older vector indexes dirty after vector store probing", async () => {
const dbPath = path.join(workspaceDir, "index-vector-missing-dims.sqlite");
// Build the index with vectors disabled.
const legacyCfg = createCfg({
storePath: dbPath,
provider: "gemini",
vectorEnabled: false,
});
const legacyManager = await getFreshManager(legacyCfg);
await legacyManager.sync({ reason: "test", force: true });
await legacyManager.close?.();
// Reopen the same store with vectors enabled.
const cfg = createCfg({
storePath: dbPath,
provider: "gemini",
vectorEnabled: true,
});
const manager = await getFreshManager(cfg);
try {
// Read the stored metadata through the manager's `readMeta` method.
const metaAccess = manager as unknown as {
readMeta(): MemoryIndexMeta | null;
};
const meta = metaAccess.readMeta();
if (!meta) {
throw new Error("expected index metadata");
}
// Precondition: the stored metadata has no vector dimensions recorded.
expect(meta.vectorDims).toBeUndefined();
await manager.probeVectorStoreAvailability?.();
const status = manager.status();
expect(status.dirty).toBe(true);
expect(status.custom?.indexIdentity).toEqual({
status: "mismatched",
reason: "index vector dimensions are missing",
});
} finally {
await manager.close?.();
}
});
// Verifies that probing the vector store does not mark the index dirty when it
// was built with vectors disabled and, per the test name, is empty.
it("keeps empty vector indexes clean after vector store probing", async () => {
// Remove the memory file before indexing.
// NOTE(review): presumably this leaves nothing to index — confirm against suite setup.
await fs.rm(path.join(memoryDir, "2026-01-12.md"));
const dbPath = path.join(workspaceDir, "index-empty-vector.sqlite");
// Build the index with vectors disabled.
const legacyCfg = createCfg({
storePath: dbPath,
provider: "gemini",
vectorEnabled: false,
});
const legacyManager = await getFreshManager(legacyCfg);
await legacyManager.sync({ reason: "test", force: true });
await legacyManager.close?.();
// Reopen the same store with vectors enabled, using the "status" purpose.
const cfg = createCfg({
storePath: dbPath,
provider: "gemini",
vectorEnabled: true,
});
const manager = await getFreshManager(cfg, "status");
try {
await manager.probeVectorStoreAvailability?.();
const status = manager.status();
expect(status.dirty).toBe(false);
expect(status.custom?.indexIdentity).toEqual({ status: "valid" });
} finally {
await manager.close?.();
}
});
it("caches embedding probe readiness across transient status managers", async () => {
const cfg = createCfg({ storePath: path.join(workspaceDir, "index-probe-cache.sqlite") });
const first = requireManager(
@@ -778,7 +1264,7 @@ describe("memory index", () => {
});
});
it("activates configured fallback when local embeddings degrade during search", async () => {
it("does not activate fallback during search when index identity is already mismatched", async () => {
const cfg = createCfg({
storePath: path.join(workspaceDir, "index-search-degraded-fallback.sqlite"),
fallback: "fallback-provider",
@@ -810,21 +1296,68 @@ describe("memory index", () => {
const results = await manager.search("alpha");
expect(results.length).toBeGreaterThan(0);
const resultKeys = results.map(
(result) => `${result.source}:${result.path}:${result.startLine}:${result.endLine}`,
);
expect(new Set(resultKeys).size).toBe(resultKeys.length);
expect(providerCalls.slice(callsBeforeSearch).map((call) => call.provider)).toContain(
"fallback-provider",
);
expect(results).toStrictEqual([]);
expect(providerCalls.slice(callsBeforeSearch)).toStrictEqual([]);
expect(
(
manager as unknown as {
provider: { id: string } | null;
}
).provider?.id,
).toBe("fallback-provider");
).toBe("local");
});
// Verifies that a "cli"-reason sync on a mismatched index rebuilds it with the
// configured fallback provider when the injected primary provider throws.
it("rebuilds with fallback provider during explicit identity repair", async () => {
const dbPath = path.join(workspaceDir, "index-cli-fallback-identity-repair.sqlite");
// Build the index with the old model.
const oldCfg = createCfg({
storePath: dbPath,
model: "old-embed",
});
const oldManager = await getFreshManager(oldCfg);
await oldManager.sync({ reason: "test", force: true });
await oldManager.close?.();
// Reopen with a new model and a configured fallback provider.
const cfg = createCfg({
storePath: dbPath,
model: "new-embed",
fallback: "fallback-provider",
});
const manager = await getFreshManager(cfg);
try {
expect(manager.status().dirty).toBe(true);
// Expose the manager's provider fields so a failing provider can be injected.
const fields = manager as unknown as {
providerInitialized: boolean;
provider: {
id: string;
model: string;
embedQuery: (text: string) => Promise<number[]>;
embedBatch: (texts: string[]) => Promise<number[][]>;
close: () => Promise<void>;
};
};
fields.providerInitialized = true;
// Injected primary provider: both embedding entry points throw a local worker exit error.
fields.provider = {
id: "mock",
model: "new-embed",
embedQuery: async () => {
throw createLocalWorkerExitError();
},
embedBatch: async () => {
throw createLocalWorkerExitError();
},
close: async () => {},
};
await manager.sync({ reason: "cli" });
// After the explicit repair, the index is clean and attributed to the fallback provider.
expect(manager.status().dirty).toBe(false);
expect(manager.status().provider).toBe("fallback-provider");
expect(manager.status().model).toBe("fallback-provider-embed");
expect(manager.status().custom?.indexIdentity).toEqual({ status: "valid" });
// Search is no longer paused: it must return at least one result.
await expect(manager.search("alpha")).resolves.not.toStrictEqual([]);
} finally {
await manager.close?.();
}
});
it("activates configured fallback after probe-time local degradation", async () => {
@@ -866,7 +1399,7 @@ describe("memory index", () => {
const results = await manager.search("alpha");
expect(results.length).toBeGreaterThan(0);
expect(results).toStrictEqual([]);
expect(providerCalls.slice(callsBeforeSearch).map((call) => call.provider)).toContain(
"fallback-provider",
);
@@ -879,6 +1412,73 @@ describe("memory index", () => {
).toBe("fallback-provider");
});
// Verifies that a status manager stops reporting the index as dirty once its
// active provider state matches the fallback provider the index was built with.
it("clears identity dirty after status resolves the indexed fallback provider", async () => {
const dbPath = path.join(workspaceDir, "index-status-fallback-identity.sqlite");
// Build the index directly with "fallback-provider" as the primary provider.
const indexedCfg = createCfg({
storePath: dbPath,
provider: "fallback-provider",
model: "new-embed",
});
const indexedManager = await getFreshManager(indexedCfg);
await indexedManager.sync({ reason: "test", force: true });
await indexedManager.close?.();
// Reopen with "fallback-provider" configured only as the fallback.
const cfg = createCfg({
storePath: dbPath,
fallback: "fallback-provider",
model: "new-embed",
});
const { getRequiredMemoryIndexManager } = await import("./test-manager-helpers.js");
const manager = await getRequiredMemoryIndexManager({
cfg,
agentId: "main",
purpose: "status",
});
try {
// Before the provider state is set, the identity is considered dirty.
expect(manager.status().dirty).toBe(true);
// Expose the manager's provider-related fields so the test can set them directly.
const fields = manager as unknown as {
provider: {
id: string;
model: string;
embedQuery: (text: string) => Promise<number[]>;
embedBatch: (texts: string[]) => Promise<number[][]>;
close: () => Promise<void>;
};
providerInitialized: boolean;
providerRuntime: {
id: string;
cacheKeyData: Record<string, unknown>;
};
providerKey: string;
computeProviderKey: () => string;
};
fields.provider = {
id: "fallback-provider",
model: "new-embed",
embedQuery: async () => [1, 0, 0, 0],
embedBatch: async (texts) => texts.map(() => [1, 0, 0, 0]),
close: async () => {},
};
fields.providerRuntime = {
id: "fallback-provider",
cacheKeyData: {
provider: "fallback-provider",
baseUrl: "https://generativelanguage.googleapis.com/v1beta",
model: "new-embed",
headers: [],
},
};
fields.providerInitialized = true;
// Recompute the provider key from the fields set above.
fields.providerKey = fields.computeProviderKey();
expect(manager.status().dirty).toBe(false);
expect(manager.status().custom?.indexIdentity).toEqual({ status: "valid" });
} finally {
await manager.close?.();
}
});
it("streams embedding cache rows during safe reindex", async () => {
vi.stubEnv("OPENCLAW_TEST_MEMORY_UNSAFE_REINDEX", "0");
type EmbeddingCacheRow = {

View File

@@ -3,7 +3,8 @@ import { describe, expect, it } from "vitest";
import {
resolveConfiguredScopeHash,
resolveConfiguredSourcesForMeta,
shouldRunFullMemoryReindex,
resolveMemoryIndexIdentityState,
isMemoryIndexIdentityDirty,
type MemoryIndexMeta,
} from "./manager-reindex-state.js";
@@ -21,16 +22,18 @@ function createMeta(overrides: Partial<MemoryIndexMeta> = {}): MemoryIndexMeta {
};
}
function createFullReindexParams(
function createIdentityParams(
overrides: {
meta?: MemoryIndexMeta | null;
provider?: { id: string; model: string } | null;
providerKey?: string;
providerKeyKnown?: boolean;
configuredSources?: MemorySource[];
configuredScopeHash?: string;
chunkTokens?: number;
chunkOverlap?: number;
vectorReady?: boolean;
hasIndexedChunks?: boolean;
ftsTokenizer?: string;
} = {},
) {
@@ -43,26 +46,41 @@ function createFullReindexParams(
chunkTokens: 4000,
chunkOverlap: 0,
vectorReady: false,
hasIndexedChunks: true,
ftsTokenizer: "unicode61",
...overrides,
};
}
describe("memory reindex state", () => {
it("requires a full reindex when the embedding model changes", () => {
it("marks identity dirty when the embedding model changes", () => {
expect(
shouldRunFullMemoryReindex(
createFullReindexParams({
isMemoryIndexIdentityDirty(
createIdentityParams({
provider: { id: "openai", model: "mock-embed-v2" },
}),
),
).toBe(true);
});
it("requires a full reindex when the provider cache key changes", () => {
it("returns a mismatch reason when provider identity changes", () => {
expect(
shouldRunFullMemoryReindex(
createFullReindexParams({
resolveMemoryIndexIdentityState(
createIdentityParams({
provider: { id: "ollama", model: "mock-embed-v1" },
providerKey: "provider-key-ollama",
}),
),
).toEqual({
status: "mismatched",
reason: "index was built for provider openai, expected ollama",
});
});
it("marks identity dirty when the provider cache key changes", () => {
expect(
isMemoryIndexIdentityDirty(
createIdentityParams({
provider: { id: "gemini", model: "gemini-embedding-2-preview" },
providerKey: "provider-key-dims-768",
meta: createMeta({
@@ -75,7 +93,30 @@ describe("memory reindex state", () => {
).toBe(true);
});
it("requires a full reindex when extraPaths change", () => {
// Verifies that with `providerKeyKnown: false` and no provider key supplied,
// the identity resolves as valid.
it("can defer provider key comparison until provider initialization", () => {
expect(
resolveMemoryIndexIdentityState(
createIdentityParams({
providerKey: undefined,
providerKeyKnown: false,
}),
),
).toEqual({ status: "valid" });
});
// Verifies that missing `vectorDims` in metadata is not a mismatch when the
// vector store is ready but no chunks have been indexed.
it("does not mark identity dirty for vector dimensions before chunks exist", () => {
expect(
resolveMemoryIndexIdentityState(
createIdentityParams({
vectorReady: true,
hasIndexedChunks: false,
meta: createMeta({ vectorDims: undefined }),
}),
),
).toEqual({ status: "valid" });
});
it("marks identity dirty when extraPaths change", () => {
const workspaceDir = "/tmp/workspace";
const firstScopeHash = resolveConfiguredScopeHash({
workspaceDir,
@@ -97,8 +138,8 @@ describe("memory reindex state", () => {
});
expect(
shouldRunFullMemoryReindex(
createFullReindexParams({
isMemoryIndexIdentityDirty(
createIdentityParams({
meta: createMeta({ scopeHash: firstScopeHash }),
configuredScopeHash: secondScopeHash,
}),
@@ -106,17 +147,17 @@ describe("memory reindex state", () => {
).toBe(true);
});
it("requires a full reindex when configured sources add sessions", () => {
it("marks identity dirty when configured sources add sessions", () => {
expect(
shouldRunFullMemoryReindex(
createFullReindexParams({
isMemoryIndexIdentityDirty(
createIdentityParams({
configuredSources: ["memory", "sessions"],
}),
),
).toBe(true);
});
it("requires a full reindex when multimodal settings change", () => {
it("marks identity dirty when multimodal settings change", () => {
const workspaceDir = "/tmp/workspace";
const firstScopeHash = resolveConfiguredScopeHash({
workspaceDir,
@@ -138,8 +179,8 @@ describe("memory reindex state", () => {
});
expect(
shouldRunFullMemoryReindex(
createFullReindexParams({
isMemoryIndexIdentityDirty(
createIdentityParams({
meta: createMeta({ scopeHash: firstScopeHash }),
configuredScopeHash: secondScopeHash,
}),
@@ -149,8 +190,8 @@ describe("memory reindex state", () => {
it("keeps older indexes with missing sources compatible with memory-only config", () => {
expect(
shouldRunFullMemoryReindex(
createFullReindexParams({
isMemoryIndexIdentityDirty(
createIdentityParams({
meta: createMeta({ sources: undefined }),
configuredSources: resolveConfiguredSourcesForMeta(new Set(["memory"])),
}),

View File

@@ -16,6 +16,19 @@ export type MemoryIndexMeta = {
ftsTokenizer?: string;
};
/**
 * Result of comparing stored index metadata against the current configuration.
 * Discriminated on `status`; the non-valid variants carry a human-readable `reason`.
 */
export type MemoryIndexIdentityState =
| {
// Stored metadata matches every identity field that was checked.
status: "valid";
}
| {
// No index metadata was available to compare against.
status: "missing";
reason: string;
}
| {
// Metadata exists but at least one identity field differs; `reason` names the first one found.
status: "mismatched";
reason: string;
};
export function resolveConfiguredSourcesForMeta(sources: Iterable<MemorySource>): MemorySource[] {
const normalized = Array.from(sources)
.filter((source): source is MemorySource => source === "memory" || source === "sessions")
@@ -73,31 +86,93 @@ export function resolveConfiguredScopeHash(params: {
);
}
export function shouldRunFullMemoryReindex(params: {
/**
 * Boolean convenience wrapper around `resolveMemoryIndexIdentityState`.
 *
 * @param params - Stored metadata plus the currently configured identity inputs;
 *   forwarded unchanged to `resolveMemoryIndexIdentityState`.
 * @returns `true` when the resolved status is anything other than `"valid"`
 *   (that is, `"missing"` or `"mismatched"`).
 */
export function isMemoryIndexIdentityDirty(params: {
  meta: MemoryIndexMeta | null;
  provider: { id: string; model: string } | null;
  providerKey?: string;
  providerKeyKnown?: boolean;
  configuredSources: MemorySource[];
  configuredScopeHash: string;
  chunkTokens: number;
  chunkOverlap: number;
  vectorReady: boolean;
  hasIndexedChunks?: boolean;
  ftsTokenizer: string;
}): boolean {
  const { status } = resolveMemoryIndexIdentityState(params);
  return status !== "valid";
}
/**
 * Compares stored index metadata with the currently configured identity and
 * reports the first difference found.
 *
 * Checks run in a fixed order: metadata presence, model, provider, provider key,
 * sources, scope hash, chunking, vector dimensions, FTS tokenizer. The returned
 * `reason` therefore describes only the first failing check.
 *
 * @param params - Stored `meta` plus the expected provider, provider key, sources,
 *   scope hash, chunking, vector readiness and tokenizer.
 * @returns `{ status: "valid" }` when every check passes, `"missing"` when `meta`
 *   is null, otherwise `"mismatched"` with a reason string.
 */
export function resolveMemoryIndexIdentityState(params: {
meta: MemoryIndexMeta | null;
provider: { id: string; model: string } | null;
providerKey?: string;
providerKeyKnown?: boolean;
configuredSources: MemorySource[];
configuredScopeHash: string;
chunkTokens: number;
chunkOverlap: number;
vectorReady: boolean;
hasIndexedChunks?: boolean;
ftsTokenizer: string;
}): MemoryIndexIdentityState {
const { meta } = params;
return (
!meta ||
(params.provider ? meta.model !== params.provider.model : meta.model !== "fts-only") ||
(params.provider ? meta.provider !== params.provider.id : meta.provider !== "none") ||
meta.providerKey !== params.providerKey ||
if (!meta) {
return { status: "missing", reason: "index metadata is missing" };
}
// Without a provider the index is expected to carry the "fts-only" model
// and the "none" provider id.
const expectedModel = params.provider ? params.provider.model : "fts-only";
if (meta.model !== expectedModel) {
return {
status: "mismatched",
reason: `index was built for model ${meta.model}, expected ${expectedModel}`,
};
}
const expectedProvider = params.provider ? params.provider.id : "none";
if (meta.provider !== expectedProvider) {
return {
status: "mismatched",
reason: `index was built for provider ${meta.provider}, expected ${expectedProvider}`,
};
}
// Only an explicit providerKeyKnown === false skips this comparison; when the
// flag is omitted the keys are compared as before.
if (params.providerKeyKnown !== false && meta.providerKey !== params.providerKey) {
return {
status: "mismatched",
reason: "index provider settings changed",
};
}
if (
configuredMetaSourcesDiffer({
meta,
configuredSources: params.configuredSources,
}) ||
meta.scopeHash !== params.configuredScopeHash ||
meta.chunkTokens !== params.chunkTokens ||
meta.chunkOverlap !== params.chunkOverlap ||
(params.vectorReady && !meta.vectorDims) ||
(meta.ftsTokenizer ?? "unicode61") !== params.ftsTokenizer
);
})
) {
return {
status: "mismatched",
reason: "index sources changed",
};
}
if (meta.scopeHash !== params.configuredScopeHash) {
return {
status: "mismatched",
reason: "index scope changed",
};
}
if (meta.chunkTokens !== params.chunkTokens || meta.chunkOverlap !== params.chunkOverlap) {
return {
status: "mismatched",
reason: "index chunking changed",
};
}
// Missing vector dimensions count as a mismatch only when vectors are ready and
// the caller has not explicitly said the index holds no chunks.
if (params.vectorReady && params.hasIndexedChunks !== false && !meta.vectorDims) {
return {
status: "mismatched",
reason: "index vector dimensions are missing",
};
}
// Metadata that records no tokenizer is compared as "unicode61".
if ((meta.ftsTokenizer ?? "unicode61") !== params.ftsTokenizer) {
return {
status: "mismatched",
reason: "index FTS tokenizer changed",
};
}
return { status: "valid" };
}

View File

@@ -573,7 +573,11 @@ describe("searchVector sqlite-vec KNN", () => {
function insertFallbackChunk(
db: InstanceType<typeof DatabaseSync>,
params: { id: string; model: string; vector: number[] },
params: {
id: string;
model: string;
vector: number[];
},
): void {
db.prepare(
"INSERT INTO chunks (id, path, source, start_line, end_line, hash, model, text, embedding, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",

View File

@@ -28,6 +28,17 @@ describe("memory manager status state", () => {
).toBe(true);
});
it("marks status-only managers dirty when index identity mismatches", () => {
  // No memory source and metadata already present: only the identity
  // mismatch flag can make this dirty.
  const dirty = resolveInitialMemoryDirty({
    hasMemorySource: false,
    statusOnly: true,
    hasIndexedMeta: true,
    indexIdentityMismatched: true,
  });
  expect(dirty).toBe(true);
});
it("reports the requested provider before provider initialization", () => {
expect(
resolveStatusProviderInfo({

View File

@@ -27,8 +27,12 @@ export function resolveInitialMemoryDirty(params: {
hasMemorySource: boolean;
statusOnly: boolean;
hasIndexedMeta: boolean;
indexIdentityMismatched?: boolean;
}): boolean {
return params.hasMemorySource && (params.statusOnly ? !params.hasIndexedMeta : true);
return (
Boolean(params.indexIdentityMismatched) ||
(params.hasMemorySource && (params.statusOnly ? !params.hasIndexedMeta : true))
);
}
export function resolveStatusProviderInfo(params: {

View File

@@ -38,6 +38,7 @@ import { resolveTimerTimeoutMs } from "openclaw/plugin-sdk/number-runtime";
import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/string-coerce-runtime";
import {
createEmbeddingProvider,
resolveEmbeddingProviderAdapterId,
type EmbeddingProvider,
type EmbeddingProviderId,
type EmbeddingProviderRuntime,
@@ -54,8 +55,9 @@ import {
import {
resolveConfiguredScopeHash,
resolveConfiguredSourcesForMeta,
shouldRunFullMemoryReindex,
resolveMemoryIndexIdentityState,
type MemoryIndexMeta,
type MemoryIndexIdentityState,
} from "./manager-reindex-state.js";
import { shouldSyncSessionsForReindex } from "./manager-session-reindex.js";
import {
@@ -67,7 +69,10 @@ import {
loadMemorySourceFileState,
resolveMemorySourceExistingHash,
} from "./manager-source-state.js";
import { runMemoryTargetedSessionSync } from "./manager-targeted-sync.js";
import {
markMemoryTargetSessionFilesDirty,
runMemoryTargetedSessionSync,
} from "./manager-targeted-sync.js";
import {
recordMemoryWatchEventPath,
settleMemoryWatchEventPaths,
@@ -269,6 +274,65 @@ export abstract class MemoryManagerSyncOps {
options: { source: MemorySource; content?: string },
): Promise<void>;
/**
 * Probes the `chunks` table for at least one row.
 *
 * @returns `true` when the single-row probe query yields a row, otherwise `false`.
 */
protected hasIndexedChunks(): boolean {
  const probe = this.db.prepare(`SELECT 1 as found FROM chunks LIMIT 1`);
  const firstRow = probe.get() as { found?: number } | undefined;
  // No row (or a row without `found`) means nothing has been indexed.
  const found = firstRow?.found;
  return found === 1;
}
/**
 * Resolves the index identity state for this manager's current settings.
 *
 * Each optional field in `params` overrides the value the manager would
 * otherwise derive itself. An override applies as soon as the key is present on
 * `params`, regardless of its value.
 *
 * @param params.meta - Stored metadata to compare; defaults to `this.readMeta()`.
 * @param params.provider - Expected provider; defaults to the live provider, or
 *   else the provider named in settings (`null` when settings say `"none"`).
 * @param params.providerKeyKnown - When `false`, no provider key is passed along.
 * @param params.vectorReady - Defaults to `this.vector.available === true`.
 * @param params.hasIndexedChunks - Defaults to `this.hasIndexedChunks()`.
 * @returns The state produced by `resolveMemoryIndexIdentityState`.
 */
protected resolveCurrentIndexIdentityState(params?: {
  meta?: MemoryIndexMeta | null;
  provider?: { id: string; model: string } | null;
  providerKeyKnown?: boolean;
  vectorReady?: boolean;
  hasIndexedChunks?: boolean;
}): MemoryIndexIdentityState {
  const overrides: NonNullable<typeof params> = params ?? {};

  // Provider implied by settings alone; computed up front even when an override
  // or a live provider ends up taking precedence.
  let settingsProvider: { id: string; model: string } | null = null;
  if (this.settings.provider !== "none") {
    settingsProvider = {
      id:
        resolveEmbeddingProviderAdapterId(this.settings.provider, this.cfg) ??
        this.settings.provider,
      model: this.settings.model,
    };
  }

  // Precedence: explicit override, then the live provider, then settings.
  let expectedProvider: { id: string; model: string } | null;
  if ("provider" in overrides) {
    expectedProvider = overrides.provider!;
  } else if (this.provider) {
    expectedProvider = { id: this.provider.id, model: this.provider.model };
  } else {
    expectedProvider = settingsProvider;
  }

  let vectorReady: boolean;
  if ("vectorReady" in overrides) {
    vectorReady = Boolean(overrides.vectorReady);
  } else {
    vectorReady = this.vector.available === true;
  }

  const keyKnown = overrides.providerKeyKnown;
  const multimodalSettings = this.settings.multimodal;

  return resolveMemoryIndexIdentityState({
    meta: "meta" in overrides ? overrides.meta! : this.readMeta(),
    provider: expectedProvider,
    // An unknown provider key is passed as undefined.
    providerKey: keyKnown === false ? undefined : (this.providerKey ?? undefined),
    providerKeyKnown: keyKnown,
    configuredSources: resolveConfiguredSourcesForMeta(this.sources),
    configuredScopeHash: resolveConfiguredScopeHash({
      workspaceDir: this.workspaceDir,
      extraPaths: this.settings.extraPaths,
      multimodal: {
        enabled: multimodalSettings.enabled,
        modalities: multimodalSettings.modalities,
        maxFileBytes: multimodalSettings.maxFileBytes,
      },
    }),
    chunkTokens: this.settings.chunking.tokens,
    chunkOverlap: this.settings.chunking.overlap,
    vectorReady,
    hasIndexedChunks:
      "hasIndexedChunks" in overrides
        ? Boolean(overrides.hasIndexedChunks)
        : this.hasIndexedChunks(),
    ftsTokenizer: this.settings.store.fts.tokenizer,
  });
}
protected resetVectorState(): void {
this.vectorReady = null;
this.vector.available = null;
@@ -1691,60 +1755,69 @@ export abstract class MemoryManagerSyncOps {
}
const vectorReady = await this.ensureVectorReady();
const meta = this.readMeta();
const configuredSources = resolveConfiguredSourcesForMeta(this.sources);
const configuredScopeHash = resolveConfiguredScopeHash({
workspaceDir: this.workspaceDir,
extraPaths: this.settings.extraPaths,
multimodal: {
enabled: this.settings.multimodal.enabled,
modalities: this.settings.multimodal.modalities,
maxFileBytes: this.settings.multimodal.maxFileBytes,
},
});
const targetSessionFiles = this.normalizeTargetSessionFiles(params?.sessionFiles);
const hasTargetSessionFiles = targetSessionFiles !== null;
if (params?.reason === "cli" && !params.force && !hasTargetSessionFiles) {
await this.markSessionStartupCatchupDirtyFiles();
}
const targetedSessionSync = await runMemoryTargetedSessionSync({
hasSessionSource: this.sources.has("sessions"),
targetSessionFiles,
reason: params?.reason,
progress: progress ?? undefined,
useUnsafeReindex:
process.env.OPENCLAW_TEST_FAST === "1" &&
process.env.OPENCLAW_TEST_MEMORY_UNSAFE_REINDEX === "1",
sessionsDirtyFiles: this.sessionsDirtyFiles,
syncSessionFiles: async (targetedParams) => {
await this.syncSessionFiles(targetedParams);
},
shouldFallbackOnError: (err) => this.shouldFallbackOnError(err),
activateFallbackProvider: async (reason) => await this.activateFallbackProvider(reason),
runSafeReindex: async (reindexParams) => {
await this.runSafeReindex(reindexParams);
},
runUnsafeReindex: async (reindexParams) => {
await this.runUnsafeReindex(reindexParams);
},
const indexIdentity = resolveMemoryIndexIdentityState({
meta,
// Also detects provider→FTS-only transitions so orphaned old-model FTS rows are cleaned up.
provider: this.provider ? { id: this.provider.id, model: this.provider.model } : null,
providerKey: this.providerKey ?? undefined,
configuredSources: resolveConfiguredSourcesForMeta(this.sources),
configuredScopeHash: resolveConfiguredScopeHash({
workspaceDir: this.workspaceDir,
extraPaths: this.settings.extraPaths,
multimodal: {
enabled: this.settings.multimodal.enabled,
modalities: this.settings.multimodal.modalities,
maxFileBytes: this.settings.multimodal.maxFileBytes,
},
}),
chunkTokens: this.settings.chunking.tokens,
chunkOverlap: this.settings.chunking.overlap,
vectorReady,
hasIndexedChunks: this.hasIndexedChunks(),
ftsTokenizer: this.settings.store.fts.tokenizer,
});
if (targetedSessionSync.handled) {
this.sessionsDirty = targetedSessionSync.sessionsDirty;
return;
}
const hasIndexedChunks = this.hasIndexedChunks();
const needsInitialIndex = indexIdentity.status !== "valid" && !hasIndexedChunks;
const needsExplicitIdentityReindex =
params?.reason === "cli" && indexIdentity.status !== "valid" && !hasTargetSessionFiles;
const needsFullReindex =
(params?.force && !hasTargetSessionFiles) ||
shouldRunFullMemoryReindex({
meta,
// Also detects provider→FTS-only transitions so orphaned old-model FTS rows are cleaned up.
provider: this.provider ? { id: this.provider.id, model: this.provider.model } : null,
providerKey: this.providerKey ?? undefined,
configuredSources,
configuredScopeHash,
chunkTokens: this.settings.chunking.tokens,
chunkOverlap: this.settings.chunking.overlap,
vectorReady,
ftsTokenizer: this.settings.store.fts.tokenizer,
needsInitialIndex ||
needsExplicitIdentityReindex;
if (indexIdentity.status !== "valid" && !needsFullReindex) {
this.dirty = true;
const sessionsDirty = markMemoryTargetSessionFilesDirty({
sessionsDirtyFiles: this.sessionsDirtyFiles,
targetSessionFiles,
});
if (sessionsDirty) {
this.sessionsDirty = true;
}
return;
}
if (!needsFullReindex) {
const targetedSessionSync = await runMemoryTargetedSessionSync({
hasSessionSource: this.sources.has("sessions"),
targetSessionFiles,
reason: params?.reason,
progress: progress ?? undefined,
sessionsDirtyFiles: this.sessionsDirtyFiles,
syncSessionFiles: async (targetedParams) => {
await this.syncSessionFiles(targetedParams);
},
shouldFallbackOnError: (err) => this.shouldFallbackOnError(err),
activateFallbackProvider: async (reason) => await this.activateFallbackProvider(reason),
});
if (targetedSessionSync.handled) {
this.sessionsDirty = targetedSessionSync.sessionsDirty;
return;
}
}
try {
if (needsFullReindex) {
if (
@@ -1794,20 +1867,17 @@ export abstract class MemoryManagerSyncOps {
const activated =
this.shouldFallbackOnError(err) && (await this.activateFallbackProvider(reason));
if (activated) {
await this.runSafeReindex({
reason: params?.reason ?? "fallback",
force: true,
progress: progress ?? undefined,
});
if (needsFullReindex && !hasTargetSessionFiles) {
await this.runSafeReindex({
reason: params?.reason ?? "fallback",
force: true,
progress: progress ?? undefined,
});
}
return;
}
if (!this.provider && this.fts.enabled && this.shouldFallbackOnError(err)) {
log.warn(`memory embeddings unavailable; rebuilding lexical memory index only: ${reason}`);
await this.runSafeReindex({
reason: params?.reason ?? "embedding-degraded",
force: true,
progress: progress ?? undefined,
});
log.warn(`memory embeddings unavailable; leaving memory index dirty: ${reason}`);
return;
}
throw err;
@@ -1965,6 +2035,9 @@ export abstract class MemoryManagerSyncOps {
} else {
this.sessionsDirty = false;
}
if (!shouldSyncMemory) {
this.dirty = false;
}
const meta: MemoryIndexMeta = {
model: this.provider?.model ?? "fts-only",
@@ -2045,6 +2118,9 @@ export abstract class MemoryManagerSyncOps {
} else {
this.sessionsDirty = false;
}
if (!shouldSyncMemory) {
this.dirty = false;
}
const nextMeta: MemoryIndexMeta = {
model: this.provider?.model ?? "fts-only",

View File

@@ -38,6 +38,7 @@ vi.mock("openclaw/plugin-sdk/memory-core-host-engine-qmd", () => {
});
vi.mock("./embeddings.js", () => ({
resolveEmbeddingProviderAdapterId: (providerId: string) => providerId,
createEmbeddingProvider: vi.fn(),
}));

View File

@@ -1,6 +1,7 @@
import { describe, expect, it, vi } from "vitest";
import {
clearMemorySyncedSessionFiles,
markMemoryTargetSessionFilesDirty,
runMemoryTargetedSessionSync,
} from "./manager-targeted-sync.js";
@@ -18,61 +19,48 @@ describe("memory targeted session sync", () => {
expect(sessionsDirty).toBe(true);
});
it("runs a full reindex after fallback activates during targeted sync", async () => {
const activateFallbackProvider = vi.fn(async () => true);
const runSafeReindex = vi.fn(async () => {});
const runUnsafeReindex = vi.fn(async () => {});
it("marks target sessions dirty while identity sync is paused", () => {
const targetSessionPath = "/tmp/paused-target.jsonl";
const sessionsDirtyFiles = new Set(["/tmp/other-dirty.jsonl"]);
await runMemoryTargetedSessionSync({
const sessionsDirty = markMemoryTargetSessionFilesDirty({
sessionsDirtyFiles,
targetSessionFiles: [targetSessionPath],
});
expect(sessionsDirty).toBe(true);
expect(sessionsDirtyFiles.has(targetSessionPath)).toBe(true);
expect(sessionsDirtyFiles.has("/tmp/other-dirty.jsonl")).toBe(true);
});
it("leaves targeted sessions dirty after fallback activates during targeted sync", async () => {
const activateFallbackProvider = vi.fn(async () => true);
const syncSessionFiles = vi
.fn()
.mockRejectedValueOnce(new Error("embedding backend failed"))
.mockResolvedValueOnce(undefined);
const sessionsDirtyFiles = new Set(["/tmp/targeted-fallback.jsonl", "/tmp/other-dirty.jsonl"]);
const result = await runMemoryTargetedSessionSync({
hasSessionSource: true,
targetSessionFiles: new Set(["/tmp/targeted-fallback.jsonl"]),
reason: "post-compaction",
progress: undefined,
useUnsafeReindex: false,
sessionsDirtyFiles: new Set(),
syncSessionFiles: async () => {
throw new Error("embedding backend failed");
},
sessionsDirtyFiles,
syncSessionFiles,
shouldFallbackOnError: () => true,
activateFallbackProvider,
runSafeReindex,
runUnsafeReindex,
});
expect(activateFallbackProvider).toHaveBeenCalledWith("embedding backend failed");
expect(runSafeReindex).toHaveBeenCalledWith({
reason: "post-compaction",
force: true,
expect(syncSessionFiles).toHaveBeenCalledTimes(1);
expect(syncSessionFiles).toHaveBeenCalledWith({
needsFullReindex: false,
targetSessionFiles: ["/tmp/targeted-fallback.jsonl"],
progress: undefined,
});
expect(runUnsafeReindex).not.toHaveBeenCalled();
});
it("uses the unsafe reindex path when enabled", async () => {
const runSafeReindex = vi.fn(async () => {});
const runUnsafeReindex = vi.fn(async () => {});
await runMemoryTargetedSessionSync({
hasSessionSource: true,
targetSessionFiles: new Set(["/tmp/targeted-fallback.jsonl"]),
reason: "post-compaction",
progress: undefined,
useUnsafeReindex: true,
sessionsDirtyFiles: new Set(),
syncSessionFiles: async () => {
throw new Error("embedding backend failed");
},
shouldFallbackOnError: () => true,
activateFallbackProvider: async () => true,
runSafeReindex,
runUnsafeReindex,
});
expect(runUnsafeReindex).toHaveBeenCalledWith({
reason: "post-compaction",
force: true,
progress: undefined,
});
expect(runSafeReindex).not.toHaveBeenCalled();
expect(result).toEqual({ handled: true, sessionsDirty: true });
expect(sessionsDirtyFiles.has("/tmp/targeted-fallback.jsonl")).toBe(true);
expect(sessionsDirtyFiles.has("/tmp/other-dirty.jsonl")).toBe(true);
});
});

View File

@@ -22,12 +22,23 @@ export function clearMemorySyncedSessionFiles(params: {
return params.sessionsDirtyFiles.size > 0;
}
/**
 * Adds the targeted session files to the dirty set so a later sync picks them up.
 *
 * @param params.sessionsDirtyFiles - Dirty set, mutated in place; existing entries are kept.
 * @param params.targetSessionFiles - Files to mark dirty; `null`/omitted adds nothing.
 * @returns `true` when the dirty set is non-empty after the update.
 */
export function markMemoryTargetSessionFilesDirty(params: {
  sessionsDirtyFiles: Set<string>;
  targetSessionFiles?: Iterable<string> | null;
}): boolean {
  const { sessionsDirtyFiles, targetSessionFiles } = params;
  for (const sessionFile of targetSessionFiles ?? []) {
    sessionsDirtyFiles.add(sessionFile);
  }
  return sessionsDirtyFiles.size !== 0;
}
export async function runMemoryTargetedSessionSync(params: {
hasSessionSource: boolean;
targetSessionFiles: Set<string> | null;
reason?: string;
progress?: TargetedSyncProgress;
useUnsafeReindex: boolean;
sessionsDirtyFiles: Set<string>;
syncSessionFiles: (params: {
needsFullReindex: boolean;
@@ -36,16 +47,6 @@ export async function runMemoryTargetedSessionSync(params: {
}) => Promise<void>;
shouldFallbackOnError: (err: unknown) => boolean;
activateFallbackProvider: (reason: string) => Promise<boolean>;
runSafeReindex: (params: {
reason?: string;
force?: boolean;
progress?: TargetedSyncProgress;
}) => Promise<void>;
runUnsafeReindex: (params: {
reason?: string;
force?: boolean;
progress?: TargetedSyncProgress;
}) => Promise<void>;
}): Promise<{ handled: boolean; sessionsDirty: boolean }> {
if (!params.hasSessionSource || !params.targetSessionFiles) {
return {
@@ -74,19 +75,12 @@ export async function runMemoryTargetedSessionSync(params: {
if (!activated) {
throw err;
}
const reindexParams = {
reason: params.reason,
force: true,
progress: params.progress,
};
if (params.useUnsafeReindex) {
await params.runUnsafeReindex(reindexParams);
} else {
await params.runSafeReindex(reindexParams);
}
return {
handled: true,
sessionsDirty: params.sessionsDirtyFiles.size > 0,
sessionsDirty: markMemoryTargetSessionFilesDirty({
sessionsDirtyFiles: params.sessionsDirtyFiles,
targetSessionFiles: params.targetSessionFiles,
}),
};
}
}

View File

@@ -15,6 +15,7 @@ vi.mock("./embeddings.js", () => ({
provider: null,
providerUnavailableReason: "No embeddings provider available.",
}),
resolveEmbeddingProviderAdapterId: (providerId: string) => providerId,
resolveEmbeddingProviderFallbackModel: () => "fts-only",
}));

View File

@@ -47,6 +47,7 @@ import {
resolveMemoryProviderState,
type MemoryProviderLifecycleState,
} from "./manager-provider-state.js";
import type { MemoryIndexIdentityState } from "./manager-reindex-state.js";
import { resolveMemorySearchPreflight } from "./manager-search-preflight.js";
import { searchKeyword, searchVector } from "./manager-search.js";
import {
@@ -171,6 +172,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
protected override sessionsDirty = false;
protected override sessionsDirtyFiles = new Set<string>();
protected override sessionPendingFiles = new Set<string>();
private indexIdentityDirty = false;
protected override sessionDeltas = new Map<
string,
{ lastSize: number; pendingBytes: number; pendingMessages: number }
@@ -183,6 +185,10 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
private readonlyRecoverySuccesses = 0;
private readonlyRecoveryFailures = 0;
private readonlyRecoveryLastError?: string;
private indexIdentityState: MemoryIndexIdentityState = {
status: "missing",
reason: "index metadata is missing",
};
private static async loadProviderResult(params: {
cfg: OpenClawConfig;
@@ -267,6 +273,14 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
if (meta?.vectorDims) {
this.vector.dims = meta.vectorDims;
}
const initialIndexIdentity = this.resolveCurrentIndexIdentityState({
meta,
providerKeyKnown: Boolean(params.providerResult),
});
this.indexIdentityState = initialIndexIdentity;
this.indexIdentityDirty =
initialIndexIdentity.status === "mismatched" ||
(initialIndexIdentity.status === "missing" && this.sources.has("memory"));
const transient = params.purpose === "status" || params.purpose === "cli";
if (!transient) {
this.ensureWatcher();
@@ -377,6 +391,23 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
}
}
/**
 * Recomputes the index identity state, stores it on the manager together with
 * the derived dirty flag, and returns it.
 *
 * @param params.providerKeyKnown - Forwarded to `resolveCurrentIndexIdentityState`.
 * @returns The freshly resolved identity state.
 */
private refreshIndexIdentityDirty(params?: { providerKeyKnown?: boolean }) {
  const overrides: {
    provider?: { id: string; model: string } | null;
    providerKeyKnown?: boolean;
  } = {};
  // Only pin the provider once initialization has finished; before that the
  // key is left off so the resolver applies its own default.
  if (this.providerInitialized) {
    overrides.provider = this.provider
      ? { id: this.provider.id, model: this.provider.model }
      : null;
  }
  overrides.providerKeyKnown = params?.providerKeyKnown;

  const state = this.resolveCurrentIndexIdentityState(overrides);
  this.indexIdentityState = state;

  let identityDirty = false;
  if (state.status === "mismatched") {
    identityDirty = true;
  } else if (state.status === "missing") {
    // Missing metadata is dirty only if there is a memory source or chunks already exist.
    identityDirty = this.sources.has("memory") || this.hasIndexedChunks();
  }
  this.indexIdentityDirty = identityDirty;
  return state;
}
async search(
query: string,
opts?: {
@@ -423,6 +454,27 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
if (preflight.shouldInitializeProvider) {
await this.ensureProviderInitialized();
}
if (!this.provider && this.providerLifecycle.mode === "degraded") {
const activatedFallback = await this.activateFallbackProvider(
this.providerLifecycle.reason,
).catch((fallbackErr: unknown) => {
log.warn(
`memory search: failed to activate fallback provider: ${formatErrorMessage(fallbackErr)}`,
);
return false;
});
if (activatedFallback) {
this.refreshIndexIdentityDirty({
providerKeyKnown: this.providerInitialized,
});
}
}
const indexIdentity = this.refreshIndexIdentityDirty({
providerKeyKnown: this.providerInitialized,
});
if (indexIdentity.status !== "valid") {
return [];
}
const minScore = opts?.minScore ?? this.settings.query.minScore;
const maxResults = opts?.maxResults ?? this.settings.query.maxResults;
const searchSources =
@@ -443,20 +495,6 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
Math.max(1, Math.floor(maxResults * hybrid.candidateMultiplier)),
);
if (!this.provider && this.providerLifecycle.mode === "degraded") {
const activatedFallback = await this.activateFallbackProvider(
this.providerLifecycle.reason,
).catch((fallbackErr: unknown) => {
log.warn(
`memory search: failed to activate fallback provider: ${formatErrorMessage(fallbackErr)}`,
);
return false;
});
if (activatedFallback) {
await this.runSafeReindex({ reason: "fallback", force: true });
}
}
// FTS-only mode: no embedding provider available
if (!this.provider) {
if (!this.fts.enabled || !this.fts.available) {
@@ -552,7 +590,13 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
})
: false;
if (activatedFallback) {
await this.runSafeReindex({ reason: "fallback", force: true });
if (
this.refreshIndexIdentityDirty({
providerKeyKnown: this.providerInitialized,
}).status !== "valid"
) {
return [];
}
keywordResults = await loadKeywordResults();
queryVec = await this.embedQueryWithRetry(cleaned);
} else if (!this.provider && this.fts.enabled && this.fts.available) {
@@ -856,6 +900,9 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
}
status(): MemoryProviderStatus {
this.refreshIndexIdentityDirty({
providerKeyKnown: this.providerInitialized,
});
const sourceFilter = this.buildSourceFilter();
const aggregateState = collectMemoryStatusAggregate({
db: {
@@ -884,7 +931,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
backend: "builtin",
files: aggregateState.files,
chunks: aggregateState.chunks,
dirty: this.dirty || this.sessionsDirty,
dirty: this.dirty || this.sessionsDirty || this.indexIdentityDirty,
workspaceDir: this.workspaceDir,
dbPath: this.settings.store.path,
provider: providerInfo.provider,
@@ -937,6 +984,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
searchMode: providerInfo.searchMode,
providerState: this.providerLifecycle,
providerUnavailableReason: this.providerUnavailableReason,
indexIdentity: this.indexIdentityState,
readonlyRecovery: {
attempts: this.readonlyRecoveryAttempts,
successes: this.readonlyRecoverySuccesses,

View File

@@ -126,6 +126,7 @@ vi.mock("./sqlite-vec.js", () => ({
}));
vi.mock("./embeddings.js", () => ({
resolveEmbeddingProviderAdapterId: (providerId: string) => providerId,
createEmbeddingProvider: async () => ({
requestedProvider: "openai",
provider: {

View File

@@ -117,15 +117,25 @@ export function createMemoryTool(params: {
};
}
export function buildMemorySearchUnavailableResult(error: string | undefined) {
export function buildMemorySearchUnavailableResult(
error: string | undefined,
overrides?: {
warning?: string;
action?: string;
},
) {
const reason = (error ?? "memory search unavailable").trim() || "memory search unavailable";
const isQuotaError = /insufficient_quota|quota|429/.test(normalizeLowercaseStringOrEmpty(reason));
const warning = isQuotaError
? "Memory search is unavailable because the embedding provider quota is exhausted."
: "Memory search is unavailable due to an embedding/provider error.";
const action = isQuotaError
? "Top up or switch embedding provider, then retry memory_search."
: "Check embedding provider configuration and retry memory_search.";
const warning =
overrides?.warning ??
(isQuotaError
? "Memory search is unavailable because the embedding provider quota is exhausted."
: "Memory search is unavailable due to an embedding/provider error.");
const action =
overrides?.action ??
(isQuotaError
? "Top up or switch embedding provider, then retry memory_search."
: "Check embedding provider configuration and retry memory_search.");
return {
results: [],
disabled: true,

View File

@@ -3,8 +3,10 @@ import {
getMemorySearchManagerMockCalls,
getMemorySearchManagerMockConfigs,
getMemorySearchManagerMockParams,
getMemorySyncMockCalls,
resetMemoryToolMockState,
setMemoryBackend,
setMemoryCustomStatus,
setMemorySearchImpl,
setMemorySearchManagerImpl,
} from "./memory-tool-manager-mock.js";
@@ -256,6 +258,39 @@ describe("memory_search unavailable payloads", () => {
expect(searchCalls).toBe(2);
});
it("returns unavailable metadata when the index identity is paused", async () => {
  const reason = "index was built for provider openai, expected ollama";
  let searchCalls = 0;
  setMemorySearchImpl(async () => {
    searchCalls += 1;
    return [];
  });
  // Seed the mock's custom status with a mismatched index identity.
  setMemoryCustomStatus({ indexIdentity: { status: "mismatched", reason } });

  const tool = createMemorySearchToolOrThrow({
    config: {
      agents: { list: [{ id: "main", default: true }] },
      memory: { citations: "off" },
    },
  });
  const { details } = await tool.execute("paused-index", { query: "hidden thread codename" });

  expectUnavailableMemorySearchDetails(details, {
    error: reason,
    warning:
      "Tell the user: memory search is paused because the memory index was built with a different embedding provider/model/settings.",
    action:
      "Tell the user to run: openclaw memory status --index or openclaw memory index --force.",
  });
  // Exactly one search ran and no sync was triggered.
  expect(searchCalls).toBe(1);
  expect(getMemorySyncMockCalls()).toBe(0);
});
it("returns structured search debug metadata for qmd results", async () => {
setMemoryBackend("qmd");
setMemorySearchImpl(async (opts) => {

View File

@@ -18,6 +18,7 @@ import {
resolveMemoryDreamingConfig,
resolveMemoryDeepDreamingConfig,
} from "openclaw/plugin-sdk/memory-core-host-status";
import { asRecord } from "./dreaming-shared.js";
import { filterMemorySearchHitsBySessionVisibility } from "./session-search-visibility.js";
import { recordShortTermRecalls } from "./short-term-promotion.js";
import {
@@ -109,6 +110,28 @@ async function runMemorySearchToolWithDeadline<T>(params: {
}
}
// Warning text returned with the unavailable result when the memory index identity is paused.
const PAUSED_MEMORY_INDEX_WARNING =
"Tell the user: memory search is paused because the memory index was built with a different embedding provider/model/settings.";
// Action text returned alongside the warning above, naming the CLI commands to run.
const PAUSED_MEMORY_INDEX_ACTION =
"Tell the user to run: openclaw memory status --index or openclaw memory index --force.";
/**
 * Extracts the pause reason from a manager status whose `custom.indexIdentity`
 * reports a `"mismatched"` or `"missing"` index identity.
 *
 * @param status - Manager status; only `custom.indexIdentity` is inspected.
 * @returns The trimmed `reason` string, a generic fallback when the reason is
 *   absent or blank, or `undefined` when the identity is not paused.
 */
function resolvePausedMemoryIndexIdentityReason(status: { custom?: unknown }): string | undefined {
  const identity = asRecord(asRecord(status.custom)?.indexIdentity);
  const paused = identity?.status === "mismatched" || identity?.status === "missing";
  if (!identity || !paused) {
    return undefined;
  }
  const { reason } = identity;
  if (typeof reason === "string") {
    const trimmed = reason.trim();
    if (trimmed) {
      return trimmed;
    }
  }
  return "memory index identity is missing or mismatched";
}
/**
 * Builds the unavailable-search payload for a paused memory index, replacing
 * the default warning/action text with the paused-index guidance.
 *
 * @param reason - Why the index identity is paused; passed through as the error.
 * @returns The result of `buildMemorySearchUnavailableResult` with the overrides applied.
 */
function buildPausedMemoryIndexUnavailableResult(reason: string) {
  const pausedGuidance = {
    warning: PAUSED_MEMORY_INDEX_WARNING,
    action: PAUSED_MEMORY_INDEX_ACTION,
  };
  return buildMemorySearchUnavailableResult(reason, pausedGuidance);
}
function sortMemorySearchToolResults<T extends { score: number; path: string }>(results: T[]): T[] {
return results.toSorted((left, right) => {
if (left.score !== right.score) {
@@ -316,7 +339,7 @@ export function createMemorySearchTool(options: {
label: "Memory Search",
name: "memory_search",
description:
"Mandatory recall step: semantically search MEMORY.md + memory/*.md (and optional session transcripts) before answering questions about prior work, decisions, dates, people, preferences, or todos. Optional `corpus=wiki` or `corpus=all` also searches registered compiled-wiki supplements. `corpus=memory` restricts hits to indexed memory files (excludes session transcript chunks from ranking). `corpus=sessions` restricts hits to indexed session transcripts (same visibility rules as session history tools). If response has disabled=true, memory retrieval is unavailable and should be surfaced to the user.",
"Mandatory recall step: semantically search MEMORY.md + memory/*.md (and optional session transcripts) before answering questions about prior work, decisions, dates, people, preferences, or todos. Optional `corpus=wiki` or `corpus=all` also searches registered compiled-wiki supplements. `corpus=memory` restricts hits to indexed memory files (excludes session transcript chunks from ranking). `corpus=sessions` restricts hits to indexed session transcripts (same visibility rules as session history tools). If response has disabled=true, memory retrieval is unavailable; you must tell the user and include the warning/action guidance.",
parameters: MemorySearchSchema,
execute:
({ cfg, agentId }) =>
@@ -400,6 +423,7 @@ export function createMemorySearchTool(options: {
let model: string | undefined;
let fallback: unknown;
let searchMode: string | undefined;
let pausedIndexIdentityReason: string | undefined;
let searchDebug:
| {
backend: string;
@@ -447,9 +471,21 @@ export function createMemorySearchTool(options: {
activeMemory = refreshed;
rawResults = await activeMemory.manager.search(query, searchOptions);
}
const statusBeforeRetry = activeMemory.manager.status();
pausedIndexIdentityReason =
resolvePausedMemoryIndexIdentityReason(statusBeforeRetry);
if (pausedIndexIdentityReason) {
return;
}
if (rawResults.length === 0 && activeMemory.manager.sync) {
await activeMemory.manager.sync({ reason: "search", force: true });
rawResults = await activeMemory.manager.search(query, searchOptions);
pausedIndexIdentityReason = resolvePausedMemoryIndexIdentityReason(
activeMemory.manager.status(),
);
if (pausedIndexIdentityReason) {
return;
}
}
rawResults = await filterMemorySearchHitsBySessionVisibility({
cfg,
@@ -500,6 +536,11 @@ export function createMemorySearchTool(options: {
hits: rawResults.length,
};
});
if (pausedIndexIdentityReason) {
return jsonResult(
buildPausedMemoryIndexUnavailableResult(pausedIndexIdentityReason),
);
}
}
const supplementResults = shouldQuerySupplements
? await runUnavailablePhase(