mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-06 05:51:15 +08:00
fix(memory): validate memory index identity
* docs: add memory index identity plan
* fix(memory): validate memory index identity
* fix(memory): align status index identity with vector probe
* fix(memory): fail closed on stale fts-only search
* fix(memory): clear sessions-only identity reindex dirty state
* fix(memory): gate targeted session sync by index identity
* fix(memory): clear resolved index identity dirtiness
* fix(memory): block search on missing index identity
* fix(memory): preserve dirty events during identity reindex
* fix(memory): resolve provider aliases for index identity
* fix(memory): report missing identity states accurately
* fix(memory): mark missing session index identity dirty
* test(memory): expose provider alias resolver in mocks
* chore(memory): remove scratch implementation plan
* fix(memory): avoid automatic full reindex on provider cutover
* docs(memory): plan no-schema cutover repair
* fix(memory): pause vector search on index identity mismatch
* fix(memory): freeze dirty identity sync writes
* fix(memory): skip paused-index search retry
* test(memory): keep retry tests on same provider identity
* fix(memory): surface paused index recall
* chore(memory): remove scratch plan from pr
* fix(memory): preserve paused session dirtiness
* fix(memory): make paused recall warning explicit
* docs(memory): document explicit index repair
This commit is contained in:
@@ -58,6 +58,15 @@ explicitly to use Gemini, Voyage, Mistral, DeepInfra, Bedrock, GitHub Copilot,
|
||||
Ollama, a local GGUF model, or an OpenAI-compatible `/v1/embeddings` endpoint.
|
||||
Legacy configs that still say `provider: "auto"` resolve to `openai`.
|
||||
|
||||
<Warning>
|
||||
Changing the embedding provider, model, provider settings, sources, scope,
|
||||
chunking, or tokenizer can make the existing SQLite vector index incompatible.
|
||||
OpenClaw pauses vector search and reports an index identity warning instead of
|
||||
automatically re-embedding everything. Rebuild when you are ready with
|
||||
`openclaw memory status --index --agent <id>` or
|
||||
`openclaw memory index --force --agent <id>`.
|
||||
</Warning>
|
||||
|
||||
If OpenAI embeddings are unreachable from your network, memory recall fails open
|
||||
instead of blocking the turn. Set the existing `memorySearch.provider` field to a
|
||||
reachable local, Ollama, regional, or OpenAI-compatible provider to restore
|
||||
@@ -155,7 +164,8 @@ Use `provider: "openai-compatible"` for a generic OpenAI-compatible
|
||||
| `outputDimensionality` | `number` | `3072` | For Embedding 2: 768, 1536, or 3072 |
|
||||
|
||||
<Warning>
|
||||
Changing model or `outputDimensionality` triggers an automatic full reindex.
|
||||
Changing model or `outputDimensionality` changes the index identity. OpenClaw
|
||||
pauses vector search until you explicitly rebuild the memory index.
|
||||
</Warning>
|
||||
|
||||
</Accordion>
|
||||
|
||||
@@ -71,6 +71,28 @@ type MemoryManagerPurpose = Parameters<typeof getMemorySearchManager>[0]["purpos
|
||||
|
||||
type MemorySourceName = "memory" | "sessions";
|
||||
|
||||
/**
 * Derives the index-identity warning shown by the memory CLI.
 *
 * Reads `status.custom.indexIdentity` and produces a warning only when its
 * `status` is `"mismatched"` or `"missing"` and it carries a non-empty string
 * `reason`.
 *
 * @param status - Status snapshot returned by the memory manager.
 * @param agentId - Agent id interpolated into the suggested repair command.
 * @returns The reason plus a suggested fix command, or `null` when there is
 *   nothing to warn about.
 */
function formatMemoryIndexIdentityWarning(
  status: ReturnType<MemoryManager["status"]>,
  agentId: string,
): {
  reason: string;
  fix: string;
} | null {
  const identity = asRecord(asRecord(status.custom)?.indexIdentity);
  if (!identity) {
    return null;
  }

  // Any state other than these two produces no warning.
  const state = identity.status;
  if (state !== "mismatched" && state !== "missing") {
    return null;
  }

  // A warning without a usable textual reason is suppressed entirely.
  const reason = identity.reason;
  if (typeof reason !== "string" || reason.length === 0) {
    return null;
  }

  const fix = `Run: openclaw memory status --index --agent ${agentId}`;
  return { reason, fix };
}
|
||||
|
||||
type SourceScan = {
|
||||
source: MemorySourceName;
|
||||
totalFiles: number | null;
|
||||
@@ -868,6 +890,12 @@ export async function runMemoryStatus(opts: MemoryCommandOptions) {
|
||||
lines.push(`${label("Embeddings error")} ${warn(embeddingProbe.error)}`);
|
||||
}
|
||||
}
|
||||
const identityWarning = formatMemoryIndexIdentityWarning(status, agentId);
|
||||
if (identityWarning) {
|
||||
lines.push(`${label("Index identity")} ${warn(identityWarning.reason)}`);
|
||||
lines.push(`${label("Vector search")} ${warn("paused until memory is rebuilt")}`);
|
||||
lines.push(`${label("Fix")} ${muted(identityWarning.fix)}`);
|
||||
}
|
||||
if (status.sourceCounts?.length) {
|
||||
lines.push(label("By source"));
|
||||
for (const entry of status.sourceCounts) {
|
||||
@@ -1256,6 +1284,15 @@ export async function runMemorySearch(
|
||||
defaultRuntime.writeJson({ results });
|
||||
return;
|
||||
}
|
||||
const identityWarning =
|
||||
typeof manager.status === "function"
|
||||
? formatMemoryIndexIdentityWarning(manager.status(), agentId)
|
||||
: null;
|
||||
if (identityWarning) {
|
||||
defaultRuntime.error(
|
||||
`Memory index warning: ${identityWarning.reason}. Vector memory search is paused until the index is rebuilt. ${identityWarning.fix}`,
|
||||
);
|
||||
}
|
||||
if (results.length === 0) {
|
||||
defaultRuntime.log("No matches.");
|
||||
return;
|
||||
|
||||
@@ -415,6 +415,36 @@ describe("memory cli", () => {
|
||||
expect(close).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("prints index identity mismatch reasons", async () => {
|
||||
const close = vi.fn(async () => {});
|
||||
mockManager({
|
||||
status: () =>
|
||||
makeMemoryStatus({
|
||||
dirty: true,
|
||||
provider: "ollama",
|
||||
model: "nomic-embed-text",
|
||||
requestedProvider: "ollama",
|
||||
custom: {
|
||||
indexIdentity: {
|
||||
status: "mismatched",
|
||||
reason: "index was built for provider openai, expected ollama",
|
||||
},
|
||||
},
|
||||
}),
|
||||
close,
|
||||
});
|
||||
|
||||
const log = spyRuntimeLogs(defaultRuntime);
|
||||
await runMemoryCli(["status"]);
|
||||
|
||||
expectLogged(log, "Provider: ollama (requested: ollama)");
|
||||
expectLogged(log, "Dirty: yes");
|
||||
expectLogged(log, "Index identity: index was built for provider openai, expected ollama");
|
||||
expectLogged(log, "Vector search: paused until memory is rebuilt");
|
||||
expectLogged(log, "Fix: Run: openclaw memory status --index --agent main");
|
||||
expect(close).toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("keeps plain status from probing vector or embeddings", async () => {
|
||||
const close = vi.fn(async () => {});
|
||||
const probeVectorAvailability = vi.fn(async () => {
|
||||
|
||||
@@ -86,6 +86,10 @@ export function setMemoryWorkspaceDir(next: string): void {
|
||||
workspaceDir = next;
|
||||
}
|
||||
|
||||
/**
 * Test helper: replaces the module-level `customStatus` value.
 *
 * @param next - New custom status record, or `undefined` to clear it.
 *   NOTE(review): presumably surfaced as `status().custom` by the stub
 *   manager — confirm against the stub definition.
 */
export function setMemoryCustomStatus(
  next: Record<string, unknown> | undefined,
): void {
  customStatus = next;
}
|
||||
|
||||
/**
 * Test helper: replaces the module-level `searchImpl` value.
 *
 * @param next - Search implementation to store.
 *   NOTE(review): presumably invoked by the stub manager's `search` — confirm
 *   against the stub definition.
 */
export function setMemorySearchImpl(
  next: SearchImpl,
): void {
  searchImpl = next;
}
|
||||
@@ -130,6 +134,10 @@ export function getMemorySearchManagerMockCalls(): number {
|
||||
return getMemorySearchManagerMock.mock.calls.length;
|
||||
}
|
||||
|
||||
/**
 * Test helper: reports how many calls the stub manager's `sync` mock has
 * recorded.
 *
 * @returns The length of `stubManager.sync.mock.calls`.
 */
export function getMemorySyncMockCalls(): number {
  const { calls } = stubManager.sync.mock;
  return calls.length;
}
|
||||
|
||||
/**
 * Test helper: collects the `cfg` property of the first argument of every
 * recorded `getMemorySearchManagerMock` call, in call order.
 *
 * @returns One entry per recorded call.
 */
export function getMemorySearchManagerMockConfigs(): unknown[] {
  const configs: unknown[] = [];
  for (const [params] of getMemorySearchManagerMock.mock.calls) {
    configs.push(params.cfg);
  }
  return configs;
}
|
||||
|
||||
@@ -26,6 +26,7 @@ export function resetEmbeddingMocks(): void {
|
||||
}
|
||||
|
||||
vi.mock("./embeddings.js", () => ({
|
||||
resolveEmbeddingProviderAdapterId: (providerId: string) => providerId,
|
||||
createEmbeddingProvider: async () => ({
|
||||
requestedProvider: "openai",
|
||||
provider: {
|
||||
|
||||
@@ -146,6 +146,17 @@ export function resolveEmbeddingProviderFallbackModel(
|
||||
return adapter?.defaultModel ?? fallbackSourceModel;
|
||||
}
|
||||
|
||||
/**
 * Resolves the id of the adapter that `getAdapter` selects for a provider.
 *
 * @param providerId - Provider id to look up.
 * @param config - Optional config forwarded unchanged to `getAdapter`.
 * @returns The resolved adapter's `id`, or `undefined` when `getAdapter`
 *   throws for this provider/config pair.
 */
export function resolveEmbeddingProviderAdapterId(
  providerId: string,
  config?: MemoryEmbeddingProviderCreateOptions["config"],
): string | undefined {
  let adapterId: string | undefined;
  try {
    adapterId = getAdapter(providerId, config).id;
  } catch {
    // Best-effort lookup: a failed resolution is reported as "no adapter id"
    // rather than propagated to the caller.
    adapterId = undefined;
  }
  return adapterId;
}
|
||||
|
||||
async function createWithAdapter(
|
||||
adapter: MemoryEmbeddingProviderAdapter,
|
||||
options: CreateEmbeddingProviderOptions,
|
||||
|
||||
@@ -13,6 +13,7 @@ import "./test-runtime-mocks.js";
|
||||
import type { MemoryIndexManager } from "./index.js";
|
||||
import { closeAllMemorySearchManagers, getMemorySearchManager } from "./index.js";
|
||||
import { LOCAL_EMBEDDING_WORKER_ERROR_CODES } from "./manager-local-worker-errors.js";
|
||||
import type { MemoryIndexMeta } from "./manager-reindex-state.js";
|
||||
import { closeMemoryIndexManagersForAgent, EMBEDDING_PROBE_CACHE_TTL_MS } from "./manager.js";
|
||||
import {
|
||||
DEFAULT_LOCAL_MODEL,
|
||||
@@ -58,6 +59,14 @@ vi.mock("./embeddings.js", () => {
|
||||
providerId === "gemini" || providerId === "fallback-provider"
|
||||
? `${providerId}-embed`
|
||||
: fallbackSourceModel,
|
||||
resolveEmbeddingProviderAdapterId: (
|
||||
providerId: string,
|
||||
config?: {
|
||||
models?: {
|
||||
providers?: Record<string, { api?: string; baseUrl?: string; models?: unknown[] }>;
|
||||
};
|
||||
},
|
||||
) => config?.models?.providers?.[providerId]?.api ?? providerId,
|
||||
createEmbeddingProvider: async (options: {
|
||||
provider?: string;
|
||||
model?: string;
|
||||
@@ -77,7 +86,9 @@ vi.mock("./embeddings.js", () => {
|
||||
};
|
||||
}
|
||||
const providerId =
|
||||
options.provider === "gemini" || options.provider === "fallback-provider"
|
||||
options.provider === "gemini" ||
|
||||
options.provider === "fallback-provider" ||
|
||||
options.provider === "ollama"
|
||||
? options.provider
|
||||
: "mock";
|
||||
const model = options.model ?? "mock-embed";
|
||||
@@ -261,8 +272,9 @@ describe("memory index", () => {
|
||||
extraPaths?: string[];
|
||||
sources?: Array<"memory" | "sessions">;
|
||||
sessionMemory?: boolean;
|
||||
provider?: "openai" | "gemini" | "fallback-provider";
|
||||
provider?: string;
|
||||
fallback?: "none" | "gemini" | "fallback-provider";
|
||||
providerAliases?: NonNullable<NonNullable<TestCfg["models"]>["providers"]>;
|
||||
model?: string;
|
||||
outputDimensionality?: number;
|
||||
multimodal?: {
|
||||
@@ -302,6 +314,7 @@ describe("memory index", () => {
|
||||
},
|
||||
list: [{ id: "main", default: true }],
|
||||
},
|
||||
models: params.providerAliases ? { providers: params.providerAliases } : undefined,
|
||||
};
|
||||
}
|
||||
|
||||
@@ -323,9 +336,12 @@ describe("memory index", () => {
|
||||
return manager;
|
||||
}
|
||||
|
||||
async function getFreshManager(cfg: TestCfg): Promise<MemoryIndexManager> {
|
||||
async function getFreshManager(
|
||||
cfg: TestCfg,
|
||||
purpose?: "default" | "status" | "cli",
|
||||
): Promise<MemoryIndexManager> {
|
||||
const { getRequiredMemoryIndexManager } = await import("./test-manager-helpers.js");
|
||||
return await getRequiredMemoryIndexManager({ cfg, agentId: "main" });
|
||||
return await getRequiredMemoryIndexManager({ cfg, agentId: "main", purpose });
|
||||
}
|
||||
|
||||
async function expectHybridKeywordSearchFindsMemory(cfg: TestCfg) {
|
||||
@@ -389,6 +405,406 @@ describe("memory index", () => {
|
||||
}
|
||||
});
|
||||
|
||||
it("does not full-reindex on search when existing metadata belongs to another provider", async () => {
|
||||
const dbPath = path.join(workspaceDir, "index-provider-cutover.sqlite");
|
||||
const oldCfg = createCfg({
|
||||
storePath: dbPath,
|
||||
model: "old-embed",
|
||||
hybrid: { enabled: true, vectorWeight: 0.5, textWeight: 0.5 },
|
||||
});
|
||||
const oldManager = await getFreshManager(oldCfg);
|
||||
await oldManager.sync({ reason: "test", force: true });
|
||||
await oldManager.close?.();
|
||||
|
||||
const nextCfg = createCfg({
|
||||
storePath: dbPath,
|
||||
provider: "gemini",
|
||||
model: "new-embed",
|
||||
hybrid: { enabled: true, vectorWeight: 0.5, textWeight: 0.5 },
|
||||
});
|
||||
const nextManager = await getFreshManager(nextCfg);
|
||||
try {
|
||||
expect(nextManager.status().dirty).toBe(true);
|
||||
expect(nextManager.status().custom?.indexIdentity).toEqual({
|
||||
status: "mismatched",
|
||||
reason: "index was built for model old-embed, expected new-embed",
|
||||
});
|
||||
embedBatchCalls = 0;
|
||||
|
||||
const results = await nextManager.search("alpha");
|
||||
|
||||
expect(results).toStrictEqual([]);
|
||||
expect(embedBatchCalls).toBe(0);
|
||||
expect(nextManager.status().dirty).toBe(true);
|
||||
|
||||
await fs.writeFile(
|
||||
path.join(memoryDir, "2026-01-12.md"),
|
||||
"# Log\nAlpha memory line changed.\nZebra memory line.",
|
||||
);
|
||||
await nextManager.sync({ reason: "watch" });
|
||||
|
||||
expect(embedBatchCalls).toBe(0);
|
||||
const stillPausedResults = await nextManager.search("alpha");
|
||||
expect(stillPausedResults).toStrictEqual([]);
|
||||
expect(nextManager.status().dirty).toBe(true);
|
||||
expect(nextManager.status().custom?.indexIdentity).toEqual({
|
||||
status: "mismatched",
|
||||
reason: "index was built for model old-embed, expected new-embed",
|
||||
});
|
||||
} finally {
|
||||
await nextManager.close?.();
|
||||
}
|
||||
});
|
||||
|
||||
it("keeps status clean when configured provider alias resolves to indexed adapter", async () => {
|
||||
const dbPath = path.join(workspaceDir, "index-provider-alias-status.sqlite");
|
||||
const oldCfg = createCfg({
|
||||
storePath: dbPath,
|
||||
provider: "ollama",
|
||||
model: "ollama-embed",
|
||||
hybrid: { enabled: true, vectorWeight: 0.5, textWeight: 0.5 },
|
||||
});
|
||||
const oldManager = await getFreshManager(oldCfg);
|
||||
await oldManager.sync({ reason: "test", force: true });
|
||||
await oldManager.close?.();
|
||||
|
||||
const aliasCfg = createCfg({
|
||||
storePath: dbPath,
|
||||
provider: "ollama-west",
|
||||
providerAliases: {
|
||||
"ollama-west": {
|
||||
api: "ollama",
|
||||
baseUrl: "http://127.0.0.1:11434",
|
||||
models: [],
|
||||
},
|
||||
},
|
||||
model: "ollama-embed",
|
||||
hybrid: { enabled: true, vectorWeight: 0.5, textWeight: 0.5 },
|
||||
});
|
||||
const statusManager = await getFreshManager(aliasCfg, "status");
|
||||
try {
|
||||
const status = statusManager.status();
|
||||
|
||||
expect(status.dirty).toBe(false);
|
||||
expect(status.custom?.indexIdentity).toEqual({ status: "valid" });
|
||||
} finally {
|
||||
await statusManager.close?.();
|
||||
}
|
||||
});
|
||||
|
||||
it("does not search stale rows when index metadata is missing", async () => {
|
||||
const dbPath = path.join(workspaceDir, "index-missing-meta-cutover.sqlite");
|
||||
const cfg = createCfg({
|
||||
storePath: dbPath,
|
||||
hybrid: { enabled: true, vectorWeight: 0.5, textWeight: 0.5 },
|
||||
});
|
||||
const oldManager = await getFreshManager(cfg);
|
||||
await oldManager.sync({ reason: "test", force: true });
|
||||
await oldManager.close?.();
|
||||
await fs.rm(path.join(memoryDir, "2026-01-12.md"));
|
||||
|
||||
const nextManager = await getFreshManager(cfg);
|
||||
try {
|
||||
(
|
||||
nextManager as unknown as {
|
||||
db: { exec: (sql: string) => void };
|
||||
}
|
||||
).db.exec(`DELETE FROM meta WHERE key = 'memory_index_meta_v1'`);
|
||||
expect(nextManager.status().custom?.indexIdentity).toEqual({
|
||||
status: "missing",
|
||||
reason: "index metadata is missing",
|
||||
});
|
||||
|
||||
const results = await nextManager.search("alpha");
|
||||
|
||||
expect(results).toStrictEqual([]);
|
||||
expect(nextManager.status().dirty).toBe(true);
|
||||
expect(nextManager.status().custom?.indexIdentity).toEqual({
|
||||
status: "missing",
|
||||
reason: "index metadata is missing",
|
||||
});
|
||||
} finally {
|
||||
await nextManager.close?.();
|
||||
}
|
||||
});
|
||||
|
||||
it("does not search stale provider rows after embeddings become unavailable", async () => {
|
||||
const dbPath = path.join(workspaceDir, "index-provider-unavailable-cutover.sqlite");
|
||||
const oldCfg = createCfg({
|
||||
storePath: dbPath,
|
||||
model: "semantic-embed",
|
||||
hybrid: { enabled: true, vectorWeight: 0.5, textWeight: 0.5 },
|
||||
});
|
||||
const oldManager = await getFreshManager(oldCfg);
|
||||
await oldManager.sync({ reason: "test", force: true });
|
||||
await oldManager.close?.();
|
||||
|
||||
forceNoProvider = true;
|
||||
const nextManager = await getFreshManager(oldCfg);
|
||||
try {
|
||||
const results = await nextManager.search("alpha");
|
||||
|
||||
expect(results).toStrictEqual([]);
|
||||
expect(nextManager.status().dirty).toBe(true);
|
||||
expect(nextManager.status().custom?.indexIdentity).toMatchObject({
|
||||
status: "mismatched",
|
||||
});
|
||||
} finally {
|
||||
await nextManager.close?.();
|
||||
}
|
||||
});
|
||||
|
||||
it("clears dirty after sessions-only identity reindex", async () => {
|
||||
try {
|
||||
vi.stubEnv("OPENCLAW_STATE_DIR", path.join(workspaceDir, ".state-sessions-only-reindex"));
|
||||
const sessionsDir = resolveSessionTranscriptsDirForAgent("main");
|
||||
await fs.mkdir(sessionsDir, { recursive: true });
|
||||
await fs.writeFile(
|
||||
path.join(sessionsDir, "session-identity.jsonl"),
|
||||
[
|
||||
JSON.stringify({
|
||||
type: "session",
|
||||
id: "session-identity",
|
||||
timestamp: "2026-04-07T15:24:04.113Z",
|
||||
}),
|
||||
JSON.stringify({
|
||||
type: "message",
|
||||
message: {
|
||||
role: "assistant",
|
||||
timestamp: "2026-04-07T15:25:04.113Z",
|
||||
content: [{ type: "text", text: "Session-only identity marker." }],
|
||||
},
|
||||
}),
|
||||
].join("\n") + "\n",
|
||||
"utf8",
|
||||
);
|
||||
|
||||
const dbPath = path.join(workspaceDir, "index-sessions-only-cutover.sqlite");
|
||||
const oldCfg = createCfg({
|
||||
storePath: dbPath,
|
||||
sources: ["sessions"],
|
||||
sessionMemory: true,
|
||||
model: "old-embed",
|
||||
});
|
||||
const oldManager = await getFreshManager(oldCfg);
|
||||
await oldManager.sync({ reason: "test", force: true });
|
||||
await oldManager.close?.();
|
||||
|
||||
const nextCfg = createCfg({
|
||||
storePath: dbPath,
|
||||
sources: ["sessions"],
|
||||
sessionMemory: true,
|
||||
provider: "gemini",
|
||||
model: "new-embed",
|
||||
});
|
||||
const nextManager = await getFreshManager(nextCfg);
|
||||
try {
|
||||
expect(nextManager.status().dirty).toBe(true);
|
||||
|
||||
await nextManager.sync({ reason: "test", force: true });
|
||||
|
||||
expect(nextManager.status().dirty).toBe(false);
|
||||
expect(nextManager.status().custom?.indexIdentity).toEqual({ status: "valid" });
|
||||
} finally {
|
||||
await nextManager.close?.();
|
||||
}
|
||||
} finally {
|
||||
vi.unstubAllEnvs();
|
||||
}
|
||||
});
|
||||
|
||||
it("marks sessions-only indexes dirty when metadata is missing but chunks exist", async () => {
|
||||
try {
|
||||
vi.stubEnv("OPENCLAW_STATE_DIR", path.join(workspaceDir, ".state-sessions-missing-meta"));
|
||||
const sessionsDir = resolveSessionTranscriptsDirForAgent("main");
|
||||
await fs.mkdir(sessionsDir, { recursive: true });
|
||||
await fs.writeFile(
|
||||
path.join(sessionsDir, "session-missing-meta.jsonl"),
|
||||
[
|
||||
JSON.stringify({
|
||||
type: "session",
|
||||
id: "session-missing-meta",
|
||||
timestamp: "2026-04-07T15:24:04.113Z",
|
||||
}),
|
||||
JSON.stringify({
|
||||
type: "message",
|
||||
message: {
|
||||
role: "assistant",
|
||||
timestamp: "2026-04-07T15:25:04.113Z",
|
||||
content: [{ type: "text", text: "Sessions missing metadata marker." }],
|
||||
},
|
||||
}),
|
||||
].join("\n") + "\n",
|
||||
"utf8",
|
||||
);
|
||||
|
||||
const dbPath = path.join(workspaceDir, "index-sessions-missing-meta.sqlite");
|
||||
const cfg = createCfg({
|
||||
storePath: dbPath,
|
||||
sources: ["sessions"],
|
||||
sessionMemory: true,
|
||||
});
|
||||
const oldManager = await getFreshManager(cfg);
|
||||
await oldManager.sync({ reason: "test", force: true });
|
||||
await oldManager.close?.();
|
||||
|
||||
const nextManager = await getFreshManager(cfg);
|
||||
try {
|
||||
(
|
||||
nextManager as unknown as {
|
||||
db: { exec: (sql: string) => void };
|
||||
}
|
||||
).db.exec(`DELETE FROM meta WHERE key = 'memory_index_meta_v1'`);
|
||||
|
||||
const status = nextManager.status();
|
||||
|
||||
expect(status.dirty).toBe(true);
|
||||
expect(status.custom?.indexIdentity).toEqual({
|
||||
status: "missing",
|
||||
reason: "index metadata is missing",
|
||||
});
|
||||
} finally {
|
||||
await nextManager.close?.();
|
||||
}
|
||||
} finally {
|
||||
vi.unstubAllEnvs();
|
||||
}
|
||||
});
|
||||
|
||||
it("keeps provider cutover vector search paused during targeted session sync", async () => {
|
||||
try {
|
||||
vi.stubEnv("OPENCLAW_STATE_DIR", path.join(workspaceDir, ".state-targeted-cutover"));
|
||||
const sessionsDir = resolveSessionTranscriptsDirForAgent("main");
|
||||
await fs.mkdir(sessionsDir, { recursive: true });
|
||||
const sessionFile = path.join(sessionsDir, "session-targeted-cutover.jsonl");
|
||||
await fs.writeFile(
|
||||
sessionFile,
|
||||
[
|
||||
JSON.stringify({
|
||||
type: "session",
|
||||
id: "session-targeted-cutover",
|
||||
timestamp: "2026-04-07T15:24:04.113Z",
|
||||
}),
|
||||
JSON.stringify({
|
||||
type: "message",
|
||||
message: {
|
||||
role: "assistant",
|
||||
timestamp: "2026-04-07T15:25:04.113Z",
|
||||
content: [{ type: "text", text: "Targeted cutover marker." }],
|
||||
},
|
||||
}),
|
||||
].join("\n") + "\n",
|
||||
"utf8",
|
||||
);
|
||||
|
||||
const dbPath = path.join(workspaceDir, "index-targeted-session-cutover.sqlite");
|
||||
const oldCfg = createCfg({
|
||||
storePath: dbPath,
|
||||
sources: ["memory", "sessions"],
|
||||
sessionMemory: true,
|
||||
model: "old-embed",
|
||||
});
|
||||
const oldManager = await getFreshManager(oldCfg);
|
||||
await oldManager.sync({ reason: "test", force: true });
|
||||
await oldManager.close?.();
|
||||
|
||||
const nextCfg = createCfg({
|
||||
storePath: dbPath,
|
||||
sources: ["memory", "sessions"],
|
||||
sessionMemory: true,
|
||||
provider: "gemini",
|
||||
model: "new-embed",
|
||||
});
|
||||
const nextManager = await getFreshManager(nextCfg);
|
||||
try {
|
||||
expect(nextManager.status().dirty).toBe(true);
|
||||
embedBatchCalls = 0;
|
||||
|
||||
await nextManager.sync({ reason: "test", sessionFiles: [sessionFile] });
|
||||
|
||||
expect(embedBatchCalls).toBe(0);
|
||||
expect(nextManager.status().dirty).toBe(true);
|
||||
expect(nextManager.status().custom?.indexIdentity).toEqual({
|
||||
status: "mismatched",
|
||||
reason: "index was built for model old-embed, expected new-embed",
|
||||
});
|
||||
const results = await nextManager.search("alpha");
|
||||
expect(results).toStrictEqual([]);
|
||||
} finally {
|
||||
await nextManager.close?.();
|
||||
}
|
||||
} finally {
|
||||
vi.unstubAllEnvs();
|
||||
}
|
||||
});
|
||||
|
||||
it("preserves memory dirty events raised during session identity reindex", async () => {
|
||||
try {
|
||||
vi.stubEnv("OPENCLAW_STATE_DIR", path.join(workspaceDir, ".state-dirty-during-session"));
|
||||
const sessionsDir = resolveSessionTranscriptsDirForAgent("main");
|
||||
await fs.mkdir(sessionsDir, { recursive: true });
|
||||
await fs.writeFile(
|
||||
path.join(sessionsDir, "session-dirty-during-reindex.jsonl"),
|
||||
[
|
||||
JSON.stringify({
|
||||
type: "session",
|
||||
id: "session-dirty-during-reindex",
|
||||
timestamp: "2026-04-07T15:24:04.113Z",
|
||||
}),
|
||||
JSON.stringify({
|
||||
type: "message",
|
||||
message: {
|
||||
role: "assistant",
|
||||
timestamp: "2026-04-07T15:25:04.113Z",
|
||||
content: [{ type: "text", text: "Dirty during session marker." }],
|
||||
},
|
||||
}),
|
||||
].join("\n") + "\n",
|
||||
"utf8",
|
||||
);
|
||||
|
||||
const dbPath = path.join(workspaceDir, "index-dirty-during-session.sqlite");
|
||||
const oldCfg = createCfg({
|
||||
storePath: dbPath,
|
||||
sources: ["memory", "sessions"],
|
||||
sessionMemory: true,
|
||||
model: "old-embed",
|
||||
});
|
||||
const oldManager = await getFreshManager(oldCfg);
|
||||
await oldManager.sync({ reason: "test", force: true });
|
||||
await oldManager.close?.();
|
||||
|
||||
const nextCfg = createCfg({
|
||||
storePath: dbPath,
|
||||
sources: ["memory", "sessions"],
|
||||
sessionMemory: true,
|
||||
provider: "gemini",
|
||||
model: "new-embed",
|
||||
});
|
||||
const nextManager = await getFreshManager(nextCfg);
|
||||
try {
|
||||
const fields = nextManager as unknown as {
|
||||
dirty: boolean;
|
||||
syncSessionFiles: (params: unknown) => Promise<void>;
|
||||
};
|
||||
const syncSessionFiles = fields.syncSessionFiles.bind(nextManager);
|
||||
fields.syncSessionFiles = async (params) => {
|
||||
fields.dirty = true;
|
||||
await syncSessionFiles(params);
|
||||
};
|
||||
|
||||
await nextManager.sync({ reason: "test", force: true });
|
||||
|
||||
expect(nextManager.status().dirty).toBe(true);
|
||||
expect(nextManager.status().custom?.indexIdentity).toEqual({ status: "valid" });
|
||||
} finally {
|
||||
await nextManager.close?.();
|
||||
}
|
||||
} finally {
|
||||
vi.unstubAllEnvs();
|
||||
}
|
||||
});
|
||||
|
||||
it("closes embedding providers when memory index managers close", async () => {
|
||||
const cfg = createCfg({
|
||||
storePath: indexMainPath,
|
||||
@@ -593,7 +1009,7 @@ describe("memory index", () => {
|
||||
waitForEmbeddingRetry: (delayMs: number, action: string) => Promise<void>;
|
||||
}
|
||||
).provider = {
|
||||
id: "openai",
|
||||
id: "mock",
|
||||
model: "mock-embed",
|
||||
embedQuery: async () => {
|
||||
queryCalls += 1;
|
||||
@@ -637,7 +1053,7 @@ describe("memory index", () => {
|
||||
};
|
||||
}
|
||||
).provider = {
|
||||
id: "openai",
|
||||
id: "mock",
|
||||
model: "mock-embed",
|
||||
embedQuery: async () => {
|
||||
queryCalls += 1;
|
||||
@@ -696,6 +1112,76 @@ describe("memory index", () => {
|
||||
expect(status.vector?.available).toBeUndefined();
|
||||
});
|
||||
|
||||
it("marks older vector indexes dirty after vector store probing", async () => {
|
||||
const dbPath = path.join(workspaceDir, "index-vector-missing-dims.sqlite");
|
||||
const legacyCfg = createCfg({
|
||||
storePath: dbPath,
|
||||
provider: "gemini",
|
||||
vectorEnabled: false,
|
||||
});
|
||||
const legacyManager = await getFreshManager(legacyCfg);
|
||||
await legacyManager.sync({ reason: "test", force: true });
|
||||
await legacyManager.close?.();
|
||||
|
||||
const cfg = createCfg({
|
||||
storePath: dbPath,
|
||||
provider: "gemini",
|
||||
vectorEnabled: true,
|
||||
});
|
||||
const manager = await getFreshManager(cfg);
|
||||
try {
|
||||
const metaAccess = manager as unknown as {
|
||||
readMeta(): MemoryIndexMeta | null;
|
||||
};
|
||||
const meta = metaAccess.readMeta();
|
||||
if (!meta) {
|
||||
throw new Error("expected index metadata");
|
||||
}
|
||||
expect(meta.vectorDims).toBeUndefined();
|
||||
|
||||
await manager.probeVectorStoreAvailability?.();
|
||||
const status = manager.status();
|
||||
|
||||
expect(status.dirty).toBe(true);
|
||||
expect(status.custom?.indexIdentity).toEqual({
|
||||
status: "mismatched",
|
||||
reason: "index vector dimensions are missing",
|
||||
});
|
||||
} finally {
|
||||
await manager.close?.();
|
||||
}
|
||||
});
|
||||
|
||||
it("keeps empty vector indexes clean after vector store probing", async () => {
|
||||
await fs.rm(path.join(memoryDir, "2026-01-12.md"));
|
||||
const dbPath = path.join(workspaceDir, "index-empty-vector.sqlite");
|
||||
const legacyCfg = createCfg({
|
||||
storePath: dbPath,
|
||||
provider: "gemini",
|
||||
vectorEnabled: false,
|
||||
});
|
||||
const legacyManager = await getFreshManager(legacyCfg);
|
||||
await legacyManager.sync({ reason: "test", force: true });
|
||||
await legacyManager.close?.();
|
||||
|
||||
const cfg = createCfg({
|
||||
storePath: dbPath,
|
||||
provider: "gemini",
|
||||
vectorEnabled: true,
|
||||
});
|
||||
const manager = await getFreshManager(cfg, "status");
|
||||
try {
|
||||
await manager.probeVectorStoreAvailability?.();
|
||||
|
||||
const status = manager.status();
|
||||
|
||||
expect(status.dirty).toBe(false);
|
||||
expect(status.custom?.indexIdentity).toEqual({ status: "valid" });
|
||||
} finally {
|
||||
await manager.close?.();
|
||||
}
|
||||
});
|
||||
|
||||
it("caches embedding probe readiness across transient status managers", async () => {
|
||||
const cfg = createCfg({ storePath: path.join(workspaceDir, "index-probe-cache.sqlite") });
|
||||
const first = requireManager(
|
||||
@@ -778,7 +1264,7 @@ describe("memory index", () => {
|
||||
});
|
||||
});
|
||||
|
||||
it("activates configured fallback when local embeddings degrade during search", async () => {
|
||||
it("does not activate fallback during search when index identity is already mismatched", async () => {
|
||||
const cfg = createCfg({
|
||||
storePath: path.join(workspaceDir, "index-search-degraded-fallback.sqlite"),
|
||||
fallback: "fallback-provider",
|
||||
@@ -810,21 +1296,68 @@ describe("memory index", () => {
|
||||
|
||||
const results = await manager.search("alpha");
|
||||
|
||||
expect(results.length).toBeGreaterThan(0);
|
||||
const resultKeys = results.map(
|
||||
(result) => `${result.source}:${result.path}:${result.startLine}:${result.endLine}`,
|
||||
);
|
||||
expect(new Set(resultKeys).size).toBe(resultKeys.length);
|
||||
expect(providerCalls.slice(callsBeforeSearch).map((call) => call.provider)).toContain(
|
||||
"fallback-provider",
|
||||
);
|
||||
expect(results).toStrictEqual([]);
|
||||
expect(providerCalls.slice(callsBeforeSearch)).toStrictEqual([]);
|
||||
expect(
|
||||
(
|
||||
manager as unknown as {
|
||||
provider: { id: string } | null;
|
||||
}
|
||||
).provider?.id,
|
||||
).toBe("fallback-provider");
|
||||
).toBe("local");
|
||||
});
|
||||
|
||||
it("rebuilds with fallback provider during explicit identity repair", async () => {
|
||||
const dbPath = path.join(workspaceDir, "index-cli-fallback-identity-repair.sqlite");
|
||||
const oldCfg = createCfg({
|
||||
storePath: dbPath,
|
||||
model: "old-embed",
|
||||
});
|
||||
const oldManager = await getFreshManager(oldCfg);
|
||||
await oldManager.sync({ reason: "test", force: true });
|
||||
await oldManager.close?.();
|
||||
|
||||
const cfg = createCfg({
|
||||
storePath: dbPath,
|
||||
model: "new-embed",
|
||||
fallback: "fallback-provider",
|
||||
});
|
||||
const manager = await getFreshManager(cfg);
|
||||
try {
|
||||
expect(manager.status().dirty).toBe(true);
|
||||
const fields = manager as unknown as {
|
||||
providerInitialized: boolean;
|
||||
provider: {
|
||||
id: string;
|
||||
model: string;
|
||||
embedQuery: (text: string) => Promise<number[]>;
|
||||
embedBatch: (texts: string[]) => Promise<number[][]>;
|
||||
close: () => Promise<void>;
|
||||
};
|
||||
};
|
||||
fields.providerInitialized = true;
|
||||
fields.provider = {
|
||||
id: "mock",
|
||||
model: "new-embed",
|
||||
embedQuery: async () => {
|
||||
throw createLocalWorkerExitError();
|
||||
},
|
||||
embedBatch: async () => {
|
||||
throw createLocalWorkerExitError();
|
||||
},
|
||||
close: async () => {},
|
||||
};
|
||||
|
||||
await manager.sync({ reason: "cli" });
|
||||
|
||||
expect(manager.status().dirty).toBe(false);
|
||||
expect(manager.status().provider).toBe("fallback-provider");
|
||||
expect(manager.status().model).toBe("fallback-provider-embed");
|
||||
expect(manager.status().custom?.indexIdentity).toEqual({ status: "valid" });
|
||||
await expect(manager.search("alpha")).resolves.not.toStrictEqual([]);
|
||||
} finally {
|
||||
await manager.close?.();
|
||||
}
|
||||
});
|
||||
|
||||
it("activates configured fallback after probe-time local degradation", async () => {
|
||||
@@ -866,7 +1399,7 @@ describe("memory index", () => {
|
||||
|
||||
const results = await manager.search("alpha");
|
||||
|
||||
expect(results.length).toBeGreaterThan(0);
|
||||
expect(results).toStrictEqual([]);
|
||||
expect(providerCalls.slice(callsBeforeSearch).map((call) => call.provider)).toContain(
|
||||
"fallback-provider",
|
||||
);
|
||||
@@ -879,6 +1412,73 @@ describe("memory index", () => {
|
||||
).toBe("fallback-provider");
|
||||
});
|
||||
|
||||
it("clears identity dirty after status resolves the indexed fallback provider", async () => {
  const dbPath = path.join(workspaceDir, "index-status-fallback-identity.sqlite");

  // Build the index with "fallback-provider" configured as the primary provider.
  const indexedCfg = createCfg({
    storePath: dbPath,
    provider: "fallback-provider",
    model: "new-embed",
  });
  const indexedManager = await getFreshManager(indexedCfg);
  await indexedManager.sync({ reason: "test", force: true });
  await indexedManager.close?.();

  // Reopen the same store with "fallback-provider" configured only as the fallback.
  const cfg = createCfg({
    storePath: dbPath,
    fallback: "fallback-provider",
    model: "new-embed",
  });
  const { getRequiredMemoryIndexManager } = await import("./test-manager-helpers.js");
  const manager = await getRequiredMemoryIndexManager({
    cfg,
    agentId: "main",
    purpose: "status",
  });
  try {
    // Before the provider is resolved, the status manager reports the index as dirty.
    expect(manager.status().dirty).toBe(true);

    // Reach into the manager's internals to simulate the provider having been
    // resolved to the same provider the index was built with.
    const fields = manager as unknown as {
      provider: {
        id: string;
        model: string;
        embedQuery: (text: string) => Promise<number[]>;
        embedBatch: (texts: string[]) => Promise<number[][]>;
        close: () => Promise<void>;
      };
      providerInitialized: boolean;
      providerRuntime: {
        id: string;
        cacheKeyData: Record<string, unknown>;
      };
      providerKey: string;
      computeProviderKey: () => string;
    };
    fields.provider = {
      id: "fallback-provider",
      model: "new-embed",
      embedQuery: async () => [1, 0, 0, 0],
      embedBatch: async (texts) => texts.map(() => [1, 0, 0, 0]),
      close: async () => {},
    };
    fields.providerRuntime = {
      id: "fallback-provider",
      cacheKeyData: {
        provider: "fallback-provider",
        baseUrl: "https://generativelanguage.googleapis.com/v1beta",
        model: "new-embed",
        headers: [],
      },
    };
    fields.providerInitialized = true;
    // Recompute the key only after the runtime has been swapped in.
    fields.providerKey = fields.computeProviderKey();

    // With the resolved provider in place, status must no longer report the index as dirty.
    expect(manager.status().dirty).toBe(false);
    expect(manager.status().custom?.indexIdentity).toEqual({ status: "valid" });
  } finally {
    await manager.close?.();
  }
});
|
||||
|
||||
it("streams embedding cache rows during safe reindex", async () => {
|
||||
vi.stubEnv("OPENCLAW_TEST_MEMORY_UNSAFE_REINDEX", "0");
|
||||
type EmbeddingCacheRow = {
|
||||
|
||||
@@ -3,7 +3,8 @@ import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
resolveConfiguredScopeHash,
|
||||
resolveConfiguredSourcesForMeta,
|
||||
shouldRunFullMemoryReindex,
|
||||
resolveMemoryIndexIdentityState,
|
||||
isMemoryIndexIdentityDirty,
|
||||
type MemoryIndexMeta,
|
||||
} from "./manager-reindex-state.js";
|
||||
|
||||
@@ -21,16 +22,18 @@ function createMeta(overrides: Partial<MemoryIndexMeta> = {}): MemoryIndexMeta {
|
||||
};
|
||||
}
|
||||
|
||||
function createFullReindexParams(
|
||||
function createIdentityParams(
|
||||
overrides: {
|
||||
meta?: MemoryIndexMeta | null;
|
||||
provider?: { id: string; model: string } | null;
|
||||
providerKey?: string;
|
||||
providerKeyKnown?: boolean;
|
||||
configuredSources?: MemorySource[];
|
||||
configuredScopeHash?: string;
|
||||
chunkTokens?: number;
|
||||
chunkOverlap?: number;
|
||||
vectorReady?: boolean;
|
||||
hasIndexedChunks?: boolean;
|
||||
ftsTokenizer?: string;
|
||||
} = {},
|
||||
) {
|
||||
@@ -43,26 +46,41 @@ function createFullReindexParams(
|
||||
chunkTokens: 4000,
|
||||
chunkOverlap: 0,
|
||||
vectorReady: false,
|
||||
hasIndexedChunks: true,
|
||||
ftsTokenizer: "unicode61",
|
||||
...overrides,
|
||||
};
|
||||
}
|
||||
|
||||
describe("memory reindex state", () => {
|
||||
it("requires a full reindex when the embedding model changes", () => {
|
||||
it("marks identity dirty when the embedding model changes", () => {
|
||||
expect(
|
||||
shouldRunFullMemoryReindex(
|
||||
createFullReindexParams({
|
||||
isMemoryIndexIdentityDirty(
|
||||
createIdentityParams({
|
||||
provider: { id: "openai", model: "mock-embed-v2" },
|
||||
}),
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("requires a full reindex when the provider cache key changes", () => {
|
||||
it("returns a mismatch reason when provider identity changes", () => {
|
||||
expect(
|
||||
shouldRunFullMemoryReindex(
|
||||
createFullReindexParams({
|
||||
resolveMemoryIndexIdentityState(
|
||||
createIdentityParams({
|
||||
provider: { id: "ollama", model: "mock-embed-v1" },
|
||||
providerKey: "provider-key-ollama",
|
||||
}),
|
||||
),
|
||||
).toEqual({
|
||||
status: "mismatched",
|
||||
reason: "index was built for provider openai, expected ollama",
|
||||
});
|
||||
});
|
||||
|
||||
it("marks identity dirty when the provider cache key changes", () => {
|
||||
expect(
|
||||
isMemoryIndexIdentityDirty(
|
||||
createIdentityParams({
|
||||
provider: { id: "gemini", model: "gemini-embedding-2-preview" },
|
||||
providerKey: "provider-key-dims-768",
|
||||
meta: createMeta({
|
||||
@@ -75,7 +93,30 @@ describe("memory reindex state", () => {
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("requires a full reindex when extraPaths change", () => {
|
||||
it("can defer provider key comparison until provider initialization", () => {
|
||||
expect(
|
||||
resolveMemoryIndexIdentityState(
|
||||
createIdentityParams({
|
||||
providerKey: undefined,
|
||||
providerKeyKnown: false,
|
||||
}),
|
||||
),
|
||||
).toEqual({ status: "valid" });
|
||||
});
|
||||
|
||||
it("does not mark identity dirty for vector dimensions before chunks exist", () => {
|
||||
expect(
|
||||
resolveMemoryIndexIdentityState(
|
||||
createIdentityParams({
|
||||
vectorReady: true,
|
||||
hasIndexedChunks: false,
|
||||
meta: createMeta({ vectorDims: undefined }),
|
||||
}),
|
||||
),
|
||||
).toEqual({ status: "valid" });
|
||||
});
|
||||
|
||||
it("marks identity dirty when extraPaths change", () => {
|
||||
const workspaceDir = "/tmp/workspace";
|
||||
const firstScopeHash = resolveConfiguredScopeHash({
|
||||
workspaceDir,
|
||||
@@ -97,8 +138,8 @@ describe("memory reindex state", () => {
|
||||
});
|
||||
|
||||
expect(
|
||||
shouldRunFullMemoryReindex(
|
||||
createFullReindexParams({
|
||||
isMemoryIndexIdentityDirty(
|
||||
createIdentityParams({
|
||||
meta: createMeta({ scopeHash: firstScopeHash }),
|
||||
configuredScopeHash: secondScopeHash,
|
||||
}),
|
||||
@@ -106,17 +147,17 @@ describe("memory reindex state", () => {
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("requires a full reindex when configured sources add sessions", () => {
|
||||
it("marks identity dirty when configured sources add sessions", () => {
|
||||
expect(
|
||||
shouldRunFullMemoryReindex(
|
||||
createFullReindexParams({
|
||||
isMemoryIndexIdentityDirty(
|
||||
createIdentityParams({
|
||||
configuredSources: ["memory", "sessions"],
|
||||
}),
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("requires a full reindex when multimodal settings change", () => {
|
||||
it("marks identity dirty when multimodal settings change", () => {
|
||||
const workspaceDir = "/tmp/workspace";
|
||||
const firstScopeHash = resolveConfiguredScopeHash({
|
||||
workspaceDir,
|
||||
@@ -138,8 +179,8 @@ describe("memory reindex state", () => {
|
||||
});
|
||||
|
||||
expect(
|
||||
shouldRunFullMemoryReindex(
|
||||
createFullReindexParams({
|
||||
isMemoryIndexIdentityDirty(
|
||||
createIdentityParams({
|
||||
meta: createMeta({ scopeHash: firstScopeHash }),
|
||||
configuredScopeHash: secondScopeHash,
|
||||
}),
|
||||
@@ -149,8 +190,8 @@ describe("memory reindex state", () => {
|
||||
|
||||
it("keeps older indexes with missing sources compatible with memory-only config", () => {
|
||||
expect(
|
||||
shouldRunFullMemoryReindex(
|
||||
createFullReindexParams({
|
||||
isMemoryIndexIdentityDirty(
|
||||
createIdentityParams({
|
||||
meta: createMeta({ sources: undefined }),
|
||||
configuredSources: resolveConfiguredSourcesForMeta(new Set(["memory"])),
|
||||
}),
|
||||
|
||||
@@ -16,6 +16,19 @@ export type MemoryIndexMeta = {
|
||||
ftsTokenizer?: string;
|
||||
};
|
||||
|
||||
/**
 * Outcome of comparing stored index metadata with the currently expected
 * index identity.
 *
 * - `valid`: no difference was detected.
 * - `missing`: no index metadata is available; `reason` explains why.
 * - `mismatched`: metadata exists but differs from what is expected;
 *   `reason` names the first difference found.
 */
export type MemoryIndexIdentityState =
  | {
      status: "valid";
    }
  | {
      status: "missing";
      reason: string;
    }
  | {
      status: "mismatched";
      reason: string;
    };
|
||||
|
||||
export function resolveConfiguredSourcesForMeta(sources: Iterable<MemorySource>): MemorySource[] {
|
||||
const normalized = Array.from(sources)
|
||||
.filter((source): source is MemorySource => source === "memory" || source === "sessions")
|
||||
@@ -73,31 +86,93 @@ export function resolveConfiguredScopeHash(params: {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Boolean convenience wrapper around `resolveMemoryIndexIdentityState`.
 *
 * @param params - Stored metadata plus the currently expected identity inputs;
 *   forwarded unchanged to `resolveMemoryIndexIdentityState`.
 * @returns `true` when the resolved identity status is anything other than
 *   `"valid"` (that is, `"missing"` or `"mismatched"`).
 */
export function isMemoryIndexIdentityDirty(params: {
  meta: MemoryIndexMeta | null;
  provider: { id: string; model: string } | null;
  providerKey?: string;
  providerKeyKnown?: boolean;
  configuredSources: MemorySource[];
  configuredScopeHash: string;
  chunkTokens: number;
  chunkOverlap: number;
  vectorReady: boolean;
  hasIndexedChunks?: boolean;
  ftsTokenizer: string;
}): boolean {
  return resolveMemoryIndexIdentityState(params).status !== "valid";
}
|
||||
|
||||
/**
 * Compares stored index metadata with the expected identity and reports the
 * first difference found.
 *
 * Checks run in a fixed order and return on the first failure: metadata
 * presence, model, provider id, provider key, sources, scope hash, chunking,
 * vector dimensions, FTS tokenizer.
 *
 * @param params.meta - Stored index metadata, or `null` when none exists.
 * @param params.provider - Expected embedding provider; `null` means the
 *   index is expected to carry model `"fts-only"` and provider `"none"`.
 * @param params.providerKey - Expected provider key, compared with
 *   `meta.providerKey`.
 * @param params.providerKeyKnown - Pass `false` to skip the provider key
 *   comparison; any other value (including omitted) performs it.
 * @param params.configuredSources - Expected sources, compared through
 *   `configuredMetaSourcesDiffer`.
 * @param params.configuredScopeHash - Expected scope hash.
 * @param params.chunkTokens - Expected chunk size in tokens.
 * @param params.chunkOverlap - Expected chunk overlap.
 * @param params.vectorReady - Whether vector search is ready; enables the
 *   vector dimension check.
 * @param params.hasIndexedChunks - Pass `false` to skip the vector dimension
 *   check; any other value (including omitted) performs it.
 * @param params.ftsTokenizer - Expected FTS tokenizer; metadata without a
 *   tokenizer is treated as `"unicode61"`.
 * @returns `{ status: "valid" }` when nothing differs, otherwise a
 *   `"missing"` or `"mismatched"` state with a human-readable reason.
 */
export function resolveMemoryIndexIdentityState(params: {
  meta: MemoryIndexMeta | null;
  provider: { id: string; model: string } | null;
  providerKey?: string;
  providerKeyKnown?: boolean;
  configuredSources: MemorySource[];
  configuredScopeHash: string;
  chunkTokens: number;
  chunkOverlap: number;
  vectorReady: boolean;
  hasIndexedChunks?: boolean;
  ftsTokenizer: string;
}): MemoryIndexIdentityState {
  const { meta } = params;
  if (!meta) {
    return { status: "missing", reason: "index metadata is missing" };
  }

  // Without a provider the index is expected to be lexical-only.
  const expectedModel = params.provider ? params.provider.model : "fts-only";
  if (meta.model !== expectedModel) {
    return {
      status: "mismatched",
      reason: `index was built for model ${meta.model}, expected ${expectedModel}`,
    };
  }

  const expectedProvider = params.provider ? params.provider.id : "none";
  if (meta.provider !== expectedProvider) {
    return {
      status: "mismatched",
      reason: `index was built for provider ${meta.provider}, expected ${expectedProvider}`,
    };
  }

  // Callers that cannot compute the provider key yet opt out with providerKeyKnown: false.
  if (params.providerKeyKnown !== false && meta.providerKey !== params.providerKey) {
    return {
      status: "mismatched",
      reason: "index provider settings changed",
    };
  }

  if (
    configuredMetaSourcesDiffer({
      meta,
      configuredSources: params.configuredSources,
    })
  ) {
    return {
      status: "mismatched",
      reason: "index sources changed",
    };
  }

  if (meta.scopeHash !== params.configuredScopeHash) {
    return {
      status: "mismatched",
      reason: "index scope changed",
    };
  }

  if (meta.chunkTokens !== params.chunkTokens || meta.chunkOverlap !== params.chunkOverlap) {
    return {
      status: "mismatched",
      reason: "index chunking changed",
    };
  }

  // Missing dimensions only count as a mismatch when the caller has not
  // reported that the index holds no chunks.
  if (params.vectorReady && params.hasIndexedChunks !== false && !meta.vectorDims) {
    return {
      status: "mismatched",
      reason: "index vector dimensions are missing",
    };
  }

  if ((meta.ftsTokenizer ?? "unicode61") !== params.ftsTokenizer) {
    return {
      status: "mismatched",
      reason: "index FTS tokenizer changed",
    };
  }

  return { status: "valid" };
}
|
||||
|
||||
@@ -573,7 +573,11 @@ describe("searchVector sqlite-vec KNN", () => {
|
||||
|
||||
function insertFallbackChunk(
|
||||
db: InstanceType<typeof DatabaseSync>,
|
||||
params: { id: string; model: string; vector: number[] },
|
||||
params: {
|
||||
id: string;
|
||||
model: string;
|
||||
vector: number[];
|
||||
},
|
||||
): void {
|
||||
db.prepare(
|
||||
"INSERT INTO chunks (id, path, source, start_line, end_line, hash, model, text, embedding, updated_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
||||
|
||||
@@ -28,6 +28,17 @@ describe("memory manager status state", () => {
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("marks status-only managers dirty when index identity mismatches", () => {
|
||||
expect(
|
||||
resolveInitialMemoryDirty({
|
||||
hasMemorySource: false,
|
||||
statusOnly: true,
|
||||
hasIndexedMeta: true,
|
||||
indexIdentityMismatched: true,
|
||||
}),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("reports the requested provider before provider initialization", () => {
|
||||
expect(
|
||||
resolveStatusProviderInfo({
|
||||
|
||||
@@ -27,8 +27,12 @@ export function resolveInitialMemoryDirty(params: {
|
||||
hasMemorySource: boolean;
|
||||
statusOnly: boolean;
|
||||
hasIndexedMeta: boolean;
|
||||
indexIdentityMismatched?: boolean;
|
||||
}): boolean {
|
||||
return params.hasMemorySource && (params.statusOnly ? !params.hasIndexedMeta : true);
|
||||
return (
|
||||
Boolean(params.indexIdentityMismatched) ||
|
||||
(params.hasMemorySource && (params.statusOnly ? !params.hasIndexedMeta : true))
|
||||
);
|
||||
}
|
||||
|
||||
export function resolveStatusProviderInfo(params: {
|
||||
|
||||
@@ -38,6 +38,7 @@ import { resolveTimerTimeoutMs } from "openclaw/plugin-sdk/number-runtime";
|
||||
import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/string-coerce-runtime";
|
||||
import {
|
||||
createEmbeddingProvider,
|
||||
resolveEmbeddingProviderAdapterId,
|
||||
type EmbeddingProvider,
|
||||
type EmbeddingProviderId,
|
||||
type EmbeddingProviderRuntime,
|
||||
@@ -54,8 +55,9 @@ import {
|
||||
import {
|
||||
resolveConfiguredScopeHash,
|
||||
resolveConfiguredSourcesForMeta,
|
||||
shouldRunFullMemoryReindex,
|
||||
resolveMemoryIndexIdentityState,
|
||||
type MemoryIndexMeta,
|
||||
type MemoryIndexIdentityState,
|
||||
} from "./manager-reindex-state.js";
|
||||
import { shouldSyncSessionsForReindex } from "./manager-session-reindex.js";
|
||||
import {
|
||||
@@ -67,7 +69,10 @@ import {
|
||||
loadMemorySourceFileState,
|
||||
resolveMemorySourceExistingHash,
|
||||
} from "./manager-source-state.js";
|
||||
import { runMemoryTargetedSessionSync } from "./manager-targeted-sync.js";
|
||||
import {
|
||||
markMemoryTargetSessionFilesDirty,
|
||||
runMemoryTargetedSessionSync,
|
||||
} from "./manager-targeted-sync.js";
|
||||
import {
|
||||
recordMemoryWatchEventPath,
|
||||
settleMemoryWatchEventPaths,
|
||||
@@ -269,6 +274,65 @@ export abstract class MemoryManagerSyncOps {
|
||||
options: { source: MemorySource; content?: string },
|
||||
): Promise<void>;
|
||||
|
||||
/**
 * Reports whether the `chunks` table holds at least one row.
 *
 * @returns `true` when the single-row probe query returns a row.
 */
protected hasIndexedChunks(): boolean {
  // LIMIT 1 probe: only existence matters, not the row count.
  const row = this.db.prepare(`SELECT 1 as found FROM chunks LIMIT 1`).get() as
    | { found?: number }
    | undefined;
  return row?.found === 1;
}
|
||||
|
||||
/**
 * Resolves the index identity state for this manager by combining its
 * settings and runtime state with optional per-call overrides.
 *
 * An override applies whenever its key is present on `params`, even if the
 * value is `undefined`; a present-but-undefined `meta` or `provider` is
 * treated as `null`.
 *
 * @param params.meta - Metadata to compare; defaults to `this.readMeta()`.
 * @param params.provider - Expected provider; defaults to the active
 *   provider when there is one, otherwise to the configured provider
 *   (`null` when the configured provider is `"none"`).
 * @param params.providerKeyKnown - When `false`, no provider key is passed
 *   along and the flag is forwarded so the key comparison is skipped.
 * @param params.vectorReady - Defaults to `this.vector.available === true`.
 * @param params.hasIndexedChunks - Defaults to `this.hasIndexedChunks()`.
 * @returns The state computed by `resolveMemoryIndexIdentityState`.
 */
protected resolveCurrentIndexIdentityState(params?: {
  meta?: MemoryIndexMeta | null;
  provider?: { id: string; model: string } | null;
  providerKeyKnown?: boolean;
  vectorReady?: boolean;
  hasIndexedChunks?: boolean;
}): MemoryIndexIdentityState {
  // Provider identity derived purely from settings, used when no provider is active.
  const configuredProvider =
    this.settings.provider === "none"
      ? null
      : {
          id:
            resolveEmbeddingProviderAdapterId(this.settings.provider, this.cfg) ??
            this.settings.provider,
          model: this.settings.model,
        };
  const provider =
    params && "provider" in params
      ? (params.provider ?? null)
      : this.provider
        ? { id: this.provider.id, model: this.provider.model }
        : configuredProvider;
  const vectorReady =
    params && "vectorReady" in params
      ? Boolean(params.vectorReady)
      : this.vector.available === true;

  return resolveMemoryIndexIdentityState({
    meta: params && "meta" in params ? (params.meta ?? null) : this.readMeta(),
    provider,
    // An unknown provider key must not be compared, so it is not forwarded.
    providerKey: params?.providerKeyKnown === false ? undefined : (this.providerKey ?? undefined),
    providerKeyKnown: params?.providerKeyKnown,
    configuredSources: resolveConfiguredSourcesForMeta(this.sources),
    configuredScopeHash: resolveConfiguredScopeHash({
      workspaceDir: this.workspaceDir,
      extraPaths: this.settings.extraPaths,
      multimodal: {
        enabled: this.settings.multimodal.enabled,
        modalities: this.settings.multimodal.modalities,
        maxFileBytes: this.settings.multimodal.maxFileBytes,
      },
    }),
    chunkTokens: this.settings.chunking.tokens,
    chunkOverlap: this.settings.chunking.overlap,
    vectorReady,
    hasIndexedChunks:
      params && "hasIndexedChunks" in params
        ? Boolean(params.hasIndexedChunks)
        : this.hasIndexedChunks(),
    ftsTokenizer: this.settings.store.fts.tokenizer,
  });
}
|
||||
|
||||
protected resetVectorState(): void {
|
||||
this.vectorReady = null;
|
||||
this.vector.available = null;
|
||||
@@ -1691,60 +1755,69 @@ export abstract class MemoryManagerSyncOps {
|
||||
}
|
||||
const vectorReady = await this.ensureVectorReady();
|
||||
const meta = this.readMeta();
|
||||
const configuredSources = resolveConfiguredSourcesForMeta(this.sources);
|
||||
const configuredScopeHash = resolveConfiguredScopeHash({
|
||||
workspaceDir: this.workspaceDir,
|
||||
extraPaths: this.settings.extraPaths,
|
||||
multimodal: {
|
||||
enabled: this.settings.multimodal.enabled,
|
||||
modalities: this.settings.multimodal.modalities,
|
||||
maxFileBytes: this.settings.multimodal.maxFileBytes,
|
||||
},
|
||||
});
|
||||
const targetSessionFiles = this.normalizeTargetSessionFiles(params?.sessionFiles);
|
||||
const hasTargetSessionFiles = targetSessionFiles !== null;
|
||||
if (params?.reason === "cli" && !params.force && !hasTargetSessionFiles) {
|
||||
await this.markSessionStartupCatchupDirtyFiles();
|
||||
}
|
||||
const targetedSessionSync = await runMemoryTargetedSessionSync({
|
||||
hasSessionSource: this.sources.has("sessions"),
|
||||
targetSessionFiles,
|
||||
reason: params?.reason,
|
||||
progress: progress ?? undefined,
|
||||
useUnsafeReindex:
|
||||
process.env.OPENCLAW_TEST_FAST === "1" &&
|
||||
process.env.OPENCLAW_TEST_MEMORY_UNSAFE_REINDEX === "1",
|
||||
sessionsDirtyFiles: this.sessionsDirtyFiles,
|
||||
syncSessionFiles: async (targetedParams) => {
|
||||
await this.syncSessionFiles(targetedParams);
|
||||
},
|
||||
shouldFallbackOnError: (err) => this.shouldFallbackOnError(err),
|
||||
activateFallbackProvider: async (reason) => await this.activateFallbackProvider(reason),
|
||||
runSafeReindex: async (reindexParams) => {
|
||||
await this.runSafeReindex(reindexParams);
|
||||
},
|
||||
runUnsafeReindex: async (reindexParams) => {
|
||||
await this.runUnsafeReindex(reindexParams);
|
||||
},
|
||||
const indexIdentity = resolveMemoryIndexIdentityState({
|
||||
meta,
|
||||
// Also detects provider→FTS-only transitions so orphaned old-model FTS rows are cleaned up.
|
||||
provider: this.provider ? { id: this.provider.id, model: this.provider.model } : null,
|
||||
providerKey: this.providerKey ?? undefined,
|
||||
configuredSources: resolveConfiguredSourcesForMeta(this.sources),
|
||||
configuredScopeHash: resolveConfiguredScopeHash({
|
||||
workspaceDir: this.workspaceDir,
|
||||
extraPaths: this.settings.extraPaths,
|
||||
multimodal: {
|
||||
enabled: this.settings.multimodal.enabled,
|
||||
modalities: this.settings.multimodal.modalities,
|
||||
maxFileBytes: this.settings.multimodal.maxFileBytes,
|
||||
},
|
||||
}),
|
||||
chunkTokens: this.settings.chunking.tokens,
|
||||
chunkOverlap: this.settings.chunking.overlap,
|
||||
vectorReady,
|
||||
hasIndexedChunks: this.hasIndexedChunks(),
|
||||
ftsTokenizer: this.settings.store.fts.tokenizer,
|
||||
});
|
||||
if (targetedSessionSync.handled) {
|
||||
this.sessionsDirty = targetedSessionSync.sessionsDirty;
|
||||
return;
|
||||
}
|
||||
const hasIndexedChunks = this.hasIndexedChunks();
|
||||
const needsInitialIndex = indexIdentity.status !== "valid" && !hasIndexedChunks;
|
||||
const needsExplicitIdentityReindex =
|
||||
params?.reason === "cli" && indexIdentity.status !== "valid" && !hasTargetSessionFiles;
|
||||
const needsFullReindex =
|
||||
(params?.force && !hasTargetSessionFiles) ||
|
||||
shouldRunFullMemoryReindex({
|
||||
meta,
|
||||
// Also detects provider→FTS-only transitions so orphaned old-model FTS rows are cleaned up.
|
||||
provider: this.provider ? { id: this.provider.id, model: this.provider.model } : null,
|
||||
providerKey: this.providerKey ?? undefined,
|
||||
configuredSources,
|
||||
configuredScopeHash,
|
||||
chunkTokens: this.settings.chunking.tokens,
|
||||
chunkOverlap: this.settings.chunking.overlap,
|
||||
vectorReady,
|
||||
ftsTokenizer: this.settings.store.fts.tokenizer,
|
||||
needsInitialIndex ||
|
||||
needsExplicitIdentityReindex;
|
||||
if (indexIdentity.status !== "valid" && !needsFullReindex) {
|
||||
this.dirty = true;
|
||||
const sessionsDirty = markMemoryTargetSessionFilesDirty({
|
||||
sessionsDirtyFiles: this.sessionsDirtyFiles,
|
||||
targetSessionFiles,
|
||||
});
|
||||
if (sessionsDirty) {
|
||||
this.sessionsDirty = true;
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (!needsFullReindex) {
|
||||
const targetedSessionSync = await runMemoryTargetedSessionSync({
|
||||
hasSessionSource: this.sources.has("sessions"),
|
||||
targetSessionFiles,
|
||||
reason: params?.reason,
|
||||
progress: progress ?? undefined,
|
||||
sessionsDirtyFiles: this.sessionsDirtyFiles,
|
||||
syncSessionFiles: async (targetedParams) => {
|
||||
await this.syncSessionFiles(targetedParams);
|
||||
},
|
||||
shouldFallbackOnError: (err) => this.shouldFallbackOnError(err),
|
||||
activateFallbackProvider: async (reason) => await this.activateFallbackProvider(reason),
|
||||
});
|
||||
if (targetedSessionSync.handled) {
|
||||
this.sessionsDirty = targetedSessionSync.sessionsDirty;
|
||||
return;
|
||||
}
|
||||
}
|
||||
try {
|
||||
if (needsFullReindex) {
|
||||
if (
|
||||
@@ -1794,20 +1867,17 @@ export abstract class MemoryManagerSyncOps {
|
||||
const activated =
|
||||
this.shouldFallbackOnError(err) && (await this.activateFallbackProvider(reason));
|
||||
if (activated) {
|
||||
await this.runSafeReindex({
|
||||
reason: params?.reason ?? "fallback",
|
||||
force: true,
|
||||
progress: progress ?? undefined,
|
||||
});
|
||||
if (needsFullReindex && !hasTargetSessionFiles) {
|
||||
await this.runSafeReindex({
|
||||
reason: params?.reason ?? "fallback",
|
||||
force: true,
|
||||
progress: progress ?? undefined,
|
||||
});
|
||||
}
|
||||
return;
|
||||
}
|
||||
if (!this.provider && this.fts.enabled && this.shouldFallbackOnError(err)) {
|
||||
log.warn(`memory embeddings unavailable; rebuilding lexical memory index only: ${reason}`);
|
||||
await this.runSafeReindex({
|
||||
reason: params?.reason ?? "embedding-degraded",
|
||||
force: true,
|
||||
progress: progress ?? undefined,
|
||||
});
|
||||
log.warn(`memory embeddings unavailable; leaving memory index dirty: ${reason}`);
|
||||
return;
|
||||
}
|
||||
throw err;
|
||||
@@ -1965,6 +2035,9 @@ export abstract class MemoryManagerSyncOps {
|
||||
} else {
|
||||
this.sessionsDirty = false;
|
||||
}
|
||||
if (!shouldSyncMemory) {
|
||||
this.dirty = false;
|
||||
}
|
||||
|
||||
const meta: MemoryIndexMeta = {
|
||||
model: this.provider?.model ?? "fts-only",
|
||||
@@ -2045,6 +2118,9 @@ export abstract class MemoryManagerSyncOps {
|
||||
} else {
|
||||
this.sessionsDirty = false;
|
||||
}
|
||||
if (!shouldSyncMemory) {
|
||||
this.dirty = false;
|
||||
}
|
||||
|
||||
const nextMeta: MemoryIndexMeta = {
|
||||
model: this.provider?.model ?? "fts-only",
|
||||
|
||||
@@ -38,6 +38,7 @@ vi.mock("openclaw/plugin-sdk/memory-core-host-engine-qmd", () => {
|
||||
});
|
||||
|
||||
// Replace the embeddings module for this test file: the adapter-id resolver
// returns its input unchanged and provider creation is a bare mock.
vi.mock("./embeddings.js", () => ({
  resolveEmbeddingProviderAdapterId: (providerId: string) => providerId,
  createEmbeddingProvider: vi.fn(),
}));
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import {
|
||||
clearMemorySyncedSessionFiles,
|
||||
markMemoryTargetSessionFilesDirty,
|
||||
runMemoryTargetedSessionSync,
|
||||
} from "./manager-targeted-sync.js";
|
||||
|
||||
@@ -18,61 +19,48 @@ describe("memory targeted session sync", () => {
|
||||
expect(sessionsDirty).toBe(true);
|
||||
});
|
||||
|
||||
it("runs a full reindex after fallback activates during targeted sync", async () => {
|
||||
const activateFallbackProvider = vi.fn(async () => true);
|
||||
const runSafeReindex = vi.fn(async () => {});
|
||||
const runUnsafeReindex = vi.fn(async () => {});
|
||||
it("marks target sessions dirty while identity sync is paused", () => {
|
||||
const targetSessionPath = "/tmp/paused-target.jsonl";
|
||||
const sessionsDirtyFiles = new Set(["/tmp/other-dirty.jsonl"]);
|
||||
|
||||
await runMemoryTargetedSessionSync({
|
||||
const sessionsDirty = markMemoryTargetSessionFilesDirty({
|
||||
sessionsDirtyFiles,
|
||||
targetSessionFiles: [targetSessionPath],
|
||||
});
|
||||
|
||||
expect(sessionsDirty).toBe(true);
|
||||
expect(sessionsDirtyFiles.has(targetSessionPath)).toBe(true);
|
||||
expect(sessionsDirtyFiles.has("/tmp/other-dirty.jsonl")).toBe(true);
|
||||
});
|
||||
|
||||
it("leaves targeted sessions dirty after fallback activates during targeted sync", async () => {
|
||||
const activateFallbackProvider = vi.fn(async () => true);
|
||||
const syncSessionFiles = vi
|
||||
.fn()
|
||||
.mockRejectedValueOnce(new Error("embedding backend failed"))
|
||||
.mockResolvedValueOnce(undefined);
|
||||
const sessionsDirtyFiles = new Set(["/tmp/targeted-fallback.jsonl", "/tmp/other-dirty.jsonl"]);
|
||||
|
||||
const result = await runMemoryTargetedSessionSync({
|
||||
hasSessionSource: true,
|
||||
targetSessionFiles: new Set(["/tmp/targeted-fallback.jsonl"]),
|
||||
reason: "post-compaction",
|
||||
progress: undefined,
|
||||
useUnsafeReindex: false,
|
||||
sessionsDirtyFiles: new Set(),
|
||||
syncSessionFiles: async () => {
|
||||
throw new Error("embedding backend failed");
|
||||
},
|
||||
sessionsDirtyFiles,
|
||||
syncSessionFiles,
|
||||
shouldFallbackOnError: () => true,
|
||||
activateFallbackProvider,
|
||||
runSafeReindex,
|
||||
runUnsafeReindex,
|
||||
});
|
||||
|
||||
expect(activateFallbackProvider).toHaveBeenCalledWith("embedding backend failed");
|
||||
expect(runSafeReindex).toHaveBeenCalledWith({
|
||||
reason: "post-compaction",
|
||||
force: true,
|
||||
expect(syncSessionFiles).toHaveBeenCalledTimes(1);
|
||||
expect(syncSessionFiles).toHaveBeenCalledWith({
|
||||
needsFullReindex: false,
|
||||
targetSessionFiles: ["/tmp/targeted-fallback.jsonl"],
|
||||
progress: undefined,
|
||||
});
|
||||
expect(runUnsafeReindex).not.toHaveBeenCalled();
|
||||
});
|
||||
|
||||
it("uses the unsafe reindex path when enabled", async () => {
|
||||
const runSafeReindex = vi.fn(async () => {});
|
||||
const runUnsafeReindex = vi.fn(async () => {});
|
||||
|
||||
await runMemoryTargetedSessionSync({
|
||||
hasSessionSource: true,
|
||||
targetSessionFiles: new Set(["/tmp/targeted-fallback.jsonl"]),
|
||||
reason: "post-compaction",
|
||||
progress: undefined,
|
||||
useUnsafeReindex: true,
|
||||
sessionsDirtyFiles: new Set(),
|
||||
syncSessionFiles: async () => {
|
||||
throw new Error("embedding backend failed");
|
||||
},
|
||||
shouldFallbackOnError: () => true,
|
||||
activateFallbackProvider: async () => true,
|
||||
runSafeReindex,
|
||||
runUnsafeReindex,
|
||||
});
|
||||
|
||||
expect(runUnsafeReindex).toHaveBeenCalledWith({
|
||||
reason: "post-compaction",
|
||||
force: true,
|
||||
progress: undefined,
|
||||
});
|
||||
expect(runSafeReindex).not.toHaveBeenCalled();
|
||||
expect(result).toEqual({ handled: true, sessionsDirty: true });
|
||||
expect(sessionsDirtyFiles.has("/tmp/targeted-fallback.jsonl")).toBe(true);
|
||||
expect(sessionsDirtyFiles.has("/tmp/other-dirty.jsonl")).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -22,12 +22,23 @@ export function clearMemorySyncedSessionFiles(params: {
|
||||
return params.sessionsDirtyFiles.size > 0;
|
||||
}
|
||||
|
||||
/**
 * Adds every target session file to the dirty set, mutating it in place.
 *
 * @param params.sessionsDirtyFiles - Set of dirty session file paths; mutated.
 * @param params.targetSessionFiles - Paths to mark dirty; nothing is added
 *   when this is omitted, `null`, or otherwise falsy.
 * @returns `true` when the dirty set is non-empty afterwards, including
 *   entries that were already present before the call.
 */
export function markMemoryTargetSessionFilesDirty(params: {
  sessionsDirtyFiles: Set<string>;
  targetSessionFiles?: Iterable<string> | null;
}): boolean {
  const { sessionsDirtyFiles, targetSessionFiles } = params;
  if (targetSessionFiles) {
    for (const targetSessionFile of targetSessionFiles) {
      sessionsDirtyFiles.add(targetSessionFile);
    }
  }
  return sessionsDirtyFiles.size > 0;
}
|
||||
|
||||
export async function runMemoryTargetedSessionSync(params: {
|
||||
hasSessionSource: boolean;
|
||||
targetSessionFiles: Set<string> | null;
|
||||
reason?: string;
|
||||
progress?: TargetedSyncProgress;
|
||||
useUnsafeReindex: boolean;
|
||||
sessionsDirtyFiles: Set<string>;
|
||||
syncSessionFiles: (params: {
|
||||
needsFullReindex: boolean;
|
||||
@@ -36,16 +47,6 @@ export async function runMemoryTargetedSessionSync(params: {
|
||||
}) => Promise<void>;
|
||||
shouldFallbackOnError: (err: unknown) => boolean;
|
||||
activateFallbackProvider: (reason: string) => Promise<boolean>;
|
||||
runSafeReindex: (params: {
|
||||
reason?: string;
|
||||
force?: boolean;
|
||||
progress?: TargetedSyncProgress;
|
||||
}) => Promise<void>;
|
||||
runUnsafeReindex: (params: {
|
||||
reason?: string;
|
||||
force?: boolean;
|
||||
progress?: TargetedSyncProgress;
|
||||
}) => Promise<void>;
|
||||
}): Promise<{ handled: boolean; sessionsDirty: boolean }> {
|
||||
if (!params.hasSessionSource || !params.targetSessionFiles) {
|
||||
return {
|
||||
@@ -74,19 +75,12 @@ export async function runMemoryTargetedSessionSync(params: {
|
||||
if (!activated) {
|
||||
throw err;
|
||||
}
|
||||
const reindexParams = {
|
||||
reason: params.reason,
|
||||
force: true,
|
||||
progress: params.progress,
|
||||
};
|
||||
if (params.useUnsafeReindex) {
|
||||
await params.runUnsafeReindex(reindexParams);
|
||||
} else {
|
||||
await params.runSafeReindex(reindexParams);
|
||||
}
|
||||
return {
|
||||
handled: true,
|
||||
sessionsDirty: params.sessionsDirtyFiles.size > 0,
|
||||
sessionsDirty: markMemoryTargetSessionFilesDirty({
|
||||
sessionsDirtyFiles: params.sessionsDirtyFiles,
|
||||
targetSessionFiles: params.targetSessionFiles,
|
||||
}),
|
||||
};
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@ vi.mock("./embeddings.js", () => ({
|
||||
provider: null,
|
||||
providerUnavailableReason: "No embeddings provider available.",
|
||||
}),
|
||||
resolveEmbeddingProviderAdapterId: (providerId: string) => providerId,
|
||||
resolveEmbeddingProviderFallbackModel: () => "fts-only",
|
||||
}));
|
||||
|
||||
|
||||
@@ -47,6 +47,7 @@ import {
|
||||
resolveMemoryProviderState,
|
||||
type MemoryProviderLifecycleState,
|
||||
} from "./manager-provider-state.js";
|
||||
import type { MemoryIndexIdentityState } from "./manager-reindex-state.js";
|
||||
import { resolveMemorySearchPreflight } from "./manager-search-preflight.js";
|
||||
import { searchKeyword, searchVector } from "./manager-search.js";
|
||||
import {
|
||||
@@ -171,6 +172,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
|
||||
protected override sessionsDirty = false;
|
||||
protected override sessionsDirtyFiles = new Set<string>();
|
||||
protected override sessionPendingFiles = new Set<string>();
|
||||
private indexIdentityDirty = false;
|
||||
protected override sessionDeltas = new Map<
|
||||
string,
|
||||
{ lastSize: number; pendingBytes: number; pendingMessages: number }
|
||||
@@ -183,6 +185,10 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
|
||||
private readonlyRecoverySuccesses = 0;
|
||||
private readonlyRecoveryFailures = 0;
|
||||
private readonlyRecoveryLastError?: string;
|
||||
private indexIdentityState: MemoryIndexIdentityState = {
|
||||
status: "missing",
|
||||
reason: "index metadata is missing",
|
||||
};
|
||||
|
||||
private static async loadProviderResult(params: {
|
||||
cfg: OpenClawConfig;
|
||||
@@ -267,6 +273,14 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
|
||||
if (meta?.vectorDims) {
|
||||
this.vector.dims = meta.vectorDims;
|
||||
}
|
||||
const initialIndexIdentity = this.resolveCurrentIndexIdentityState({
|
||||
meta,
|
||||
providerKeyKnown: Boolean(params.providerResult),
|
||||
});
|
||||
this.indexIdentityState = initialIndexIdentity;
|
||||
this.indexIdentityDirty =
|
||||
initialIndexIdentity.status === "mismatched" ||
|
||||
(initialIndexIdentity.status === "missing" && this.sources.has("memory"));
|
||||
const transient = params.purpose === "status" || params.purpose === "cli";
|
||||
if (!transient) {
|
||||
this.ensureWatcher();
|
||||
@@ -377,6 +391,23 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Recomputes the index identity state and the derived `indexIdentityDirty` flag.
 *
 * Stores the freshly resolved state on `this.indexIdentityState` and marks the
 * index identity dirty when the state is "mismatched", or when it is "missing"
 * and either the "memory" source is configured or indexed chunks already exist.
 *
 * @param params - Optional flags; `providerKeyKnown` is forwarded unchanged to
 *   `resolveCurrentIndexIdentityState`.
 * @returns The resolved index identity state.
 */
private refreshIndexIdentityDirty(params?: { providerKeyKnown?: boolean }) {
  // Three-way provider value:
  //  - undefined: provider not initialized yet, so the `provider` key is omitted below;
  //  - null: provider initialized but none is active;
  //  - { id, model }: the active provider.
  const provider = !this.providerInitialized
    ? undefined
    : this.provider
      ? { id: this.provider.id, model: this.provider.model }
      : null;
  const state = this.resolveCurrentIndexIdentityState({
    ...(provider !== undefined ? { provider } : {}),
    providerKeyKnown: params?.providerKeyKnown,
  });
  this.indexIdentityState = state;
  this.indexIdentityDirty =
    state.status === "mismatched" ||
    (state.status === "missing" && (this.sources.has("memory") || this.hasIndexedChunks()));
  return state;
}
|
||||
|
||||
async search(
|
||||
query: string,
|
||||
opts?: {
|
||||
@@ -423,6 +454,27 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
|
||||
if (preflight.shouldInitializeProvider) {
|
||||
await this.ensureProviderInitialized();
|
||||
}
|
||||
if (!this.provider && this.providerLifecycle.mode === "degraded") {
|
||||
const activatedFallback = await this.activateFallbackProvider(
|
||||
this.providerLifecycle.reason,
|
||||
).catch((fallbackErr: unknown) => {
|
||||
log.warn(
|
||||
`memory search: failed to activate fallback provider: ${formatErrorMessage(fallbackErr)}`,
|
||||
);
|
||||
return false;
|
||||
});
|
||||
if (activatedFallback) {
|
||||
this.refreshIndexIdentityDirty({
|
||||
providerKeyKnown: this.providerInitialized,
|
||||
});
|
||||
}
|
||||
}
|
||||
const indexIdentity = this.refreshIndexIdentityDirty({
|
||||
providerKeyKnown: this.providerInitialized,
|
||||
});
|
||||
if (indexIdentity.status !== "valid") {
|
||||
return [];
|
||||
}
|
||||
const minScore = opts?.minScore ?? this.settings.query.minScore;
|
||||
const maxResults = opts?.maxResults ?? this.settings.query.maxResults;
|
||||
const searchSources =
|
||||
@@ -443,20 +495,6 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
|
||||
Math.max(1, Math.floor(maxResults * hybrid.candidateMultiplier)),
|
||||
);
|
||||
|
||||
if (!this.provider && this.providerLifecycle.mode === "degraded") {
|
||||
const activatedFallback = await this.activateFallbackProvider(
|
||||
this.providerLifecycle.reason,
|
||||
).catch((fallbackErr: unknown) => {
|
||||
log.warn(
|
||||
`memory search: failed to activate fallback provider: ${formatErrorMessage(fallbackErr)}`,
|
||||
);
|
||||
return false;
|
||||
});
|
||||
if (activatedFallback) {
|
||||
await this.runSafeReindex({ reason: "fallback", force: true });
|
||||
}
|
||||
}
|
||||
|
||||
// FTS-only mode: no embedding provider available
|
||||
if (!this.provider) {
|
||||
if (!this.fts.enabled || !this.fts.available) {
|
||||
@@ -552,7 +590,13 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
|
||||
})
|
||||
: false;
|
||||
if (activatedFallback) {
|
||||
await this.runSafeReindex({ reason: "fallback", force: true });
|
||||
if (
|
||||
this.refreshIndexIdentityDirty({
|
||||
providerKeyKnown: this.providerInitialized,
|
||||
}).status !== "valid"
|
||||
) {
|
||||
return [];
|
||||
}
|
||||
keywordResults = await loadKeywordResults();
|
||||
queryVec = await this.embedQueryWithRetry(cleaned);
|
||||
} else if (!this.provider && this.fts.enabled && this.fts.available) {
|
||||
@@ -856,6 +900,9 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
|
||||
}
|
||||
|
||||
status(): MemoryProviderStatus {
|
||||
this.refreshIndexIdentityDirty({
|
||||
providerKeyKnown: this.providerInitialized,
|
||||
});
|
||||
const sourceFilter = this.buildSourceFilter();
|
||||
const aggregateState = collectMemoryStatusAggregate({
|
||||
db: {
|
||||
@@ -884,7 +931,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
|
||||
backend: "builtin",
|
||||
files: aggregateState.files,
|
||||
chunks: aggregateState.chunks,
|
||||
dirty: this.dirty || this.sessionsDirty,
|
||||
dirty: this.dirty || this.sessionsDirty || this.indexIdentityDirty,
|
||||
workspaceDir: this.workspaceDir,
|
||||
dbPath: this.settings.store.path,
|
||||
provider: providerInfo.provider,
|
||||
@@ -937,6 +984,7 @@ export class MemoryIndexManager extends MemoryManagerEmbeddingOps implements Mem
|
||||
searchMode: providerInfo.searchMode,
|
||||
providerState: this.providerLifecycle,
|
||||
providerUnavailableReason: this.providerUnavailableReason,
|
||||
indexIdentity: this.indexIdentityState,
|
||||
readonlyRecovery: {
|
||||
attempts: this.readonlyRecoveryAttempts,
|
||||
successes: this.readonlyRecoverySuccesses,
|
||||
|
||||
@@ -126,6 +126,7 @@ vi.mock("./sqlite-vec.js", () => ({
|
||||
}));
|
||||
|
||||
vi.mock("./embeddings.js", () => ({
|
||||
resolveEmbeddingProviderAdapterId: (providerId: string) => providerId,
|
||||
createEmbeddingProvider: async () => ({
|
||||
requestedProvider: "openai",
|
||||
provider: {
|
||||
|
||||
@@ -117,15 +117,25 @@ export function createMemoryTool(params: {
|
||||
};
|
||||
}
|
||||
|
||||
export function buildMemorySearchUnavailableResult(error: string | undefined) {
|
||||
export function buildMemorySearchUnavailableResult(
|
||||
error: string | undefined,
|
||||
overrides?: {
|
||||
warning?: string;
|
||||
action?: string;
|
||||
},
|
||||
) {
|
||||
const reason = (error ?? "memory search unavailable").trim() || "memory search unavailable";
|
||||
const isQuotaError = /insufficient_quota|quota|429/.test(normalizeLowercaseStringOrEmpty(reason));
|
||||
const warning = isQuotaError
|
||||
? "Memory search is unavailable because the embedding provider quota is exhausted."
|
||||
: "Memory search is unavailable due to an embedding/provider error.";
|
||||
const action = isQuotaError
|
||||
? "Top up or switch embedding provider, then retry memory_search."
|
||||
: "Check embedding provider configuration and retry memory_search.";
|
||||
const warning =
|
||||
overrides?.warning ??
|
||||
(isQuotaError
|
||||
? "Memory search is unavailable because the embedding provider quota is exhausted."
|
||||
: "Memory search is unavailable due to an embedding/provider error.");
|
||||
const action =
|
||||
overrides?.action ??
|
||||
(isQuotaError
|
||||
? "Top up or switch embedding provider, then retry memory_search."
|
||||
: "Check embedding provider configuration and retry memory_search.");
|
||||
return {
|
||||
results: [],
|
||||
disabled: true,
|
||||
|
||||
@@ -3,8 +3,10 @@ import {
|
||||
getMemorySearchManagerMockCalls,
|
||||
getMemorySearchManagerMockConfigs,
|
||||
getMemorySearchManagerMockParams,
|
||||
getMemorySyncMockCalls,
|
||||
resetMemoryToolMockState,
|
||||
setMemoryBackend,
|
||||
setMemoryCustomStatus,
|
||||
setMemorySearchImpl,
|
||||
setMemorySearchManagerImpl,
|
||||
} from "./memory-tool-manager-mock.js";
|
||||
@@ -256,6 +258,39 @@ describe("memory_search unavailable payloads", () => {
|
||||
expect(searchCalls).toBe(2);
|
||||
});
|
||||
|
||||
// A mismatched index identity must surface the paused-index warning/action,
// run exactly one search, and never trigger a sync retry.
it("returns unavailable metadata when the index identity is paused", async () => {
  let searchCalls = 0;
  setMemorySearchImpl(async () => {
    searchCalls += 1;
    return [];
  });
  const reason = "index was built for provider openai, expected ollama";
  setMemoryCustomStatus({
    indexIdentity: {
      status: "mismatched",
      reason,
    },
  });

  const tool = createMemorySearchToolOrThrow({
    config: {
      agents: { list: [{ id: "main", default: true }] },
      memory: { citations: "off" },
    },
  });
  const result = await tool.execute("paused-index", { query: "hidden thread codename" });

  expectUnavailableMemorySearchDetails(result.details, {
    error: reason,
    warning:
      "Tell the user: memory search is paused because the memory index was built with a different embedding provider/model/settings.",
    action:
      "Tell the user to run: openclaw memory status --index or openclaw memory index --force.",
  });
  expect(searchCalls).toBe(1);
  expect(getMemorySyncMockCalls()).toBe(0);
});
|
||||
|
||||
it("returns structured search debug metadata for qmd results", async () => {
|
||||
setMemoryBackend("qmd");
|
||||
setMemorySearchImpl(async (opts) => {
|
||||
|
||||
@@ -18,6 +18,7 @@ import {
|
||||
resolveMemoryDreamingConfig,
|
||||
resolveMemoryDeepDreamingConfig,
|
||||
} from "openclaw/plugin-sdk/memory-core-host-status";
|
||||
import { asRecord } from "./dreaming-shared.js";
|
||||
import { filterMemorySearchHitsBySessionVisibility } from "./session-search-visibility.js";
|
||||
import { recordShortTermRecalls } from "./short-term-promotion.js";
|
||||
import {
|
||||
@@ -109,6 +110,28 @@ async function runMemorySearchToolWithDeadline<T>(params: {
|
||||
}
|
||||
}
|
||||
|
||||
/** Warning text returned by memory_search when the memory index identity is missing or mismatched. */
const PAUSED_MEMORY_INDEX_WARNING =
  "Tell the user: memory search is paused because the memory index was built with a different embedding provider/model/settings.";
/** Action guidance returned alongside the paused-index warning. */
const PAUSED_MEMORY_INDEX_ACTION =
  "Tell the user to run: openclaw memory status --index or openclaw memory index --force.";
|
||||
|
||||
/**
 * Extracts the reason memory search is paused from a manager status payload.
 *
 * @param status - Status object; only `custom.indexIdentity` is inspected.
 * @returns `undefined` unless `indexIdentity.status` is "mismatched" or "missing".
 *   Otherwise the trimmed `indexIdentity.reason`, or a generic fallback message
 *   when the reason is absent, not a string, or blank.
 */
function resolvePausedMemoryIndexIdentityReason(status: { custom?: unknown }): string | undefined {
  const indexIdentity = asRecord(asRecord(status.custom)?.indexIdentity);
  if (indexIdentity?.status !== "mismatched" && indexIdentity?.status !== "missing") {
    return undefined;
  }
  // Trim once; an empty result falls through to the generic message.
  const reason = typeof indexIdentity.reason === "string" ? indexIdentity.reason.trim() : "";
  return reason || "memory index identity is missing or mismatched";
}
|
||||
|
||||
/**
 * Builds the memory_search "unavailable" payload for a paused memory index.
 *
 * @param reason - Error text describing why the index identity is paused.
 * @returns The result of `buildMemorySearchUnavailableResult` with the
 *   paused-index warning and action overriding its default messages.
 */
function buildPausedMemoryIndexUnavailableResult(reason: string) {
  return buildMemorySearchUnavailableResult(reason, {
    warning: PAUSED_MEMORY_INDEX_WARNING,
    action: PAUSED_MEMORY_INDEX_ACTION,
  });
}
|
||||
|
||||
function sortMemorySearchToolResults<T extends { score: number; path: string }>(results: T[]): T[] {
|
||||
return results.toSorted((left, right) => {
|
||||
if (left.score !== right.score) {
|
||||
@@ -316,7 +339,7 @@ export function createMemorySearchTool(options: {
|
||||
label: "Memory Search",
|
||||
name: "memory_search",
|
||||
description:
|
||||
"Mandatory recall step: semantically search MEMORY.md + memory/*.md (and optional session transcripts) before answering questions about prior work, decisions, dates, people, preferences, or todos. Optional `corpus=wiki` or `corpus=all` also searches registered compiled-wiki supplements. `corpus=memory` restricts hits to indexed memory files (excludes session transcript chunks from ranking). `corpus=sessions` restricts hits to indexed session transcripts (same visibility rules as session history tools). If response has disabled=true, memory retrieval is unavailable and should be surfaced to the user.",
|
||||
"Mandatory recall step: semantically search MEMORY.md + memory/*.md (and optional session transcripts) before answering questions about prior work, decisions, dates, people, preferences, or todos. Optional `corpus=wiki` or `corpus=all` also searches registered compiled-wiki supplements. `corpus=memory` restricts hits to indexed memory files (excludes session transcript chunks from ranking). `corpus=sessions` restricts hits to indexed session transcripts (same visibility rules as session history tools). If response has disabled=true, memory retrieval is unavailable; you must tell the user and include the warning/action guidance.",
|
||||
parameters: MemorySearchSchema,
|
||||
execute:
|
||||
({ cfg, agentId }) =>
|
||||
@@ -400,6 +423,7 @@ export function createMemorySearchTool(options: {
|
||||
let model: string | undefined;
|
||||
let fallback: unknown;
|
||||
let searchMode: string | undefined;
|
||||
let pausedIndexIdentityReason: string | undefined;
|
||||
let searchDebug:
|
||||
| {
|
||||
backend: string;
|
||||
@@ -447,9 +471,21 @@ export function createMemorySearchTool(options: {
|
||||
activeMemory = refreshed;
|
||||
rawResults = await activeMemory.manager.search(query, searchOptions);
|
||||
}
|
||||
const statusBeforeRetry = activeMemory.manager.status();
|
||||
pausedIndexIdentityReason =
|
||||
resolvePausedMemoryIndexIdentityReason(statusBeforeRetry);
|
||||
if (pausedIndexIdentityReason) {
|
||||
return;
|
||||
}
|
||||
if (rawResults.length === 0 && activeMemory.manager.sync) {
|
||||
await activeMemory.manager.sync({ reason: "search", force: true });
|
||||
rawResults = await activeMemory.manager.search(query, searchOptions);
|
||||
pausedIndexIdentityReason = resolvePausedMemoryIndexIdentityReason(
|
||||
activeMemory.manager.status(),
|
||||
);
|
||||
if (pausedIndexIdentityReason) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
rawResults = await filterMemorySearchHitsBySessionVisibility({
|
||||
cfg,
|
||||
@@ -500,6 +536,11 @@ export function createMemorySearchTool(options: {
|
||||
hits: rawResults.length,
|
||||
};
|
||||
});
|
||||
if (pausedIndexIdentityReason) {
|
||||
return jsonResult(
|
||||
buildPausedMemoryIndexUnavailableResult(pausedIndexIdentityReason),
|
||||
);
|
||||
}
|
||||
}
|
||||
const supplementResults = shouldQuerySupplements
|
||||
? await runUnavailablePhase(
|
||||
|
||||
Reference in New Issue
Block a user