From 3f3463bde5e69cfcca9df814e961a0dac7f39f5d Mon Sep 17 00:00:00 2001
From: Andrew Kane
Date: Thu, 9 Nov 2023 16:21:26 -0800
Subject: [PATCH] Improved memory calculation for HNSW and removed vector-specific code

---
 src/hnsw.h | 2 +-
 src/hnswbuild.c | 41 +++++++++++++++++++++++------------------
 2 files changed, 24 insertions(+), 19 deletions(-)

diff --git a/src/hnsw.h b/src/hnsw.h
index eb2aa9f..8990cdc 100644
--- a/src/hnsw.h
+++ b/src/hnsw.h
@@ -163,7 +163,7 @@ typedef struct HnswBuildState
 	HnswElement entryPoint;
 	double ml;
 	int maxLevel;
-	double maxInMemoryElements;
+	long memoryLeft;
 	bool flushed;
 	Vector *normvec;
 
diff --git a/src/hnswbuild.c b/src/hnswbuild.c
index 18959d5..ee6c7ad 100644
--- a/src/hnswbuild.c
+++ b/src/hnswbuild.c
@@ -313,6 +313,21 @@ InsertTuple(Relation index, Datum *values, HnswElement element, HnswBuildState *
 	return *dup == NULL;
 }
 
+/*
+ * Get the memory used by an element
+ */
+static long
+HnswElementMemory(HnswElement e, int m)
+{
+	long elementSize = sizeof(HnswElementData);
+
+	elementSize += sizeof(HnswNeighborArray) * (e->level + 1);
+	elementSize += sizeof(HnswCandidate) * (m * (e->level + 2));
+	elementSize += sizeof(ItemPointerData);
+	elementSize += VARSIZE_ANY(e->vec);
+	return elementSize;
+}
+
 /*
  * Callback for table_index_build_scan
  */
@@ -334,7 +349,7 @@ BuildCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values,
 	if (isnull[0])
 		return;
 
-	if (buildstate->indtuples >= buildstate->maxInMemoryElements)
+	if (buildstate->memoryLeft <= 0)
 	{
 		if (!buildstate->flushed)
 		{
@@ -374,31 +389,21 @@ BuildCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values,
 
 	/* Add outside memory context */
 	if (dup != NULL)
+	{
 		HnswAddHeapTid(dup, tid);
+		buildstate->memoryLeft -= sizeof(ItemPointerData);
+	}
 
 	/* Add to buildstate or free */
 	if (inserted)
+	{
 		buildstate->elements = lappend(buildstate->elements, element);
+		buildstate->memoryLeft -= HnswElementMemory(element, buildstate->m);
+	}
 	else
 		HnswFreeElement(element);
 }
 
-/*
- * Get the max number of elements that fit into maintenance_work_mem
- */
-static double
-HnswGetMaxInMemoryElements(int m, double ml, int dimensions)
-{
-	Size elementSize = sizeof(HnswElementData);
-	double avgLevel = -log(0.5) * ml;
-
-	elementSize += sizeof(HnswNeighborArray) * (avgLevel + 1);
-	elementSize += sizeof(HnswCandidate) * (m * (avgLevel + 2));
-	elementSize += sizeof(ItemPointerData);
-	elementSize += VECTOR_SIZE(dimensions);
-	return (maintenance_work_mem * 1024L) / elementSize;
-}
-
 /*
  * Initialize the build state
  */
@@ -436,7 +441,7 @@ InitBuildState(HnswBuildState * buildstate, Relation heap, Relation index, Index
 	buildstate->entryPoint = NULL;
 	buildstate->ml = HnswGetMl(buildstate->m);
 	buildstate->maxLevel = HnswGetMaxLevel(buildstate->m);
-	buildstate->maxInMemoryElements = HnswGetMaxInMemoryElements(buildstate->m, buildstate->ml, buildstate->dimensions);
+	buildstate->memoryLeft = maintenance_work_mem * 1024L;
 	buildstate->flushed = false;
 
 	/* Reuse for each tuple */