mirror of
https://github.com/pgvector/pgvector.git
synced 2026-07-02 18:50:56 +08:00
Improved memory calculation for HNSW and removed vector-specific code
This commit is contained in:
@@ -163,7 +163,7 @@ typedef struct HnswBuildState
|
||||
HnswElement entryPoint;
|
||||
double ml;
|
||||
int maxLevel;
|
||||
double maxInMemoryElements;
|
||||
long memoryLeft;
|
||||
bool flushed;
|
||||
Vector *normvec;
|
||||
|
||||
|
||||
@@ -313,6 +313,21 @@ InsertTuple(Relation index, Datum *values, HnswElement element, HnswBuildState *
|
||||
return *dup == NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the memory used by an element
|
||||
*/
|
||||
static long
|
||||
HnswElementMemory(HnswElement e, int m)
|
||||
{
|
||||
long elementSize = sizeof(HnswElementData);
|
||||
|
||||
elementSize += sizeof(HnswNeighborArray) * (e->level + 1);
|
||||
elementSize += sizeof(HnswCandidate) * (m * (e->level + 2));
|
||||
elementSize += sizeof(ItemPointerData);
|
||||
elementSize += VARSIZE_ANY(e->vec);
|
||||
return elementSize;
|
||||
}
|
||||
|
||||
/*
|
||||
* Callback for table_index_build_scan
|
||||
*/
|
||||
@@ -334,7 +349,7 @@ BuildCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values,
|
||||
if (isnull[0])
|
||||
return;
|
||||
|
||||
if (buildstate->indtuples >= buildstate->maxInMemoryElements)
|
||||
if (buildstate->memoryLeft <= 0)
|
||||
{
|
||||
if (!buildstate->flushed)
|
||||
{
|
||||
@@ -374,31 +389,21 @@ BuildCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values,
|
||||
|
||||
/* Add outside memory context */
|
||||
if (dup != NULL)
|
||||
{
|
||||
HnswAddHeapTid(dup, tid);
|
||||
buildstate->memoryLeft -= sizeof(ItemPointerData);
|
||||
}
|
||||
|
||||
/* Add to buildstate or free */
|
||||
if (inserted)
|
||||
{
|
||||
buildstate->elements = lappend(buildstate->elements, element);
|
||||
buildstate->memoryLeft -= HnswElementMemory(element, buildstate->m);
|
||||
}
|
||||
else
|
||||
HnswFreeElement(element);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the max number of elements that fit into maintenance_work_mem
|
||||
*/
|
||||
static double
|
||||
HnswGetMaxInMemoryElements(int m, double ml, int dimensions)
|
||||
{
|
||||
Size elementSize = sizeof(HnswElementData);
|
||||
double avgLevel = -log(0.5) * ml;
|
||||
|
||||
elementSize += sizeof(HnswNeighborArray) * (avgLevel + 1);
|
||||
elementSize += sizeof(HnswCandidate) * (m * (avgLevel + 2));
|
||||
elementSize += sizeof(ItemPointerData);
|
||||
elementSize += VECTOR_SIZE(dimensions);
|
||||
return (maintenance_work_mem * 1024L) / elementSize;
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize the build state
|
||||
*/
|
||||
@@ -436,7 +441,7 @@ InitBuildState(HnswBuildState * buildstate, Relation heap, Relation index, Index
|
||||
buildstate->entryPoint = NULL;
|
||||
buildstate->ml = HnswGetMl(buildstate->m);
|
||||
buildstate->maxLevel = HnswGetMaxLevel(buildstate->m);
|
||||
buildstate->maxInMemoryElements = HnswGetMaxInMemoryElements(buildstate->m, buildstate->ml, buildstate->dimensions);
|
||||
buildstate->memoryLeft = maintenance_work_mem * 1024L;
|
||||
buildstate->flushed = false;
|
||||
|
||||
/* Reuse for each tuple */
|
||||
|
||||
Reference in New Issue
Block a user