mirror of
https://github.com/pgvector/pgvector.git
synced 2026-06-30 01:31:15 +08:00
Improved HNSW build and insert code
This commit is contained in:
@@ -339,7 +339,6 @@ void HnswGetMetaPageInfo(Relation index, int *m, HnswElement * entryPoint);
|
||||
HnswElement HnswInitElement(ItemPointer tid, int m, double ml, int maxLevel);
|
||||
HnswElement HnswInitElementFromBlock(BlockNumber blkno, OffsetNumber offno);
|
||||
void HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, FmgrInfo *procinfo, Oid collation, int m, int efConstruction, bool existing);
|
||||
HnswElement HnswFindDuplicate(HnswElement e);
|
||||
HnswCandidate *HnswEntryCandidate(HnswElement em, Datum q, Relation rel, FmgrInfo *procinfo, Oid collation, bool loadVec);
|
||||
void HnswUpdateMetaPage(Relation index, int updateEntry, HnswElement entryPoint, BlockNumber insertPage, ForkNumber forkNum, bool building);
|
||||
void HnswSetNeighborTuple(HnswNeighborTuple ntup, HnswElement e, int m);
|
||||
|
||||
@@ -316,6 +316,33 @@ HnswElementMemory(HnswElement e, int m)
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Find duplicate element
|
||||
*/
|
||||
static bool
|
||||
HnswFindDuplicateInMemory(HnswElement element)
|
||||
{
|
||||
HnswNeighborArray *neighbors = &element->neighbors[0];
|
||||
|
||||
for (int i = 0; i < neighbors->length; i++)
|
||||
{
|
||||
HnswCandidate *neighbor = &neighbors->items[i];
|
||||
|
||||
/* Exit early since ordered by distance */
|
||||
if (!datumIsEqual(element->value, neighbor->element->value, false, -1))
|
||||
return false;
|
||||
|
||||
/* Check for space */
|
||||
if (neighbor->element->heaptidsLength < HNSW_HEAPTIDS)
|
||||
{
|
||||
HnswAddHeapTid(neighbor->element, &element->heaptids[0]);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
/*
|
||||
* Insert tuple into in-memory graph
|
||||
*/
|
||||
@@ -330,7 +357,6 @@ InsertTupleInMemory(Relation index, Datum *values, ItemPointer heaptid, HnswBuil
|
||||
int m = buildstate->m;
|
||||
MemoryContext oldCtx;
|
||||
HnswElement element;
|
||||
HnswElement dup;
|
||||
|
||||
/* Detoast once for all calls */
|
||||
Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0]));
|
||||
@@ -348,38 +374,6 @@ InsertTupleInMemory(Relation index, Datum *values, ItemPointer heaptid, HnswBuil
|
||||
element->value = datumCopy(value, false, -1);
|
||||
MemoryContextSwitchTo(oldCtx);
|
||||
|
||||
/* Insert element in graph */
|
||||
HnswInsertElement(element, entryPoint, NULL, procinfo, collation, m, efConstruction, false);
|
||||
|
||||
/* Look for duplicate */
|
||||
dup = HnswFindDuplicate(element);
|
||||
|
||||
if (dup == NULL)
|
||||
{
|
||||
/* Add element */
|
||||
slist_push_head(&graph->elements, &element->next);
|
||||
|
||||
/* Update neighbors */
|
||||
for (int lc = element->level; lc >= 0; lc--)
|
||||
{
|
||||
int lm = HnswGetLayerM(m, lc);
|
||||
HnswNeighborArray *neighbors = &element->neighbors[lc];
|
||||
|
||||
for (int i = 0; i < neighbors->length; i++)
|
||||
HnswUpdateConnection(element, &neighbors->items[i], lm, lc, NULL, NULL, procinfo, collation);
|
||||
}
|
||||
|
||||
/* Update entry point if needed */
|
||||
if (entryPoint == NULL || element->level > entryPoint->level)
|
||||
graph->entryPoint = element;
|
||||
}
|
||||
else
|
||||
{
|
||||
/* No need to free element since memory unlikely to be reallocated */
|
||||
/* Element is also used to estimate memory usage below */
|
||||
HnswAddHeapTid(dup, heaptid);
|
||||
}
|
||||
|
||||
/* Update memory usage */
|
||||
#if PG_VERSION_NUM >= 130000
|
||||
graph->memoryUsed = MemoryContextMemAllocated(buildstate->graphCtx, false);
|
||||
@@ -387,6 +381,33 @@ InsertTupleInMemory(Relation index, Datum *values, ItemPointer heaptid, HnswBuil
|
||||
graph->memoryUsed += HnswElementMemory(element, buildstate->m);
|
||||
#endif
|
||||
|
||||
/* Insert element in graph */
|
||||
HnswInsertElement(element, entryPoint, NULL, procinfo, collation, m, efConstruction, false);
|
||||
|
||||
/* Look for duplicate */
|
||||
if (HnswFindDuplicateInMemory(element))
|
||||
{
|
||||
/* No need to free element since memory unlikely to be reallocated */
|
||||
return true;
|
||||
}
|
||||
|
||||
/* Add element */
|
||||
slist_push_head(&graph->elements, &element->next);
|
||||
|
||||
/* Update neighbors */
|
||||
for (int lc = element->level; lc >= 0; lc--)
|
||||
{
|
||||
int lm = HnswGetLayerM(m, lc);
|
||||
HnswNeighborArray *neighbors = &element->neighbors[lc];
|
||||
|
||||
for (int i = 0; i < neighbors->length; i++)
|
||||
HnswUpdateConnection(element, &neighbors->items[i], lm, lc, NULL, NULL, procinfo, collation);
|
||||
}
|
||||
|
||||
/* Update entry point if needed */
|
||||
if (entryPoint == NULL || element->level > entryPoint->level)
|
||||
graph->entryPoint = element;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -447,7 +447,7 @@ HnswUpdateNeighborPages(Relation index, FmgrInfo *procinfo, Oid collation, HnswE
|
||||
* Add a heap TID to an existing element
|
||||
*/
|
||||
static bool
|
||||
HnswAddDuplicateToPage(Relation index, HnswElement element, HnswElement dup, bool building)
|
||||
HnswAddDuplicate(Relation index, HnswElement element, HnswElement dup, bool building)
|
||||
{
|
||||
Buffer buf;
|
||||
Page page;
|
||||
@@ -508,10 +508,10 @@ HnswAddDuplicateToPage(Relation index, HnswElement element, HnswElement dup, boo
|
||||
}
|
||||
|
||||
/*
|
||||
* Add duplicate if found
|
||||
* Find duplicate element
|
||||
*/
|
||||
static bool
|
||||
HnswAddDuplicateIfFound(Relation index, HnswElement element, bool building)
|
||||
HnswFindDuplicate(Relation index, HnswElement element, bool building)
|
||||
{
|
||||
HnswNeighborArray *neighbors = &element->neighbors[0];
|
||||
|
||||
@@ -519,12 +519,11 @@ HnswAddDuplicateIfFound(Relation index, HnswElement element, bool building)
|
||||
{
|
||||
HnswCandidate *neighbor = &neighbors->items[i];
|
||||
|
||||
/* Exit early if not duplicate since ordered by distance */
|
||||
/* Exit early since ordered by distance */
|
||||
if (!datumIsEqual(element->value, neighbor->element->value, false, -1))
|
||||
return false;
|
||||
|
||||
/* If adding fails, continue to next duplicate element */
|
||||
if (HnswAddDuplicateToPage(index, element, neighbor->element, building))
|
||||
if (HnswAddDuplicate(index, element, neighbor->element, building))
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -539,8 +538,8 @@ WriteElement(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement elem
|
||||
{
|
||||
BlockNumber newInsertPage = InvalidBlockNumber;
|
||||
|
||||
/* Try to add to existing page */
|
||||
if (HnswAddDuplicateIfFound(index, element, building))
|
||||
/* Look for duplicate */
|
||||
if (HnswFindDuplicate(index, element, building))
|
||||
return;
|
||||
|
||||
/* Write element and neighbor tuples */
|
||||
@@ -553,7 +552,7 @@ WriteElement(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement elem
|
||||
/* Update neighbors */
|
||||
HnswUpdateNeighborPages(index, procinfo, collation, element, m, false, building);
|
||||
|
||||
/* Update metapage if needed */
|
||||
/* Update entry point if needed */
|
||||
if (entryPoint == NULL || element->level > entryPoint->level)
|
||||
HnswUpdateMetaPage(index, HNSW_UPDATE_ENTRY_GREATER, element, InvalidBlockNumber, MAIN_FORKNUM, building);
|
||||
}
|
||||
|
||||
@@ -908,30 +908,6 @@ SelectNeighbors(List *c, int m, int lc, FmgrInfo *procinfo, Oid collation, HnswE
|
||||
return r;
|
||||
}
|
||||
|
||||
/*
|
||||
* Find duplicate element
|
||||
*/
|
||||
HnswElement
|
||||
HnswFindDuplicate(HnswElement e)
|
||||
{
|
||||
HnswNeighborArray *neighbors = &e->neighbors[0];
|
||||
|
||||
for (int i = 0; i < neighbors->length; i++)
|
||||
{
|
||||
HnswCandidate *neighbor = &neighbors->items[i];
|
||||
|
||||
/* Exit early since ordered by distance */
|
||||
if (!datumIsEqual(e->value, neighbor->element->value, false, -1))
|
||||
break;
|
||||
|
||||
/* Check for space */
|
||||
if (neighbor->element->heaptidsLength < HNSW_HEAPTIDS)
|
||||
return neighbor->element;
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Add connections
|
||||
*/
|
||||
|
||||
Reference in New Issue
Block a user