mirror of
https://github.com/pgvector/pgvector.git
synced 2026-07-04 19:50:59 +08:00
Improved HNSW build and insert code
This commit is contained in:
@@ -339,7 +339,6 @@ void HnswGetMetaPageInfo(Relation index, int *m, HnswElement * entryPoint);
|
|||||||
HnswElement HnswInitElement(ItemPointer tid, int m, double ml, int maxLevel);
|
HnswElement HnswInitElement(ItemPointer tid, int m, double ml, int maxLevel);
|
||||||
HnswElement HnswInitElementFromBlock(BlockNumber blkno, OffsetNumber offno);
|
HnswElement HnswInitElementFromBlock(BlockNumber blkno, OffsetNumber offno);
|
||||||
void HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, FmgrInfo *procinfo, Oid collation, int m, int efConstruction, bool existing);
|
void HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, FmgrInfo *procinfo, Oid collation, int m, int efConstruction, bool existing);
|
||||||
HnswElement HnswFindDuplicate(HnswElement e);
|
|
||||||
HnswCandidate *HnswEntryCandidate(HnswElement em, Datum q, Relation rel, FmgrInfo *procinfo, Oid collation, bool loadVec);
|
HnswCandidate *HnswEntryCandidate(HnswElement em, Datum q, Relation rel, FmgrInfo *procinfo, Oid collation, bool loadVec);
|
||||||
void HnswUpdateMetaPage(Relation index, int updateEntry, HnswElement entryPoint, BlockNumber insertPage, ForkNumber forkNum, bool building);
|
void HnswUpdateMetaPage(Relation index, int updateEntry, HnswElement entryPoint, BlockNumber insertPage, ForkNumber forkNum, bool building);
|
||||||
void HnswSetNeighborTuple(HnswNeighborTuple ntup, HnswElement e, int m);
|
void HnswSetNeighborTuple(HnswNeighborTuple ntup, HnswElement e, int m);
|
||||||
|
|||||||
@@ -316,6 +316,33 @@ HnswElementMemory(HnswElement e, int m)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Find duplicate element
|
||||||
|
*/
|
||||||
|
static bool
|
||||||
|
HnswFindDuplicateInMemory(HnswElement element)
|
||||||
|
{
|
||||||
|
HnswNeighborArray *neighbors = &element->neighbors[0];
|
||||||
|
|
||||||
|
for (int i = 0; i < neighbors->length; i++)
|
||||||
|
{
|
||||||
|
HnswCandidate *neighbor = &neighbors->items[i];
|
||||||
|
|
||||||
|
/* Exit early since ordered by distance */
|
||||||
|
if (!datumIsEqual(element->value, neighbor->element->value, false, -1))
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/* Check for space */
|
||||||
|
if (neighbor->element->heaptidsLength < HNSW_HEAPTIDS)
|
||||||
|
{
|
||||||
|
HnswAddHeapTid(neighbor->element, &element->heaptids[0]);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Insert tuple into in-memory graph
|
* Insert tuple into in-memory graph
|
||||||
*/
|
*/
|
||||||
@@ -330,7 +357,6 @@ InsertTupleInMemory(Relation index, Datum *values, ItemPointer heaptid, HnswBuil
|
|||||||
int m = buildstate->m;
|
int m = buildstate->m;
|
||||||
MemoryContext oldCtx;
|
MemoryContext oldCtx;
|
||||||
HnswElement element;
|
HnswElement element;
|
||||||
HnswElement dup;
|
|
||||||
|
|
||||||
/* Detoast once for all calls */
|
/* Detoast once for all calls */
|
||||||
Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0]));
|
Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0]));
|
||||||
@@ -348,38 +374,6 @@ InsertTupleInMemory(Relation index, Datum *values, ItemPointer heaptid, HnswBuil
|
|||||||
element->value = datumCopy(value, false, -1);
|
element->value = datumCopy(value, false, -1);
|
||||||
MemoryContextSwitchTo(oldCtx);
|
MemoryContextSwitchTo(oldCtx);
|
||||||
|
|
||||||
/* Insert element in graph */
|
|
||||||
HnswInsertElement(element, entryPoint, NULL, procinfo, collation, m, efConstruction, false);
|
|
||||||
|
|
||||||
/* Look for duplicate */
|
|
||||||
dup = HnswFindDuplicate(element);
|
|
||||||
|
|
||||||
if (dup == NULL)
|
|
||||||
{
|
|
||||||
/* Add element */
|
|
||||||
slist_push_head(&graph->elements, &element->next);
|
|
||||||
|
|
||||||
/* Update neighbors */
|
|
||||||
for (int lc = element->level; lc >= 0; lc--)
|
|
||||||
{
|
|
||||||
int lm = HnswGetLayerM(m, lc);
|
|
||||||
HnswNeighborArray *neighbors = &element->neighbors[lc];
|
|
||||||
|
|
||||||
for (int i = 0; i < neighbors->length; i++)
|
|
||||||
HnswUpdateConnection(element, &neighbors->items[i], lm, lc, NULL, NULL, procinfo, collation);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Update entry point if needed */
|
|
||||||
if (entryPoint == NULL || element->level > entryPoint->level)
|
|
||||||
graph->entryPoint = element;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
/* No need to free element since memory unlikely to be reallocated */
|
|
||||||
/* Element is also used to estimate memory usage below */
|
|
||||||
HnswAddHeapTid(dup, heaptid);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Update memory usage */
|
/* Update memory usage */
|
||||||
#if PG_VERSION_NUM >= 130000
|
#if PG_VERSION_NUM >= 130000
|
||||||
graph->memoryUsed = MemoryContextMemAllocated(buildstate->graphCtx, false);
|
graph->memoryUsed = MemoryContextMemAllocated(buildstate->graphCtx, false);
|
||||||
@@ -387,6 +381,33 @@ InsertTupleInMemory(Relation index, Datum *values, ItemPointer heaptid, HnswBuil
|
|||||||
graph->memoryUsed += HnswElementMemory(element, buildstate->m);
|
graph->memoryUsed += HnswElementMemory(element, buildstate->m);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Insert element in graph */
|
||||||
|
HnswInsertElement(element, entryPoint, NULL, procinfo, collation, m, efConstruction, false);
|
||||||
|
|
||||||
|
/* Look for duplicate */
|
||||||
|
if (HnswFindDuplicateInMemory(element))
|
||||||
|
{
|
||||||
|
/* No need to free element since memory unlikely to be reallocated */
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add element */
|
||||||
|
slist_push_head(&graph->elements, &element->next);
|
||||||
|
|
||||||
|
/* Update neighbors */
|
||||||
|
for (int lc = element->level; lc >= 0; lc--)
|
||||||
|
{
|
||||||
|
int lm = HnswGetLayerM(m, lc);
|
||||||
|
HnswNeighborArray *neighbors = &element->neighbors[lc];
|
||||||
|
|
||||||
|
for (int i = 0; i < neighbors->length; i++)
|
||||||
|
HnswUpdateConnection(element, &neighbors->items[i], lm, lc, NULL, NULL, procinfo, collation);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Update entry point if needed */
|
||||||
|
if (entryPoint == NULL || element->level > entryPoint->level)
|
||||||
|
graph->entryPoint = element;
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -447,7 +447,7 @@ HnswUpdateNeighborPages(Relation index, FmgrInfo *procinfo, Oid collation, HnswE
|
|||||||
* Add a heap TID to an existing element
|
* Add a heap TID to an existing element
|
||||||
*/
|
*/
|
||||||
static bool
|
static bool
|
||||||
HnswAddDuplicateToPage(Relation index, HnswElement element, HnswElement dup, bool building)
|
HnswAddDuplicate(Relation index, HnswElement element, HnswElement dup, bool building)
|
||||||
{
|
{
|
||||||
Buffer buf;
|
Buffer buf;
|
||||||
Page page;
|
Page page;
|
||||||
@@ -508,10 +508,10 @@ HnswAddDuplicateToPage(Relation index, HnswElement element, HnswElement dup, boo
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Add duplicate if found
|
* Find duplicate element
|
||||||
*/
|
*/
|
||||||
static bool
|
static bool
|
||||||
HnswAddDuplicateIfFound(Relation index, HnswElement element, bool building)
|
HnswFindDuplicate(Relation index, HnswElement element, bool building)
|
||||||
{
|
{
|
||||||
HnswNeighborArray *neighbors = &element->neighbors[0];
|
HnswNeighborArray *neighbors = &element->neighbors[0];
|
||||||
|
|
||||||
@@ -519,12 +519,11 @@ HnswAddDuplicateIfFound(Relation index, HnswElement element, bool building)
|
|||||||
{
|
{
|
||||||
HnswCandidate *neighbor = &neighbors->items[i];
|
HnswCandidate *neighbor = &neighbors->items[i];
|
||||||
|
|
||||||
/* Exit early if not duplicate since ordered by distance */
|
/* Exit early since ordered by distance */
|
||||||
if (!datumIsEqual(element->value, neighbor->element->value, false, -1))
|
if (!datumIsEqual(element->value, neighbor->element->value, false, -1))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
/* If adding fails, continue to next duplicate element */
|
if (HnswAddDuplicate(index, element, neighbor->element, building))
|
||||||
if (HnswAddDuplicateToPage(index, element, neighbor->element, building))
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -539,8 +538,8 @@ WriteElement(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement elem
|
|||||||
{
|
{
|
||||||
BlockNumber newInsertPage = InvalidBlockNumber;
|
BlockNumber newInsertPage = InvalidBlockNumber;
|
||||||
|
|
||||||
/* Try to add to existing page */
|
/* Look for duplicate */
|
||||||
if (HnswAddDuplicateIfFound(index, element, building))
|
if (HnswFindDuplicate(index, element, building))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
/* Write element and neighbor tuples */
|
/* Write element and neighbor tuples */
|
||||||
@@ -553,7 +552,7 @@ WriteElement(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement elem
|
|||||||
/* Update neighbors */
|
/* Update neighbors */
|
||||||
HnswUpdateNeighborPages(index, procinfo, collation, element, m, false, building);
|
HnswUpdateNeighborPages(index, procinfo, collation, element, m, false, building);
|
||||||
|
|
||||||
/* Update metapage if needed */
|
/* Update entry point if needed */
|
||||||
if (entryPoint == NULL || element->level > entryPoint->level)
|
if (entryPoint == NULL || element->level > entryPoint->level)
|
||||||
HnswUpdateMetaPage(index, HNSW_UPDATE_ENTRY_GREATER, element, InvalidBlockNumber, MAIN_FORKNUM, building);
|
HnswUpdateMetaPage(index, HNSW_UPDATE_ENTRY_GREATER, element, InvalidBlockNumber, MAIN_FORKNUM, building);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -908,30 +908,6 @@ SelectNeighbors(List *c, int m, int lc, FmgrInfo *procinfo, Oid collation, HnswE
|
|||||||
return r;
|
return r;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Find duplicate element
|
|
||||||
*/
|
|
||||||
HnswElement
|
|
||||||
HnswFindDuplicate(HnswElement e)
|
|
||||||
{
|
|
||||||
HnswNeighborArray *neighbors = &e->neighbors[0];
|
|
||||||
|
|
||||||
for (int i = 0; i < neighbors->length; i++)
|
|
||||||
{
|
|
||||||
HnswCandidate *neighbor = &neighbors->items[i];
|
|
||||||
|
|
||||||
/* Exit early since ordered by distance */
|
|
||||||
if (!datumIsEqual(e->value, neighbor->element->value, false, -1))
|
|
||||||
break;
|
|
||||||
|
|
||||||
/* Check for space */
|
|
||||||
if (neighbor->element->heaptidsLength < HNSW_HEAPTIDS)
|
|
||||||
return neighbor->element;
|
|
||||||
}
|
|
||||||
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Add connections
|
* Add connections
|
||||||
*/
|
*/
|
||||||
|
|||||||
Reference in New Issue
Block a user