Improved HNSW build and insert code

This commit is contained in:
Andrew Kane
2024-01-13 10:07:42 -08:00
parent cacd389f6d
commit cbf3eb4fa5
4 changed files with 62 additions and 67 deletions

View File

@@ -339,7 +339,6 @@ void HnswGetMetaPageInfo(Relation index, int *m, HnswElement * entryPoint);
HnswElement HnswInitElement(ItemPointer tid, int m, double ml, int maxLevel);
HnswElement HnswInitElementFromBlock(BlockNumber blkno, OffsetNumber offno);
void HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, FmgrInfo *procinfo, Oid collation, int m, int efConstruction, bool existing);
HnswElement HnswFindDuplicate(HnswElement e);
HnswCandidate *HnswEntryCandidate(HnswElement em, Datum q, Relation rel, FmgrInfo *procinfo, Oid collation, bool loadVec);
void HnswUpdateMetaPage(Relation index, int updateEntry, HnswElement entryPoint, BlockNumber insertPage, ForkNumber forkNum, bool building);
void HnswSetNeighborTuple(HnswNeighborTuple ntup, HnswElement e, int m);

View File

@@ -316,6 +316,33 @@ HnswElementMemory(HnswElement e, int m)
}
#endif
/*
 * Fold an element into an existing in-memory duplicate, if one has room
 *
 * Walks the element's layer 0 neighbors looking for one whose value is
 * identical according to datumIsEqual. If such a neighbor holds fewer than
 * HNSW_HEAPTIDS heap TIDs, the element's first heap TID is added to that
 * neighbor and true is returned. Returns false when the first non-identical
 * neighbor is reached or when every identical neighbor is already full.
 */
static bool
HnswFindDuplicateInMemory(HnswElement element)
{
	HnswNeighborArray *layer0 = &element->neighbors[0];
	int			pos = 0;

	while (pos < layer0->length)
	{
		HnswElement other = layer0->items[pos].element;

		/* Neighbors are ordered by distance, so the first mismatch ends the search */
		if (!datumIsEqual(element->value, other->value, false, -1))
			return false;

		/* Identical value with a free heap TID slot: attach and stop */
		if (other->heaptidsLength < HNSW_HEAPTIDS)
		{
			HnswAddHeapTid(other, &element->heaptids[0]);
			return true;
		}

		/* This duplicate is full; try the next neighbor */
		pos++;
	}

	return false;
}
/*
* Insert tuple into in-memory graph
*/
@@ -330,7 +357,6 @@ InsertTupleInMemory(Relation index, Datum *values, ItemPointer heaptid, HnswBuil
int m = buildstate->m;
MemoryContext oldCtx;
HnswElement element;
HnswElement dup;
/* Detoast once for all calls */
Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0]));
@@ -348,38 +374,6 @@ InsertTupleInMemory(Relation index, Datum *values, ItemPointer heaptid, HnswBuil
element->value = datumCopy(value, false, -1);
MemoryContextSwitchTo(oldCtx);
/* Insert element in graph */
HnswInsertElement(element, entryPoint, NULL, procinfo, collation, m, efConstruction, false);
/* Look for duplicate */
dup = HnswFindDuplicate(element);
if (dup == NULL)
{
/* Add element */
slist_push_head(&graph->elements, &element->next);
/* Update neighbors */
for (int lc = element->level; lc >= 0; lc--)
{
int lm = HnswGetLayerM(m, lc);
HnswNeighborArray *neighbors = &element->neighbors[lc];
for (int i = 0; i < neighbors->length; i++)
HnswUpdateConnection(element, &neighbors->items[i], lm, lc, NULL, NULL, procinfo, collation);
}
/* Update entry point if needed */
if (entryPoint == NULL || element->level > entryPoint->level)
graph->entryPoint = element;
}
else
{
/* No need to free element since memory unlikely to be reallocated */
/* Element is also used to estimate memory usage below */
HnswAddHeapTid(dup, heaptid);
}
/* Update memory usage */
#if PG_VERSION_NUM >= 130000
graph->memoryUsed = MemoryContextMemAllocated(buildstate->graphCtx, false);
@@ -387,6 +381,33 @@ InsertTupleInMemory(Relation index, Datum *values, ItemPointer heaptid, HnswBuil
graph->memoryUsed += HnswElementMemory(element, buildstate->m);
#endif
/* Insert element in graph */
HnswInsertElement(element, entryPoint, NULL, procinfo, collation, m, efConstruction, false);
/* Look for duplicate */
if (HnswFindDuplicateInMemory(element))
{
/* No need to free element since memory unlikely to be reallocated */
return true;
}
/* Add element */
slist_push_head(&graph->elements, &element->next);
/* Update neighbors */
for (int lc = element->level; lc >= 0; lc--)
{
int lm = HnswGetLayerM(m, lc);
HnswNeighborArray *neighbors = &element->neighbors[lc];
for (int i = 0; i < neighbors->length; i++)
HnswUpdateConnection(element, &neighbors->items[i], lm, lc, NULL, NULL, procinfo, collation);
}
/* Update entry point if needed */
if (entryPoint == NULL || element->level > entryPoint->level)
graph->entryPoint = element;
return true;
}

View File

@@ -447,7 +447,7 @@ HnswUpdateNeighborPages(Relation index, FmgrInfo *procinfo, Oid collation, HnswE
* Add a heap TID to an existing element
*/
static bool
HnswAddDuplicateToPage(Relation index, HnswElement element, HnswElement dup, bool building)
HnswAddDuplicate(Relation index, HnswElement element, HnswElement dup, bool building)
{
Buffer buf;
Page page;
@@ -508,10 +508,10 @@ HnswAddDuplicateToPage(Relation index, HnswElement element, HnswElement dup, boo
}
/*
* Add duplicate if found
* Find duplicate element
*/
static bool
HnswAddDuplicateIfFound(Relation index, HnswElement element, bool building)
HnswFindDuplicate(Relation index, HnswElement element, bool building)
{
HnswNeighborArray *neighbors = &element->neighbors[0];
@@ -519,12 +519,11 @@ HnswAddDuplicateIfFound(Relation index, HnswElement element, bool building)
{
HnswCandidate *neighbor = &neighbors->items[i];
/* Exit early if not duplicate since ordered by distance */
/* Exit early since ordered by distance */
if (!datumIsEqual(element->value, neighbor->element->value, false, -1))
return false;
/* If adding fails, continue to next duplicate element */
if (HnswAddDuplicateToPage(index, element, neighbor->element, building))
if (HnswAddDuplicate(index, element, neighbor->element, building))
return true;
}
@@ -539,8 +538,8 @@ WriteElement(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement elem
{
BlockNumber newInsertPage = InvalidBlockNumber;
/* Try to add to existing page */
if (HnswAddDuplicateIfFound(index, element, building))
/* Look for duplicate */
if (HnswFindDuplicate(index, element, building))
return;
/* Write element and neighbor tuples */
@@ -553,7 +552,7 @@ WriteElement(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement elem
/* Update neighbors */
HnswUpdateNeighborPages(index, procinfo, collation, element, m, false, building);
/* Update metapage if needed */
/* Update entry point if needed */
if (entryPoint == NULL || element->level > entryPoint->level)
HnswUpdateMetaPage(index, HNSW_UPDATE_ENTRY_GREATER, element, InvalidBlockNumber, MAIN_FORKNUM, building);
}

View File

@@ -908,30 +908,6 @@ SelectNeighbors(List *c, int m, int lc, FmgrInfo *procinfo, Oid collation, HnswE
return r;
}
/*
 * Locate a duplicate of an element that can still accept a heap TID
 *
 * Walks the element's layer 0 neighbors and returns the first one whose
 * value is identical according to datumIsEqual and which holds fewer than
 * HNSW_HEAPTIDS heap TIDs. Returns NULL when the first non-identical
 * neighbor is reached or when every identical neighbor is already full.
 * The element itself is not modified.
 */
HnswElement
HnswFindDuplicate(HnswElement e)
{
	HnswNeighborArray *layer0 = &e->neighbors[0];
	int			pos = 0;

	while (pos < layer0->length)
	{
		HnswElement other = layer0->items[pos].element;

		/* Neighbors are ordered by distance, so the first mismatch ends the search */
		if (!datumIsEqual(e->value, other->value, false, -1))
			return NULL;

		/* Identical value with a free heap TID slot */
		if (other->heaptidsLength < HNSW_HEAPTIDS)
			return other;

		/* This duplicate is full; try the next neighbor */
		pos++;
	}

	return NULL;
}
/*
* Add connections
*/