Improved construction code

This commit is contained in:
Andrew Kane
2023-08-10 08:38:31 -07:00
parent df68eb4570
commit 6f15dd266c
5 changed files with 32 additions and 39 deletions

View File

@@ -258,7 +258,7 @@ List *HnswSearchLayer(Datum q, List *ep, int ef, int lc, Relation index, Fmgr
HnswElement HnswGetEntryPoint(Relation index);
HnswElement HnswInitElement(ItemPointer tid, int m, double ml, int maxLevel);
void HnswFreeElement(HnswElement element);
HnswElement HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, FmgrInfo *procinfo, Oid collation, int m, int efConstruction, List ***updateNeighbors, bool vacuuming);
HnswElement HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, FmgrInfo *procinfo, Oid collation, int m, int efConstruction, bool vacuuming);
HnswCandidate *HnswEntryCandidate(HnswElement em, Datum q, Relation rel, FmgrInfo *procinfo, Oid collation, bool loadvec);
void HnswUpdateMetaPage(Relation index, bool updateEntry, HnswElement entryPoint, BlockNumber insertPage, ForkNumber forkNum);
void HnswSetNeighborTuple(HnswNeighborTuple ntup, HnswElement e, int m);

View File

@@ -288,7 +288,17 @@ InsertTuple(Relation index, Datum *values, HnswElement element, HnswBuildState *
memcpy(element->vec, DatumGetVector(value), VECTOR_SIZE(buildstate->dimensions));
/* Insert element in graph */
*dup = HnswInsertElement(element, entryPoint, NULL, procinfo, collation, m, efConstruction, NULL, false);
*dup = HnswInsertElement(element, entryPoint, NULL, procinfo, collation, m, efConstruction, false);
/* Update neighbors */
for (int lc = element->level; lc >= 0; lc--)
{
int lm = HnswGetLayerM(m, lc);
HnswNeighborArray *neighbors = &element->neighbors[lc];
for (int i = 0; i < neighbors->length; i++)
HnswUpdateConnection(element, &neighbors->items[i], lm, lc, NULL, NULL, procinfo, collation);
}
/* Update entry point if needed */
if (*dup == NULL && (entryPoint == NULL || element->level > entryPoint->level))

View File

@@ -273,17 +273,16 @@ WriteNewElementPages(Relation index, HnswElement e, int m)
* Update neighbors
*/
static void
UpdateNeighborPages(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m, List **neighbors)
UpdateNeighborPages(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m)
{
for (int lc = e->level; lc >= 0; lc--)
{
int lm = HnswGetLayerM(m, lc);
List *levelNeighbors = neighbors[lc];
ListCell *lc2;
HnswNeighborArray *neighbors = &e->neighbors[lc];
foreach(lc2, levelNeighbors)
for (int i = 0; i < neighbors->length; i++)
{
HnswCandidate *hc = lfirst(lc2);
HnswCandidate *hc = &neighbors->items[i];
Buffer buf;
Page page;
GenericXLogState *state;
@@ -393,7 +392,7 @@ HnswAddDuplicate(Relation index, HnswElement element, HnswElement dup)
* Write changes to disk
*/
static void
WriteElement(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement element, int m, List **neighbors, HnswElement dup, HnswElement entryPoint)
WriteElement(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement element, int m, HnswElement dup, HnswElement entryPoint)
{
/* Try to add to existing page */
if (dup != NULL)
@@ -404,7 +403,7 @@ WriteElement(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement elem
/* If fails, take this path */
WriteNewElementPages(index, element, m);
UpdateNeighborPages(index, procinfo, collation, element, m, neighbors);
UpdateNeighborPages(index, procinfo, collation, element, m);
/* Update metapage if needed */
if (entryPoint == NULL || element->level > entryPoint->level)
@@ -426,7 +425,6 @@ HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_ti
double ml = HnswGetMl(m);
FmgrInfo *procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC);
Oid collation = index->rd_indcollation[0];
List **neighbors;
HnswElement dup;
/* Detoast once for all calls */
@@ -448,10 +446,10 @@ HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_ti
entryPoint = HnswGetEntryPoint(index);
/* Insert element in graph */
dup = HnswInsertElement(element, entryPoint, index, procinfo, collation, m, efConstruction, &neighbors, false);
dup = HnswInsertElement(element, entryPoint, index, procinfo, collation, m, efConstruction, false);
/* Write to disk */
WriteElement(index, procinfo, collation, element, m, neighbors, dup, entryPoint);
WriteElement(index, procinfo, collation, element, m, dup, entryPoint);
return true;
}

View File

@@ -744,13 +744,13 @@ SelectNeighbors(List *c, int m, int lc, FmgrInfo *procinfo, Oid collation, HnswC
* Find duplicate element
*/
static HnswElement
HnswFindDuplicate(HnswElement e, List *neighbors)
HnswFindDuplicate(HnswElement e)
{
ListCell *lc;
HnswNeighborArray *neighbors = &e->neighbors[0];
foreach(lc, neighbors)
for (int i = 0; i < neighbors->length; i++)
{
HnswCandidate *neighbor = lfirst(lc);
HnswCandidate *neighbor = &neighbors->items[i];
/* Exit early since ordered by distance */
if (vector_cmp_internal(e->vec, neighbor->element->vec) != 0)
@@ -885,13 +885,12 @@ HnswUpdateConnection(HnswElement element, HnswCandidate * hc, int m, int lc, int
* Algorithm 1 from paper
*/
HnswElement
HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, FmgrInfo *procinfo, Oid collation, int m, int efConstruction, List ***updateNeighbors, bool vacuuming)
HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, FmgrInfo *procinfo, Oid collation, int m, int efConstruction, bool vacuuming)
{
List *ep = NIL;
List *w;
int level = element->level;
int entryLevel;
List **neighbors = palloc(sizeof(List *) * (level + 1));
Datum q = PointerGetDatum(element->vec);
HnswElement dup;
BlockNumber *skipPage = vacuuming ? &element->neighborPage : NULL;
@@ -920,16 +919,14 @@ HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, F
ep = w;
}
while (level > entryLevel)
{
neighbors[level] = NIL;
level--;
}
if (level > entryLevel)
level = entryLevel;
/* 2nd phase */
for (int lc = level; lc >= 0; lc--)
{
int lm = HnswGetLayerM(m, lc);
List *neighbors;
w = HnswSearchLayer(q, ep, efConstruction, lc, index, procinfo, collation, true, skipPage, skipOffno);
@@ -937,30 +934,18 @@ HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, F
if (removeEntryPoint)
w = list_delete_ptr(w, entryCandidate);
neighbors[lc] = SelectNeighbors(w, lm, lc, procinfo, collation, NULL);
neighbors = SelectNeighbors(w, lm, lc, procinfo, collation, NULL);
AddConnections(element, neighbors[lc], lm, lc);
/* Update connections */
if (!vacuuming && updateNeighbors == NULL)
{
ListCell *lc2;
foreach(lc2, neighbors[lc])
HnswUpdateConnection(element, lfirst(lc2), lm, lc, NULL, index, procinfo, collation);
}
AddConnections(element, neighbors, lm, lc);
ep = w;
}
if (updateNeighbors != NULL)
*updateNeighbors = neighbors;
/* Look for duplicates */
/* This must come last, since duplicate updates can fail */
if (level >= 0 && !vacuuming)
{
dup = HnswFindDuplicate(element, neighbors[0]);
dup = HnswFindDuplicate(element);
if (dup != NULL)
return dup;
}

View File

@@ -223,7 +223,7 @@ RepairGraphElement(HnswVacuumState * vacuumstate, HnswElement element)
element->heaptids = NIL;
/* Add element to graph, skipping itself */
HnswInsertElement(element, entryPoint, index, procinfo, collation, m, efConstruction, NULL, true);
HnswInsertElement(element, entryPoint, index, procinfo, collation, m, efConstruction, true);
/* Update neighbor tuple */
/* Do this before getting page to minimize locking */