From 6f15dd266c5a37fac75ca13e32b72a2a139b6b81 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Thu, 10 Aug 2023 08:38:31 -0700 Subject: [PATCH] Improved construction code --- src/hnsw.h | 2 +- src/hnswbuild.c | 12 +++++++++++- src/hnswinsert.c | 18 ++++++++---------- src/hnswutils.c | 37 +++++++++++-------------------------- src/hnswvacuum.c | 2 +- 5 files changed, 32 insertions(+), 39 deletions(-) diff --git a/src/hnsw.h b/src/hnsw.h index 14cef7c..e54f9cb 100644 --- a/src/hnsw.h +++ b/src/hnsw.h @@ -258,7 +258,7 @@ List *HnswSearchLayer(Datum q, List *ep, int ef, int lc, Relation index, Fmgr HnswElement HnswGetEntryPoint(Relation index); HnswElement HnswInitElement(ItemPointer tid, int m, double ml, int maxLevel); void HnswFreeElement(HnswElement element); -HnswElement HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, FmgrInfo *procinfo, Oid collation, int m, int efConstruction, List ***updateNeighbors, bool vacuuming); +HnswElement HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, FmgrInfo *procinfo, Oid collation, int m, int efConstruction, bool vacuuming); HnswCandidate *HnswEntryCandidate(HnswElement em, Datum q, Relation rel, FmgrInfo *procinfo, Oid collation, bool loadvec); void HnswUpdateMetaPage(Relation index, bool updateEntry, HnswElement entryPoint, BlockNumber insertPage, ForkNumber forkNum); void HnswSetNeighborTuple(HnswNeighborTuple ntup, HnswElement e, int m); diff --git a/src/hnswbuild.c b/src/hnswbuild.c index 2341cdb..1073cc8 100644 --- a/src/hnswbuild.c +++ b/src/hnswbuild.c @@ -288,7 +288,17 @@ InsertTuple(Relation index, Datum *values, HnswElement element, HnswBuildState * memcpy(element->vec, DatumGetVector(value), VECTOR_SIZE(buildstate->dimensions)); /* Insert element in graph */ - *dup = HnswInsertElement(element, entryPoint, NULL, procinfo, collation, m, efConstruction, NULL, false); + *dup = HnswInsertElement(element, entryPoint, NULL, procinfo, collation, m, efConstruction, false); + + /* Update neighbors */ + for (int lc = element->level; lc >= 0; lc--) + { + int lm = HnswGetLayerM(m, lc); + HnswNeighborArray *neighbors = &element->neighbors[lc]; + + for (int i = 0; i < neighbors->length; i++) + HnswUpdateConnection(element, &neighbors->items[i], lm, lc, NULL, NULL, procinfo, collation); + } /* Update entry point if needed */ if (*dup == NULL && (entryPoint == NULL || element->level > entryPoint->level)) diff --git a/src/hnswinsert.c b/src/hnswinsert.c index 8a71ff4..4e8af35 100644 --- a/src/hnswinsert.c +++ b/src/hnswinsert.c @@ -273,17 +273,16 @@ WriteNewElementPages(Relation index, HnswElement e, int m) * Update neighbors */ static void -UpdateNeighborPages(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m, List **neighbors) +UpdateNeighborPages(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m) { for (int lc = e->level; lc >= 0; lc--) { int lm = HnswGetLayerM(m, lc); - List *levelNeighbors = neighbors[lc]; - ListCell *lc2; + HnswNeighborArray *neighbors = &e->neighbors[lc]; - foreach(lc2, levelNeighbors) + for (int i = 0; i < neighbors->length; i++) { - HnswCandidate *hc = lfirst(lc2); + HnswCandidate *hc = &neighbors->items[i]; Buffer buf; Page page; GenericXLogState *state; @@ -393,7 +392,7 @@ HnswAddDuplicate(Relation index, HnswElement element, HnswElement dup) * Write changes to disk */ static void -WriteElement(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement element, int m, List **neighbors, HnswElement dup, HnswElement entryPoint) +WriteElement(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement element, int m, HnswElement dup, HnswElement entryPoint) { /* Try to add to existing page */ if (dup != NULL) @@ -404,7 +403,7 @@ WriteElement(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement elem /* If fails, take this path */ WriteNewElementPages(index, element, m); - UpdateNeighborPages(index, procinfo, collation, element, m, neighbors); + UpdateNeighborPages(index, procinfo, collation, element, m); /* Update metapage if needed */ if (entryPoint == NULL || element->level > entryPoint->level) @@ -426,7 +425,6 @@ HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_ti double ml = HnswGetMl(m); FmgrInfo *procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC); Oid collation = index->rd_indcollation[0]; - List **neighbors; HnswElement dup; /* Detoast once for all calls */ @@ -448,10 +446,10 @@ HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_ti entryPoint = HnswGetEntryPoint(index); /* Insert element in graph */ - dup = HnswInsertElement(element, entryPoint, index, procinfo, collation, m, efConstruction, &neighbors, false); + dup = HnswInsertElement(element, entryPoint, index, procinfo, collation, m, efConstruction, false); /* Write to disk */ - WriteElement(index, procinfo, collation, element, m, neighbors, dup, entryPoint); + WriteElement(index, procinfo, collation, element, m, dup, entryPoint); return true; } diff --git a/src/hnswutils.c b/src/hnswutils.c index f4c114c..f1f3a55 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -744,13 +744,13 @@ SelectNeighbors(List *c, int m, int lc, FmgrInfo *procinfo, Oid collation, HnswC * Find duplicate element */ static HnswElement -HnswFindDuplicate(HnswElement e, List *neighbors) +HnswFindDuplicate(HnswElement e) { - ListCell *lc; + HnswNeighborArray *neighbors = &e->neighbors[0]; - foreach(lc, neighbors) + for (int i = 0; i < neighbors->length; i++) { - HnswCandidate *neighbor = lfirst(lc); + HnswCandidate *neighbor = &neighbors->items[i]; /* Exit early since ordered by distance */ if (vector_cmp_internal(e->vec, neighbor->element->vec) != 0) @@ -885,13 +885,12 @@ HnswUpdateConnection(HnswElement element, HnswCandidate * hc, int m, int lc, int * Algorithm 1 from paper */ HnswElement -HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, FmgrInfo *procinfo, Oid collation, int m, int efConstruction, List ***updateNeighbors, bool vacuuming) +HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, FmgrInfo *procinfo, Oid collation, int m, int efConstruction, bool vacuuming) { List *ep = NIL; List *w; int level = element->level; int entryLevel; - List **neighbors = palloc(sizeof(List *) * (level + 1)); Datum q = PointerGetDatum(element->vec); HnswElement dup; BlockNumber *skipPage = vacuuming ? &element->neighborPage : NULL; @@ -920,16 +919,14 @@ HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, F ep = w; } - while (level > entryLevel) - { - neighbors[level] = NIL; - level--; - } + if (level > entryLevel) + level = entryLevel; /* 2nd phase */ for (int lc = level; lc >= 0; lc--) { int lm = HnswGetLayerM(m, lc); + List *neighbors; w = HnswSearchLayer(q, ep, efConstruction, lc, index, procinfo, collation, true, skipPage, skipOffno); @@ -937,30 +934,18 @@ HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, F if (removeEntryPoint) w = list_delete_ptr(w, entryCandidate); - neighbors[lc] = SelectNeighbors(w, lm, lc, procinfo, collation, NULL); + neighbors = SelectNeighbors(w, lm, lc, procinfo, collation, NULL); - AddConnections(element, neighbors[lc], lm, lc); - - /* Update connections */ - if (!vacuuming && updateNeighbors == NULL) - { - ListCell *lc2; - - foreach(lc2, neighbors[lc]) - HnswUpdateConnection(element, lfirst(lc2), lm, lc, NULL, index, procinfo, collation); - } + AddConnections(element, neighbors, lm, lc); ep = w; } - if (updateNeighbors != NULL) - *updateNeighbors = neighbors; - /* Look for duplicates */ /* This must come last, since duplicate updates can fail */ if (level >= 0 && !vacuuming) { - dup = HnswFindDuplicate(element, neighbors[0]); + dup = HnswFindDuplicate(element); if (dup != NULL) return dup; } diff --git a/src/hnswvacuum.c b/src/hnswvacuum.c index c1c289d..2a472b5 100644 --- a/src/hnswvacuum.c +++ b/src/hnswvacuum.c @@ -223,7 +223,7 @@ RepairGraphElement(HnswVacuumState * vacuumstate, HnswElement element) element->heaptids = NIL; /* Add element to graph, skipping itself */ - HnswInsertElement(element, entryPoint, index, procinfo, collation, m, efConstruction, NULL, true); + HnswInsertElement(element, entryPoint, index, procinfo, collation, m, efConstruction, true); /* Update neighbor tuple */ /* Do this before getting page to minimize locking */