From 97ac01773da55e4d1ff476333c17350118718ecd Mon Sep 17 00:00:00 2001
From: Andrew Kane
Date: Wed, 4 Oct 2023 19:43:25 -0700
Subject: [PATCH] Speed up HNSW index build

---
 src/hnsw.h      |  2 ++
 src/hnswutils.c | 25 +++++++++++++++++++------
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/src/hnsw.h b/src/hnsw.h
index cfba560..6c9809d 100644
--- a/src/hnsw.h
+++ b/src/hnsw.h
@@ -110,12 +110,14 @@ typedef struct HnswCandidate
 {
 	HnswElement element;
 	float		distance;
+	bool		closer;
 }			HnswCandidate;
 
 typedef struct HnswNeighborArray
 {
 	int			length;
 	HnswCandidate *items;
+	bool		closerSet;
 }			HnswNeighborArray;
 
 typedef struct HnswPairingHeapNode
diff --git a/src/hnswutils.c b/src/hnswutils.c
index 72cf94e..39a3205 100644
--- a/src/hnswutils.c
+++ b/src/hnswutils.c
@@ -139,6 +139,7 @@ HnswInitNeighbors(HnswElement element, int m)
 		a = &element->neighbors[lc];
 		a->length = 0;
 		a->items = palloc(sizeof(HnswCandidate) * lm);
+		a->closerSet = false;
 	}
 }
 
@@ -748,11 +749,12 @@ CheckElementCloser(HnswCandidate * e, List *r, int lc, FmgrInfo *procinfo, Oid c
  * Algorithm 4 from paper
  */
 static List *
-SelectNeighbors(List *c, int m, int lc, FmgrInfo *procinfo, Oid collation, HnswCandidate * *pruned)
+SelectNeighbors(List *c, int m, int lc, FmgrInfo *procinfo, Oid collation, HnswElement e2, HnswCandidate * newCandidate, HnswCandidate * *pruned)
 {
 	List	   *r = NIL;
 	List	   *w = list_copy(c);
 	pairingheap *wd;
+	bool		mustCalculate = !e2->neighbors[lc].closerSet;
 
 	if (list_length(w) <= m)
 		return w;
@@ -763,18 +765,29 @@ SelectNeighbors(List *c, int m, int lc, FmgrInfo *procinfo, Oid collation, HnswC
 	{
 		/* Assumes w is already ordered desc */
 		HnswCandidate *e = llast(w);
-		bool		closer;
 
 		w = list_delete_last(w);
 
-		closer = CheckElementCloser(e, r, lc, procinfo, collation);
+		/*
+		 * r and wd will be the same as previous calls until the new
+		 * candidate, so can skip distance calculations
+		 */
+		if (mustCalculate)
+			e->closer = CheckElementCloser(e, r, lc, procinfo, collation);
+		else if (e == newCandidate)
+		{
+			e->closer = CheckElementCloser(e, r, lc, procinfo, collation);
+			mustCalculate = true;
+		}
 
-		if (closer)
+		if (e->closer)
 			r = lappend(r, e);
 		else
 			pairingheap_add(wd, &(CreatePairingHeapNode(e)->ph_node));
 	}
 
+	e2->neighbors[lc].closerSet = true;
+
 	/* Keep pruned connections */
 	while (!pairingheap_is_empty(wd) && list_length(r) < m)
 		r = lappend(r, ((HnswPairingHeapNode *) pairingheap_remove_first(wd))->inner);
@@ -909,7 +922,7 @@ HnswUpdateConnection(HnswElement element, HnswCandidate * hc, int m, int lc, int
 		c = lappend(c, &hc2);
 		list_sort(c, CompareCandidateDistances);
 
-		SelectNeighbors(c, m, lc, procinfo, collation, &pruned);
+		SelectNeighbors(c, m, lc, procinfo, collation, hc->element, &hc2, &pruned);
 
 		/* Should not happen */
 		if (pruned == NULL)
@@ -1008,7 +1021,7 @@ HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, F
 		else
 			lw = w;
 
-		neighbors = SelectNeighbors(lw, lm, lc, procinfo, collation, NULL);
+		neighbors = SelectNeighbors(lw, lm, lc, procinfo, collation, element, NULL, NULL);
 
 		AddConnections(element, neighbors, lm, lc);
