From c81302b835e8b41daa02c0acfc244c9dbb1de0c2 Mon Sep 17 00:00:00 2001 From: Heikki Linnakangas Date: Fri, 6 Oct 2023 02:04:50 +0300 Subject: [PATCH] Improve HNSW index build performance more (#295) This takes the approach from commit a713e2acaa further. Once we have removed a candidate from the "closer" set, we still don't need to recalculate everything that follows. Any candidates that were in the closer set before still only need to be compared with any new candidates that we have added. --- src/hnswutils.c | 35 +++++++++++++++++++++++++---------- 1 file changed, 25 insertions(+), 10 deletions(-) diff --git a/src/hnswutils.c b/src/hnswutils.c index 31e9dd8..2c7378f 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -755,7 +755,8 @@ SelectNeighbors(List *c, int m, int lc, FmgrInfo *procinfo, Oid collation, HnswE List *w = list_copy(c); pairingheap *wd; bool mustCalculate = !e2->neighbors[lc].closerSet; - bool foundNew = false; + List *added = NIL; + bool removedAny = false; if (list_length(w) <= m) return w; @@ -772,24 +773,38 @@ SelectNeighbors(List *c, int m, int lc, FmgrInfo *procinfo, Oid collation, HnswE /* Use previous state of r and wd to skip work when possible */ if (mustCalculate) e->closer = CheckElementCloser(e, r, lc, procinfo, collation); - else if (foundNew) + else if (list_length(added) > 0) { - /* If new or current candidate is not closer, no change in state */ - if (newCandidate->closer && e->closer) + /* + * If the current candidate was closer, we only need to compare it + * with the other candidates that we have added. 
+ */ + if (e->closer) { - /* Only need to compare with new candidate */ - float distance = HnswGetDistance(e->element, newCandidate->element, lc, procinfo, collation); - - e->closer = e->distance < distance; + e->closer = CheckElementCloser(e, added, lc, procinfo, collation); if (!e->closer) - mustCalculate = true; + removedAny = true; + } + else + { + /* + * If we have removed any candidates from closer, a candidate + * that was not closer earlier might now be. + */ + if (removedAny) + { + e->closer = CheckElementCloser(e, r, lc, procinfo, collation); + if (e->closer) + added = lappend(added, e); + } } } else if (e == newCandidate) { e->closer = CheckElementCloser(e, r, lc, procinfo, collation); - foundNew = true; + if (e->closer) + added = lappend(added, newCandidate); } if (e->closer)