From d74d3065bc67c97419c3ca3f016cfa3fa16b1565 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Thu, 19 Sep 2024 01:59:46 -0700 Subject: [PATCH 1/8] Reduced allocations for pairing heap --- src/hnsw.h | 3 ++- src/hnswutils.c | 31 +++++++++++++++++++------------ 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/hnsw.h b/src/hnsw.h index 480ad9f..5ef4cbd 100644 --- a/src/hnsw.h +++ b/src/hnsw.h @@ -162,8 +162,9 @@ struct HnswNeighborArray typedef struct HnswPairingHeapNode { - pairingheap_node ph_node; HnswCandidate *inner; + pairingheap_node c_node; + pairingheap_node w_node; } HnswPairingHeapNode; /* HNSW index options */ diff --git a/src/hnswutils.c b/src/hnswutils.c index 96c5026..8f7a783 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -607,16 +607,19 @@ HnswEntryCandidate(char *base, HnswElement entryPoint, Datum q, Relation index, return hc; } +#define HnswGetPairingHeapCandidate(membername, ptr) (pairingheap_container(HnswPairingHeapNode, membername, ptr)->inner) +#define HnswGetPairingHeapCandidateConst(membername, ptr) (pairingheap_const_container(HnswPairingHeapNode, membername, ptr)->inner) + /* * Compare candidate distances */ static int CompareNearestCandidates(const pairingheap_node *a, const pairingheap_node *b, void *arg) { - if (((const HnswPairingHeapNode *) a)->inner->distance < ((const HnswPairingHeapNode *) b)->inner->distance) + if (HnswGetPairingHeapCandidateConst(c_node, a)->distance < HnswGetPairingHeapCandidateConst(c_node, b)->distance) return 1; - if (((const HnswPairingHeapNode *) a)->inner->distance > ((const HnswPairingHeapNode *) b)->inner->distance) + if (HnswGetPairingHeapCandidateConst(c_node, a)->distance > HnswGetPairingHeapCandidateConst(c_node, b)->distance) return -1; return 0; @@ -628,10 +631,10 @@ CompareNearestCandidates(const pairingheap_node *a, const pairingheap_node *b, v static int CompareFurthestCandidates(const pairingheap_node *a, const pairingheap_node *b, void *arg) { - if (((const HnswPairingHeapNode *) a)->inner->distance < ((const HnswPairingHeapNode *) b)->inner->distance) + if (HnswGetPairingHeapCandidateConst(w_node, a)->distance < HnswGetPairingHeapCandidateConst(w_node, b)->distance) return -1; - if (((const HnswPairingHeapNode *) a)->inner->distance > ((const HnswPairingHeapNode *) b)->inner->distance) + if (HnswGetPairingHeapCandidateConst(w_node, a)->distance > HnswGetPairingHeapCandidateConst(w_node, b)->distance) return 1; return 0; @@ -746,11 +749,13 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F { HnswCandidate *hc = (HnswCandidate *) lfirst(lc2); bool found; + HnswPairingHeapNode *node; AddToVisited(base, &v, hc, index, &found); - pairingheap_add(C, &(CreatePairingHeapNode(hc)->ph_node)); - pairingheap_add(W, &(CreatePairingHeapNode(hc)->ph_node)); + node = CreatePairingHeapNode(hc); + pairingheap_add(C, &node->c_node); + pairingheap_add(W, &node->w_node); /* * Do not count elements being deleted towards ef when vacuuming. It @@ -764,8 +769,8 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F while (!pairingheap_is_empty(C)) { HnswNeighborArray *neighborhood; - HnswCandidate *c = ((HnswPairingHeapNode *) pairingheap_remove_first(C))->inner; - HnswCandidate *f = ((HnswPairingHeapNode *) pairingheap_first(W))->inner; + HnswCandidate *c = HnswGetPairingHeapCandidate(c_node, pairingheap_remove_first(C)); + HnswCandidate *f = HnswGetPairingHeapCandidate(w_node, pairingheap_first(W)); HnswElement cElement; if (c->distance > f->distance) @@ -801,7 +806,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F HnswElement eElement = HnswPtrAccess(base, e->element); bool alwaysAdd = wlen < ef; - f = ((HnswPairingHeapNode *) pairingheap_first(W))->inner; + f = HnswGetPairingHeapCandidate(w_node, pairingheap_first(W)); if (index == NULL) eDistance = GetCandidateDistance(base, e, q, procinfo, collation); @@ -811,6 +816,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F if (eDistance < f->distance || alwaysAdd) { HnswCandidate *ec; + HnswPairingHeapNode *node; Assert(!eElement->deleted); @@ -823,8 +829,9 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F HnswPtrStore(base, ec->element, eElement); ec->distance = eDistance; - pairingheap_add(C, &(CreatePairingHeapNode(ec)->ph_node)); - pairingheap_add(W, &(CreatePairingHeapNode(ec)->ph_node)); + node = CreatePairingHeapNode(ec); + pairingheap_add(C, &node->c_node); + pairingheap_add(W, &node->w_node); /* * Do not count elements being deleted towards ef when @@ -847,7 +854,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F /* Add each element of W to w */ while (!pairingheap_is_empty(W)) { - HnswCandidate *hc = ((HnswPairingHeapNode *) pairingheap_remove_first(W))->inner; + HnswCandidate *hc = HnswGetPairingHeapCandidate(w_node, pairingheap_remove_first(W)); w = lappend(w, hc); } From 8dde14a7362e99a4f6b31fc355b96bfa0de5c3b4 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Thu, 19 Sep 2024 02:17:51 -0700 Subject: [PATCH 2/8] Reduced memory usage for HNSW index scans Co-authored-by: Heikki Linnakangas --- src/hnswutils.c | 190 ++++++++++++++++++++++++++++++------------------ 1 file changed, 120 insertions(+), 70 deletions(-) diff --git a/src/hnswutils.c b/src/hnswutils.c index 8f7a783..b12709c 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -580,13 +580,12 @@ HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, } /* - * Get the distance for a candidate + * Get the distance for an element */ static float -GetCandidateDistance(char *base, HnswCandidate * hc, Datum q, FmgrInfo *procinfo, Oid collation) +GetElementDistance(char *base, HnswElement element, Datum q, FmgrInfo *procinfo, Oid collation) { - HnswElement hce = HnswPtrAccess(base, hc->element); - Datum value = HnswGetValue(base, hce); + Datum value = HnswGetValue(base, element); return DatumGetFloat8(FunctionCall2Coll(procinfo, collation, q, value)); } @@ -601,7 +600,7 @@ HnswEntryCandidate(char *base, HnswElement entryPoint, Datum q, Relation index, HnswPtrStore(base, hc->element, entryPoint); if (index == NULL) - hc->distance = GetCandidateDistance(base, hc, q, procinfo, collation); + hc->distance = GetElementDistance(base, entryPoint, q, procinfo, collation); else HnswLoadElement(entryPoint, &hc->distance, &q, index, procinfo, collation, loadVec, NULL); return hc; @@ -706,20 +705,87 @@ AddToVisited(char *base, visited_hash * v, HnswCandidate * hc, Relation index, b * Count element towards ef */ static inline bool -CountElement(char *base, HnswElement skipElement, HnswCandidate * hc) +CountElement(char *base, HnswElement skipElement, HnswElement e) { - HnswElement e; - if (skipElement == NULL) return true; /* Ensure does not access heaptidsLength during in-memory build */ pg_memory_barrier(); - e = HnswPtrAccess(base, hc->element); return e->heaptidsLength != 0; } +/* + * Load unvisited neighbors from memory + */ +static void +HnswLoadUnvisitedFromMemory(char *base, HnswElement element, HnswElement * unvisited, int *unvisitedLength, visited_hash * v, int lc, HnswNeighborArray * neighborhoodData, Size neighborhoodSize) +{ + /* Get the neighborhood at layer lc */ + HnswNeighborArray *neighborhood = HnswGetNeighbors(base, element, lc); + + /* Copy neighborhood to local memory */ + LWLockAcquire(&element->lock, LW_SHARED); + memcpy(neighborhoodData, neighborhood, neighborhoodSize); + LWLockRelease(&element->lock); + neighborhood = neighborhoodData; + + *unvisitedLength = 0; + + for (int i = 0; i < neighborhood->length; i++) + { + HnswCandidate *hc = &neighborhood->items[i]; + bool found; + + AddToVisited(base, v, hc, NULL, &found); + + if (!found) + unvisited[(*unvisitedLength)++] = HnswPtrAccess(base, hc->element); + } +} + +/* + * Load unvisited neighbors from disk + */ +static void +HnswLoadUnvisitedFromDisk(HnswElement element, HnswElement * unvisited, int *unvisitedLength, visited_hash * v, Relation index, int m, int lm, int lc) +{ + Buffer buf; + Page page; + HnswNeighborTuple ntup; + int start; + ItemPointerData indextids[HNSW_MAX_M * 2]; + + buf = ReadBuffer(index, element->neighborPage); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + + ntup = (HnswNeighborTuple) PageGetItem(page, PageGetItemId(page, element->neighborOffno)); + start = (element->level - lc) * m; + + /* Copy to minimize lock time */ + memcpy(&indextids, ntup->indextids + start, lm * sizeof(ItemPointerData)); + + UnlockReleaseBuffer(buf); + + *unvisitedLength = 0; + + for (int i = 0; i < lm; i++) + { + ItemPointer indextid = &indextids[i]; + bool found; + + if (!ItemPointerIsValid(indextid)) + break; + + tidhash_insert(v->tids, *indextid, &found); + + if (!found) + unvisited[(*unvisitedLength)++] = HnswInitElementFromBlock(ItemPointerGetBlockNumber(indextid), ItemPointerGetOffsetNumber(indextid)); + } +} + /* * Algorithm 2 from paper */ @@ -734,13 +800,16 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F ListCell *lc2; HnswNeighborArray *neighborhoodData = NULL; Size neighborhoodSize = 0; + int lm = HnswGetLayerM(m, lc); + HnswElement *unvisited = palloc(lm * sizeof(HnswElement)); + int unvisitedLength; InitVisited(base, &v, index, ef, m); /* Create local memory for neighborhood if needed */ if (index == NULL) { - neighborhoodSize = HNSW_NEIGHBOR_ARRAY_SIZE(HnswGetLayerM(m, lc)); + neighborhoodSize = HNSW_NEIGHBOR_ARRAY_SIZE(lm); neighborhoodData = palloc(neighborhoodSize); } @@ -762,13 +831,12 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F * would be ideal to do this for inserts as well, but this could * affect insert performance. */ - if (CountElement(base, skipElement, hc)) + if (CountElement(base, skipElement, HnswPtrAccess(base, hc->element))) wlen++; } while (!pairingheap_is_empty(C)) { - HnswNeighborArray *neighborhood; HnswCandidate *c = HnswGetPairingHeapCandidate(c_node, pairingheap_remove_first(C)); HnswCandidate *f = HnswGetPairingHeapCandidate(w_node, pairingheap_first(W)); HnswElement cElement; @@ -778,74 +846,56 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F cElement = HnswPtrAccess(base, c->element); - if (HnswPtrIsNull(base, cElement->neighbors)) - HnswLoadNeighbors(cElement, index, m); - - /* Get the neighborhood at layer lc */ - neighborhood = HnswGetNeighbors(base, cElement, lc); - - /* Copy neighborhood to local memory if needed */ if (index == NULL) + HnswLoadUnvisitedFromMemory(base, cElement, unvisited, &unvisitedLength, &v, lc, neighborhoodData, neighborhoodSize); + else + HnswLoadUnvisitedFromDisk(cElement, unvisited, &unvisitedLength, &v, index, m, lm, lc); + + for (int i = 0; i < unvisitedLength; i++) { - LWLockAcquire(&cElement->lock, LW_SHARED); - memcpy(neighborhoodData, neighborhood, neighborhoodSize); - LWLockRelease(&cElement->lock); - neighborhood = neighborhoodData; - } + HnswElement eElement = unvisited[i]; + float eDistance; + bool alwaysAdd = wlen < ef; - for (int i = 0; i < neighborhood->length; i++) - { - HnswCandidate *e = &neighborhood->items[i]; - bool visited; + f = HnswGetPairingHeapCandidate(w_node, pairingheap_first(W)); - AddToVisited(base, &v, e, index, &visited); + if (index == NULL) + eDistance = GetElementDistance(base, eElement, q, procinfo, collation); + else + HnswLoadElement(eElement, &eDistance, &q, index, procinfo, collation, inserting, alwaysAdd ? NULL : &f->distance); - if (!visited) + if (eDistance < f->distance || alwaysAdd) { - float eDistance; - HnswElement eElement = HnswPtrAccess(base, e->element); - bool alwaysAdd = wlen < ef; + HnswCandidate *e; + HnswPairingHeapNode *node; - f = HnswGetPairingHeapCandidate(w_node, pairingheap_first(W)); + Assert(!eElement->deleted); - if (index == NULL) - eDistance = GetCandidateDistance(base, e, q, procinfo, collation); - else - HnswLoadElement(eElement, &eDistance, &q, index, procinfo, collation, inserting, alwaysAdd ? NULL : &f->distance); + /* Make robust to issues */ + if (eElement->level < lc) + continue; - if (eDistance < f->distance || alwaysAdd) + /* Create a new candidate */ + e = palloc(sizeof(HnswCandidate)); + HnswPtrStore(base, e->element, eElement); + e->distance = eDistance; + + node = CreatePairingHeapNode(e); + pairingheap_add(C, &node->c_node); + pairingheap_add(W, &node->w_node); + + /* + * Do not count elements being deleted towards ef when + * vacuuming. It would be ideal to do this for inserts as + * well, but this could affect insert performance. + */ + if (CountElement(base, skipElement, eElement)) { - HnswCandidate *ec; - HnswPairingHeapNode *node; + wlen++; - Assert(!eElement->deleted); - - /* Make robust to issues */ - if (eElement->level < lc) - continue; - - /* Copy e */ - ec = palloc(sizeof(HnswCandidate)); - HnswPtrStore(base, ec->element, eElement); - ec->distance = eDistance; - - node = CreatePairingHeapNode(ec); - pairingheap_add(C, &node->c_node); - pairingheap_add(W, &node->w_node); - - /* - * Do not count elements being deleted towards ef when - * vacuuming. It would be ideal to do this for inserts as - * well, but this could affect insert performance. - */ - if (CountElement(base, skipElement, e)) - { - wlen++; - - /* No need to decrement wlen */ - if (wlen > ef) - pairingheap_remove_first(W); - } + /* No need to decrement wlen */ + if (wlen > ef) + pairingheap_remove_first(W); } } } @@ -1117,7 +1167,7 @@ HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, int lm if (HnswPtrIsNull(base, hc3Element->value)) HnswLoadElement(hc3Element, &hc3->distance, &q, index, procinfo, collation, true, NULL); else - hc3->distance = GetCandidateDistance(base, hc3, q, procinfo, collation); + hc3->distance = GetElementDistance(base, hc3Element, q, procinfo, collation); /* Prune element if being deleted */ if (hc3Element->heaptidsLength == 0) From 16ca608f42e19e01be80fe7b5c3de0d29658ba81 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Thu, 19 Sep 2024 02:41:20 -0700 Subject: [PATCH 3/8] Updated AddToVisited to use HnswElementPtr --- src/hnswutils.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/hnswutils.c b/src/hnswutils.c index b12709c..4881b60 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -669,11 +669,11 @@ InitVisited(char *base, visited_hash * v, Relation index, int ef, int m) * Add to visited */ static inline void -AddToVisited(char *base, visited_hash * v, HnswCandidate * hc, Relation index, bool *found) +AddToVisited(char *base, visited_hash * v, HnswElementPtr elementPtr, Relation index, bool *found) { if (index != NULL) { - HnswElement element = HnswPtrAccess(base, hc->element); + HnswElement element = HnswPtrAccess(base, elementPtr); ItemPointerData indextid; ItemPointerSet(&indextid, element->blkno, element->offno); @@ -682,21 +682,21 @@ AddToVisited(char *base, visited_hash * v, HnswCandidate * hc, Relation index, b else if (base != NULL) { #if PG_VERSION_NUM >= 130000 - HnswElement element = HnswPtrAccess(base, hc->element); + HnswElement element = HnswPtrAccess(base, elementPtr); - offsethash_insert_hash(v->offsets, HnswPtrOffset(hc->element), element->hash, found); + offsethash_insert_hash(v->offsets, HnswPtrOffset(elementPtr), element->hash, found); #else - offsethash_insert(v->offsets, HnswPtrOffset(hc->element), found); + offsethash_insert(v->offsets, HnswPtrOffset(elementPtr), found); #endif } else { #if PG_VERSION_NUM >= 130000 - HnswElement element = HnswPtrAccess(base, hc->element); + HnswElement element = HnswPtrAccess(base, elementPtr); - pointerhash_insert_hash(v->pointers, (uintptr_t) HnswPtrPointer(hc->element), element->hash, found); + pointerhash_insert_hash(v->pointers, (uintptr_t) HnswPtrPointer(elementPtr), element->hash, found); #else - pointerhash_insert(v->pointers, (uintptr_t) HnswPtrPointer(hc->element), found); + pointerhash_insert(v->pointers, (uintptr_t) HnswPtrPointer(elementPtr), found); #endif } } @@ -738,7 +738,7 @@ HnswLoadUnvisitedFromMemory(char *base, HnswElement element, HnswElement * unvis HnswCandidate *hc = &neighborhood->items[i]; bool found; - AddToVisited(base, v, hc, NULL, &found); + AddToVisited(base, v, hc->element, NULL, &found); if (!found) unvisited[(*unvisitedLength)++] = HnswPtrAccess(base, hc->element); @@ -820,7 +820,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F bool found; HnswPairingHeapNode *node; - AddToVisited(base, &v, hc, index, &found); + AddToVisited(base, &v, hc->element, index, &found); node = CreatePairingHeapNode(hc); pairingheap_add(C, &node->c_node); From 4b44d6e745c8c32fbe24579159b7f8c71139d319 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Thu, 19 Sep 2024 02:42:33 -0700 Subject: [PATCH 4/8] Updated changelog [skip ci] --- CHANGELOG.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index af972fe..246bba0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,7 @@ +## 0.7.5 (unreleased) + +- Reduced memory usage for HNSW index scans + ## 0.7.4 (2024-08-05) - Fixed locking for parallel HNSW index builds From 5c9429a0f88e93e9ca8b5f1e481e1b3379bc5a52 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Thu, 19 Sep 2024 03:27:35 -0700 Subject: [PATCH 5/8] Reduced memory usage for HNSW index scans --- src/hnswutils.c | 55 ++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 12 deletions(-) diff --git a/src/hnswutils.c b/src/hnswutils.c index 4881b60..764e9d5 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -112,6 +112,12 @@ typedef union tidhash_hash *tids; } visited_hash; +typedef union +{ + HnswElement element; + ItemPointerData indextid; +} HnswUnvisited; + /* * Get the max number of connections in an upper layer for each element in the index */ @@ -547,19 +553,19 @@ HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHe /* * Load an element and optionally get its distance from q */ -void -HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, float *maxDistance) +static void +HnswLoadElementImpl(BlockNumber blkno, OffsetNumber offno, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, float *maxDistance, HnswElement * element) { Buffer buf; Page page; HnswElementTuple etup; /* Read vector */ - buf = ReadBuffer(index, element->blkno); + buf = ReadBuffer(index, blkno); LockBuffer(buf, BUFFER_LOCK_SHARE); page = BufferGetPage(buf); - etup = (HnswElementTuple) PageGetItem(page, PageGetItemId(page, element->offno)); + etup = (HnswElementTuple) PageGetItem(page, PageGetItemId(page, offno)); Assert(HnswIsElementTuple(etup)); @@ -574,11 +580,25 @@ HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, /* Load element */ if (distance == NULL || maxDistance == NULL || *distance < *maxDistance) - HnswLoadElementFromTuple(element, etup, true, loadVec); + { + if (*element == NULL) + *element = HnswInitElementFromBlock(blkno, offno); + + HnswLoadElementFromTuple(*element, etup, true, loadVec); + } UnlockReleaseBuffer(buf); } +/* + * Load an element and optionally get its distance from q + */ +void +HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, float *maxDistance) +{ + HnswLoadElementImpl(element->blkno, element->offno, distance, q, index, procinfo, collation, loadVec, maxDistance, &element); +} + /* * Get the distance for an element */ @@ -720,7 +740,7 @@ CountElement(char *base, HnswElement skipElement, HnswElement e) * Load unvisited neighbors from memory */ static void -HnswLoadUnvisitedFromMemory(char *base, HnswElement element, HnswElement * unvisited, int *unvisitedLength, visited_hash * v, int lc, HnswNeighborArray * neighborhoodData, Size neighborhoodSize) +HnswLoadUnvisitedFromMemory(char *base, HnswElement element, HnswUnvisited * unvisited, int *unvisitedLength, visited_hash * v, int lc, HnswNeighborArray * neighborhoodData, Size neighborhoodSize) { /* Get the neighborhood at layer lc */ HnswNeighborArray *neighborhood = HnswGetNeighbors(base, element, lc); @@ -741,7 +761,7 @@ HnswLoadUnvisitedFromMemory(char *base, HnswElement element, HnswElement * unvis AddToVisited(base, v, hc->element, NULL, &found); if (!found) - unvisited[(*unvisitedLength)++] = HnswPtrAccess(base, hc->element); + unvisited[(*unvisitedLength)++].element = HnswPtrAccess(base, hc->element); } } @@ -749,7 +769,7 @@ HnswLoadUnvisitedFromMemory(char *base, HnswElement element, HnswElement * unvis * Load unvisited neighbors from disk */ static void -HnswLoadUnvisitedFromDisk(HnswElement element, HnswElement * unvisited, int *unvisitedLength, visited_hash * v, Relation index, int m, int lm, int lc) +HnswLoadUnvisitedFromDisk(HnswElement element, HnswUnvisited * unvisited, int *unvisitedLength, visited_hash * v, Relation index, int m, int lm, int lc) { Buffer buf; Page page; @@ -782,7 +802,7 @@ HnswLoadUnvisitedFromDisk(HnswElement element, HnswElement * unvisited, int *unv tidhash_insert(v->tids, *indextid, &found); if (!found) - unvisited[(*unvisitedLength)++] = HnswInitElementFromBlock(ItemPointerGetBlockNumber(indextid), ItemPointerGetOffsetNumber(indextid)); + unvisited[(*unvisitedLength)++].indextid = *indextid; } } @@ -801,7 +821,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F HnswNeighborArray *neighborhoodData = NULL; Size neighborhoodSize = 0; int lm = HnswGetLayerM(m, lc); - HnswElement *unvisited = palloc(lm * sizeof(HnswElement)); + HnswUnvisited *unvisited = palloc(lm * sizeof(HnswUnvisited)); int unvisitedLength; InitVisited(base, &v, index, ef, m); @@ -853,16 +873,27 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F for (int i = 0; i < unvisitedLength; i++) { - HnswElement eElement = unvisited[i]; + HnswElement eElement; float eDistance; bool alwaysAdd = wlen < ef; f = HnswGetPairingHeapCandidate(w_node, pairingheap_first(W)); if (index == NULL) + { + eElement = unvisited[i].element; eDistance = GetElementDistance(base, eElement, q, procinfo, collation); + } else - HnswLoadElement(eElement, &eDistance, &q, index, procinfo, collation, inserting, alwaysAdd ? NULL : &f->distance); + { + ItemPointer indextid = &unvisited[i].indextid; + BlockNumber blkno = ItemPointerGetBlockNumber(indextid); + OffsetNumber offno = ItemPointerGetOffsetNumber(indextid); + + /* Avoid any allocations if not adding */ + eElement = NULL; + HnswLoadElementImpl(blkno, offno, &eDistance, &q, index, procinfo, collation, inserting, alwaysAdd ? NULL : &f->distance, &eElement); + } if (eDistance < f->distance || alwaysAdd) { From a15806196ebae7e52c3446b1e26ee54cb0f793b9 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Thu, 19 Sep 2024 04:02:09 -0700 Subject: [PATCH 6/8] Keep scan-build happy --- src/hnswutils.c | 64 +++++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 31 deletions(-) diff --git a/src/hnswutils.c b/src/hnswutils.c index 764e9d5..bb4a32c 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -874,6 +874,8 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F for (int i = 0; i < unvisitedLength; i++) { HnswElement eElement; + HnswCandidate *e; + HnswPairingHeapNode *node; float eDistance; bool alwaysAdd = wlen < ef; @@ -883,6 +885,9 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F { eElement = unvisited[i].element; eDistance = GetElementDistance(base, eElement, q, procinfo, collation); + + if (!(eDistance < f->distance || alwaysAdd)) + continue; } else { @@ -893,41 +898,38 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F /* Avoid any allocations if not adding */ eElement = NULL; HnswLoadElementImpl(blkno, offno, &eDistance, &q, index, procinfo, collation, inserting, alwaysAdd ? NULL : &f->distance, &eElement); + + if (eElement == NULL) + continue; } - if (eDistance < f->distance || alwaysAdd) + Assert(!eElement->deleted); + + /* Make robust to issues */ + if (eElement->level < lc) + continue; + + /* Create a new candidate */ + e = palloc(sizeof(HnswCandidate)); + HnswPtrStore(base, e->element, eElement); + e->distance = eDistance; + + node = CreatePairingHeapNode(e); + pairingheap_add(C, &node->c_node); + pairingheap_add(W, &node->w_node); + + /* + * Do not count elements being deleted towards ef when vacuuming. + * It would be ideal to do this for inserts as well, but this + * could affect insert performance. + */ + if (CountElement(base, skipElement, eElement)) { - HnswCandidate *e; - HnswPairingHeapNode *node; + wlen++; - Assert(!eElement->deleted); - - /* Make robust to issues */ - if (eElement->level < lc) - continue; - - /* Create a new candidate */ - e = palloc(sizeof(HnswCandidate)); - HnswPtrStore(base, e->element, eElement); - e->distance = eDistance; - - node = CreatePairingHeapNode(e); - pairingheap_add(C, &node->c_node); - pairingheap_add(W, &node->w_node); - - /* - * Do not count elements being deleted towards ef when - * vacuuming. It would be ideal to do this for inserts as - * well, but this could affect insert performance. - */ - if (CountElement(base, skipElement, eElement)) - { - wlen++; - - /* No need to decrement wlen */ - if (wlen > ef) - pairingheap_remove_first(W); - } + /* No need to decrement wlen */ + if (wlen > ef) + pairingheap_remove_first(W); } } } From 4f8ab574c91448568432491f37cb0bbc679e02ec Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Thu, 19 Sep 2024 04:32:38 -0700 Subject: [PATCH 7/8] Simplified CountElement [skip ci] --- src/hnswutils.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/hnswutils.c b/src/hnswutils.c index bb4a32c..d1fb22c 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -725,7 +725,7 @@ AddToVisited(char *base, visited_hash * v, HnswElementPtr elementPtr, Relation i * Count element towards ef */ static inline bool -CountElement(char *base, HnswElement skipElement, HnswElement e) +CountElement(HnswElement skipElement, HnswElement e) { if (skipElement == NULL) return true; @@ -851,7 +851,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F * would be ideal to do this for inserts as well, but this could * affect insert performance. */ - if (CountElement(base, skipElement, HnswPtrAccess(base, hc->element))) + if (CountElement(skipElement, HnswPtrAccess(base, hc->element))) wlen++; } @@ -923,7 +923,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F * It would be ideal to do this for inserts as well, but this * could affect insert performance. */ - if (CountElement(base, skipElement, eElement)) + if (CountElement(skipElement, eElement)) { wlen++; From f9d68a061a2e3ceb17573a6662a1c482e5b7b534 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Thu, 19 Sep 2024 04:39:46 -0700 Subject: [PATCH 8/8] Simplified HnswLoadUnvisitedFromMemory [skip ci] --- src/hnswutils.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/hnswutils.c b/src/hnswutils.c index d1fb22c..34eb08d 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -749,13 +749,12 @@ HnswLoadUnvisitedFromMemory(char *base, HnswElement element, HnswUnvisited * unv LWLockAcquire(&element->lock, LW_SHARED); memcpy(neighborhoodData, neighborhood, neighborhoodSize); LWLockRelease(&element->lock); - neighborhood = neighborhoodData; *unvisitedLength = 0; - for (int i = 0; i < neighborhood->length; i++) + for (int i = 0; i < neighborhoodData->length; i++) { - HnswCandidate *hc = &neighborhood->items[i]; + HnswCandidate *hc = &neighborhoodData->items[i]; bool found; AddToVisited(base, v, hc->element, NULL, &found);