From 6d2af6d3f9b58c8041b27c055dbfd984e9b9063b Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Fri, 20 Sep 2024 15:21:57 -0700 Subject: [PATCH 1/3] Improved code [skip ci] --- src/hnswutils.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/hnswutils.c b/src/hnswutils.c index f69c057..6e01cf4 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -874,9 +874,6 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F { eElement = unvisited[i].element; eDistance = GetElementDistance(base, eElement, q, procinfo, collation); - - if (!(eDistance < f->distance || alwaysAdd)) - continue; } else { @@ -892,6 +889,9 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F continue; } + if (!(eDistance < f->distance || alwaysAdd)) + continue; + Assert(!eElement->deleted); /* Make robust to issues */ From d5e8fc96a5dd089d94ba075334bdcb583f1986ab Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 21 Sep 2024 12:07:44 -0700 Subject: [PATCH 2/3] Changed HnswPairingHeapNode to HnswSearchCandidate to reduce allocations and improve code --- src/hnsw.h | 9 +++--- src/hnswscan.c | 4 +-- src/hnswutils.c | 74 +++++++++++++++++++++++-------------------------- 3 files changed, 41 insertions(+), 46 deletions(-) diff --git a/src/hnsw.h b/src/hnsw.h index 2f45039..9fb650a 100644 --- a/src/hnsw.h +++ b/src/hnsw.h @@ -155,12 +155,13 @@ struct HnswNeighborArray HnswCandidate items[FLEXIBLE_ARRAY_MEMBER]; }; -typedef struct HnswPairingHeapNode +typedef struct HnswSearchCandidate { - HnswCandidate *inner; pairingheap_node c_node; pairingheap_node w_node; -} HnswPairingHeapNode; + HnswElementPtr element; + float distance; +} HnswSearchCandidate; /* HNSW index options */ typedef struct HnswOptions @@ -381,7 +382,7 @@ void *HnswAlloc(HnswAllocator * allocator, Size size); HnswElement HnswInitElement(char *base, ItemPointer tid, int m, double ml, int maxLevel, HnswAllocator * alloc); HnswElement HnswInitElementFromBlock(BlockNumber blkno, OffsetNumber offno); void HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint, Relation index, FmgrInfo *procinfo, Oid collation, int m, int efConstruction, bool existing); -HnswCandidate *HnswEntryCandidate(char *base, HnswElement em, Datum q, Relation rel, FmgrInfo *procinfo, Oid collation, bool loadVec); +HnswSearchCandidate *HnswEntryCandidate(char *base, HnswElement em, Datum q, Relation rel, FmgrInfo *procinfo, Oid collation, bool loadVec); void HnswUpdateMetaPage(Relation index, int updateEntry, HnswElement entryPoint, BlockNumber insertPage, ForkNumber forkNum, bool building); void HnswSetNeighborTuple(char *base, HnswNeighborTuple ntup, HnswElement e, int m); void HnswAddHeapTid(HnswElement element, ItemPointer heaptid); diff --git a/src/hnswscan.c b/src/hnswscan.c index 0463a89..30815af 100644 --- a/src/hnswscan.c +++ b/src/hnswscan.c @@ -161,14 +161,14 @@ hnswgettuple(IndexScanDesc scan, ScanDirection dir) so->first = false; #if defined(HNSW_MEMORY) - elog(INFO, "memory: %zu MB", MemoryContextMemAllocated(so->tmpCtx, false) / (1024 * 1024)); + elog(INFO, "memory: %zu KB", MemoryContextMemAllocated(so->tmpCtx, false) / 1024); #endif } while (list_length(so->w) > 0) { char *base = NULL; - HnswCandidate *hc = llast(so->w); + HnswSearchCandidate *hc = llast(so->w); HnswElement element = HnswPtrAccess(base, hc->element); ItemPointer heaptid; diff --git a/src/hnswutils.c b/src/hnswutils.c index 6e01cf4..ac1e7de 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -608,10 +608,10 @@ GetElementDistance(char *base, HnswElement element, Datum q, FmgrInfo *procinfo, /* * Create a candidate for the entry point */ -HnswCandidate * +HnswSearchCandidate * HnswEntryCandidate(char *base, HnswElement entryPoint, Datum q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec) { - HnswCandidate *hc = palloc(sizeof(HnswCandidate)); + HnswSearchCandidate *hc = palloc(sizeof(HnswSearchCandidate)); HnswPtrStore(base, hc->element, entryPoint); if (index == NULL) @@ -621,8 +621,8 @@ HnswEntryCandidate(char *base, HnswElement entryPoint, Datum q, Relation index, return hc; } -#define HnswGetPairingHeapCandidate(membername, ptr) (pairingheap_container(HnswPairingHeapNode, membername, ptr)->inner) -#define HnswGetPairingHeapCandidateConst(membername, ptr) (pairingheap_const_container(HnswPairingHeapNode, membername, ptr)->inner) +#define HnswGetSearchCandidate(membername, ptr) pairingheap_container(HnswSearchCandidate, membername, ptr) +#define HnswGetSearchCandidateConst(membername, ptr) pairingheap_const_container(HnswSearchCandidate, membername, ptr) /* * Compare candidate distances @@ -630,10 +630,10 @@ HnswEntryCandidate(char *base, HnswElement entryPoint, Datum q, Relation index, static int CompareNearestCandidates(const pairingheap_node *a, const pairingheap_node *b, void *arg) { - if (HnswGetPairingHeapCandidateConst(c_node, a)->distance < HnswGetPairingHeapCandidateConst(c_node, b)->distance) + if (HnswGetSearchCandidateConst(c_node, a)->distance < HnswGetSearchCandidateConst(c_node, b)->distance) return 1; - if (HnswGetPairingHeapCandidateConst(c_node, a)->distance > HnswGetPairingHeapCandidateConst(c_node, b)->distance) + if (HnswGetSearchCandidateConst(c_node, a)->distance > HnswGetSearchCandidateConst(c_node, b)->distance) return -1; return 0; @@ -645,27 +645,15 @@ CompareNearestCandidates(const pairingheap_node *a, const pairingheap_node *b, v static int CompareFurthestCandidates(const pairingheap_node *a, const pairingheap_node *b, void *arg) { - if (HnswGetPairingHeapCandidateConst(w_node, a)->distance < HnswGetPairingHeapCandidateConst(w_node, b)->distance) + if (HnswGetSearchCandidateConst(w_node, a)->distance < HnswGetSearchCandidateConst(w_node, b)->distance) return -1; - if (HnswGetPairingHeapCandidateConst(w_node, a)->distance > HnswGetPairingHeapCandidateConst(w_node, b)->distance) + if (HnswGetSearchCandidateConst(w_node, a)->distance > HnswGetSearchCandidateConst(w_node, b)->distance) return 1; return 0; } -/* - * Create a pairing heap node for a candidate - */ -static HnswPairingHeapNode * -CreatePairingHeapNode(HnswCandidate * c) -{ - HnswPairingHeapNode *node = palloc(sizeof(HnswPairingHeapNode)); - - node->inner = c; - return node; -} - /* * Init visited */ @@ -825,15 +813,13 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F /* Add entry points to v, C, and W */ foreach(lc2, ep) { - HnswCandidate *hc = (HnswCandidate *) lfirst(lc2); + HnswSearchCandidate *hc = (HnswSearchCandidate *) lfirst(lc2); bool found; - HnswPairingHeapNode *node; AddToVisited(base, &v, hc->element, index, &found); - node = CreatePairingHeapNode(hc); - pairingheap_add(C, &node->c_node); - pairingheap_add(W, &node->w_node); + pairingheap_add(C, &hc->c_node); + pairingheap_add(W, &hc->w_node); /* * Do not count elements being deleted towards ef when vacuuming. It @@ -846,8 +832,8 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F while (!pairingheap_is_empty(C)) { - HnswCandidate *c = HnswGetPairingHeapCandidate(c_node, pairingheap_remove_first(C)); - HnswCandidate *f = HnswGetPairingHeapCandidate(w_node, pairingheap_first(W)); + HnswSearchCandidate *c = HnswGetSearchCandidate(c_node, pairingheap_remove_first(C)); + HnswSearchCandidate *f = HnswGetSearchCandidate(w_node, pairingheap_first(W)); HnswElement cElement; if (c->distance > f->distance) @@ -863,12 +849,11 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F for (int i = 0; i < unvisitedLength; i++) { HnswElement eElement; - HnswCandidate *e; - HnswPairingHeapNode *node; + HnswSearchCandidate *e; float eDistance; bool alwaysAdd = wlen < ef; - f = HnswGetPairingHeapCandidate(w_node, pairingheap_first(W)); + f = HnswGetSearchCandidate(w_node, pairingheap_first(W)); if (index == NULL) { @@ -899,13 +884,11 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F continue; /* Create a new candidate */ - e = palloc(sizeof(HnswCandidate)); + e = palloc(sizeof(HnswSearchCandidate)); HnswPtrStore(base, e->element, eElement); e->distance = eDistance; - - node = CreatePairingHeapNode(e); - pairingheap_add(C, &node->c_node); - pairingheap_add(W, &node->w_node); + pairingheap_add(C, &e->c_node); + pairingheap_add(W, &e->w_node); /* * Do not count elements being deleted towards ef when vacuuming. @@ -926,7 +909,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F /* Add each element of W to w */ while (!pairingheap_is_empty(W)) { - HnswCandidate *hc = HnswGetPairingHeapCandidate(w_node, pairingheap_remove_first(W)); + HnswSearchCandidate *hc = HnswGetSearchCandidate(w_node, pairingheap_remove_first(W)); w = lappend(w, hc); } @@ -1307,16 +1290,27 @@ HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint { int lm = HnswGetLayerM(m, lc); List *neighbors; - List *lw; + List *lw = NIL; + ListCell *lc2; w = HnswSearchLayer(base, q, ep, efConstruction, lc, index, procinfo, collation, m, true, skipElement); + /* Convert search candidates to candidates */ + foreach(lc2, w) + { + HnswSearchCandidate *sc = lfirst(lc2); + HnswCandidate *hc = palloc(sizeof(HnswCandidate)); + + hc->element = sc->element; + hc->distance = sc->distance; + + lw = lappend(lw, hc); + } + /* Elements being deleted or skipped can help with search */ /* but should be removed before selecting neighbors */ if (index != NULL) - lw = RemoveElements(base, w, skipElement); - else - lw = w; + lw = RemoveElements(base, lw, skipElement); /* * Candidates are sorted, but not deterministically. Could set From be4e9a9df21ed10d5780a213deb1fac8fdc5d03b Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 21 Sep 2024 18:10:37 -0700 Subject: [PATCH 3/3] Added macros for IvfflatScanList [skip ci] --- src/ivfscan.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/src/ivfscan.c b/src/ivfscan.c index 1e95cd6..98731f4 100644 --- a/src/ivfscan.c +++ b/src/ivfscan.c @@ -15,16 +15,19 @@ #include "utils/memutils.h" #endif +#define GetScanList(ptr) pairingheap_container(IvfflatScanList, ph_node, ptr) +#define GetScanListConst(ptr) pairingheap_const_container(IvfflatScanList, ph_node, ptr) + /* * Compare list distances */ static int CompareLists(const pairingheap_node *a, const pairingheap_node *b, void *arg) { - if (((const IvfflatScanList *) a)->distance > ((const IvfflatScanList *) b)->distance) + if (GetScanListConst(a)->distance > GetScanListConst(b)->distance) return 1; - if (((const IvfflatScanList *) a)->distance < ((const IvfflatScanList *) b)->distance) + if (GetScanListConst(a)->distance < GetScanListConst(b)->distance) return -1; return 0; @@ -76,14 +79,14 @@ GetScanLists(IndexScanDesc scan, Datum value) /* Calculate max distance */ if (listCount == so->probes) - maxDistance = ((IvfflatScanList *) pairingheap_first(so->listQueue))->distance; + maxDistance = GetScanList(pairingheap_first(so->listQueue))->distance; } else if (distance < maxDistance) { IvfflatScanList *scanlist; /* Remove */ - scanlist = (IvfflatScanList *) pairingheap_remove_first(so->listQueue); + scanlist = GetScanList(pairingheap_remove_first(so->listQueue)); /* Reuse */ scanlist->startPage = list->startPage; @@ -91,7 +94,7 @@ GetScanLists(IndexScanDesc scan, Datum value) pairingheap_add(so->listQueue, &scanlist->ph_node); /* Update max distance */ - maxDistance = ((IvfflatScanList *) pairingheap_first(so->listQueue))->distance; + maxDistance = GetScanList(pairingheap_first(so->listQueue))->distance; } } @@ -122,7 +125,7 @@ GetScanItems(IndexScanDesc scan, Datum value) /* Search closest probes lists */ while (!pairingheap_is_empty(so->listQueue)) { - BlockNumber searchPage = ((IvfflatScanList *) pairingheap_remove_first(so->listQueue))->startPage; + BlockNumber searchPage = GetScanList(pairingheap_remove_first(so->listQueue))->startPage; /* Search all entry pages for list */ while (BlockNumberIsValid(searchPage))