From 0b6214aad6b3f5e82b8de81472e92ae5b313df56 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 29 Sep 2024 15:49:01 -0700 Subject: [PATCH 01/27] Moved HnswLoadNeighbors to hnswinsert.c [skip ci] --- src/hnsw.h | 1 - src/hnswinsert.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++ src/hnswutils.c | 63 ------------------------------------------------ 3 files changed, 63 insertions(+), 64 deletions(-) diff --git a/src/hnsw.h b/src/hnsw.h index b57e9f8..cfbcd3f 100644 --- a/src/hnsw.h +++ b/src/hnsw.h @@ -393,7 +393,6 @@ void HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool void HnswLoadElement(HnswElement element, double *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, double *maxDistance); void HnswSetElementTuple(char *base, HnswElementTuple etup, HnswElement element); void HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, int lm, int lc, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation); -void HnswLoadNeighbors(HnswElement element, Relation index, int m); void HnswInitLockTranche(void); const HnswTypeInfo *HnswGetTypeInfo(Relation index); PGDLLEXPORT void HnswParallelBuildMain(dsm_segment *seg, shm_toc *toc); diff --git a/src/hnswinsert.c b/src/hnswinsert.c index 2dce16f..42d7378 100644 --- a/src/hnswinsert.c +++ b/src/hnswinsert.c @@ -334,6 +334,69 @@ AddElementOnDisk(Relation index, HnswElement e, int m, BlockNumber insertPage, B *updatedInsertPage = newInsertPage; } +/* + * Load neighbors from page + */ +static void +LoadNeighborsFromPage(HnswElement element, Relation index, Page page, int m) +{ + char *base = NULL; + + HnswNeighborTuple ntup = (HnswNeighborTuple) PageGetItem(page, PageGetItemId(page, element->neighborOffno)); + int neighborCount = (element->level + 2) * m; + + Assert(HnswIsNeighborTuple(ntup)); + + HnswInitNeighbors(base, element, m, NULL); + + /* Ensure expected neighbors */ + if (ntup->count != neighborCount) + return; + + for 
(int i = 0; i < neighborCount; i++) + { + HnswElement e; + int level; + HnswCandidate *hc; + ItemPointer indextid; + HnswNeighborArray *neighbors; + + indextid = &ntup->indextids[i]; + + if (!ItemPointerIsValid(indextid)) + continue; + + e = HnswInitElementFromBlock(ItemPointerGetBlockNumber(indextid), ItemPointerGetOffsetNumber(indextid)); + + /* Calculate level based on offset */ + level = element->level - i / m; + if (level < 0) + level = 0; + + neighbors = HnswGetNeighbors(base, element, level); + hc = &neighbors->items[neighbors->length++]; + HnswPtrStore(base, hc->element, e); + } +} + +/* + * Load neighbors + */ +static void +HnswLoadNeighbors(HnswElement element, Relation index, int m) +{ + Buffer buf; + Page page; + + buf = ReadBuffer(index, element->neighborPage); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = BufferGetPage(buf); + + LoadNeighborsFromPage(element, index, page, m); + + UnlockReleaseBuffer(buf); +} + /* * Check if connection already exists */ diff --git a/src/hnswutils.c b/src/hnswutils.c index 79bc086..3a66d85 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -449,69 +449,6 @@ HnswSetNeighborTuple(char *base, HnswNeighborTuple ntup, HnswElement e, int m) ntup->count = idx; } -/* - * Load neighbors from page - */ -static void -LoadNeighborsFromPage(HnswElement element, Relation index, Page page, int m) -{ - char *base = NULL; - - HnswNeighborTuple ntup = (HnswNeighborTuple) PageGetItem(page, PageGetItemId(page, element->neighborOffno)); - int neighborCount = (element->level + 2) * m; - - Assert(HnswIsNeighborTuple(ntup)); - - HnswInitNeighbors(base, element, m, NULL); - - /* Ensure expected neighbors */ - if (ntup->count != neighborCount) - return; - - for (int i = 0; i < neighborCount; i++) - { - HnswElement e; - int level; - HnswCandidate *hc; - ItemPointer indextid; - HnswNeighborArray *neighbors; - - indextid = &ntup->indextids[i]; - - if (!ItemPointerIsValid(indextid)) - continue; - - e = 
HnswInitElementFromBlock(ItemPointerGetBlockNumber(indextid), ItemPointerGetOffsetNumber(indextid)); - - /* Calculate level based on offset */ - level = element->level - i / m; - if (level < 0) - level = 0; - - neighbors = HnswGetNeighbors(base, element, level); - hc = &neighbors->items[neighbors->length++]; - HnswPtrStore(base, hc->element, e); - } -} - -/* - * Load neighbors - */ -void -HnswLoadNeighbors(HnswElement element, Relation index, int m) -{ - Buffer buf; - Page page; - - buf = ReadBuffer(index, element->neighborPage); - LockBuffer(buf, BUFFER_LOCK_SHARE); - page = BufferGetPage(buf); - - LoadNeighborsFromPage(element, index, page, m); - - UnlockReleaseBuffer(buf); -} - /* * Load an element from a tuple */ From 382a25aefbe825ac2a63dcee7caf3ebbbefe5e4c Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 29 Sep 2024 17:20:54 -0700 Subject: [PATCH 02/27] Split loading neighbor TIDs into separate function [skip ci] --- src/hnswutils.c | 28 ++++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/src/hnswutils.c b/src/hnswutils.c index 3a66d85..5c6d68e 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -680,18 +680,15 @@ HnswLoadUnvisitedFromMemory(char *base, HnswElement element, HnswUnvisited * unv } /* - * Load unvisited neighbors from disk + * Load neighbor index TIDs */ -static void -HnswLoadUnvisitedFromDisk(HnswElement element, HnswUnvisited * unvisited, int *unvisitedLength, visited_hash * v, Relation index, int m, int lm, int lc) +static bool +HnswLoadNeighborTids(HnswElement element, ItemPointerData *indextids, Relation index, int m, int lm, int lc) { Buffer buf; Page page; HnswNeighborTuple ntup; int start; - ItemPointerData indextids[HNSW_MAX_M * 2]; - - *unvisitedLength = 0; buf = ReadBuffer(index, element->neighborPage); LockBuffer(buf, BUFFER_LOCK_SHARE); @@ -703,14 +700,29 @@ HnswLoadUnvisitedFromDisk(HnswElement element, HnswUnvisited * unvisited, int *u if (ntup->count != (element->level + 2) * m) { 
UnlockReleaseBuffer(buf); - return; + return false; } /* Copy to minimize lock time */ start = (element->level - lc) * m; - memcpy(&indextids, ntup->indextids + start, lm * sizeof(ItemPointerData)); + memcpy(indextids, ntup->indextids + start, lm * sizeof(ItemPointerData)); UnlockReleaseBuffer(buf); + return true; +} + +/* + * Load unvisited neighbors from disk + */ +static void +HnswLoadUnvisitedFromDisk(HnswElement element, HnswUnvisited * unvisited, int *unvisitedLength, visited_hash * v, Relation index, int m, int lm, int lc) +{ + ItemPointerData indextids[HNSW_MAX_M * 2]; + + *unvisitedLength = 0; + + if (!HnswLoadNeighborTids(element, indextids, index, m, lm, lc)) + return; for (int i = 0; i < lm; i++) { From f371eb119b37ae524a7cc4dac7fb6309c6f2d868 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 29 Sep 2024 18:14:28 -0700 Subject: [PATCH 03/27] Removed lc from SelectNeighbors [skip ci] --- src/hnswutils.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/hnswutils.c b/src/hnswutils.c index 5c6d68e..c2f1128 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -959,14 +959,13 @@ CheckElementCloser(char *base, HnswCandidate * e, List *r, FmgrInfo *procinfo, O * Algorithm 4 from paper */ static List * -SelectNeighbors(char *base, List *c, int lm, int lc, FmgrInfo *procinfo, Oid collation, HnswElement e2, HnswCandidate * newCandidate, HnswCandidate * *pruned, bool sortCandidates) +SelectNeighbors(char *base, List *c, int lm, FmgrInfo *procinfo, Oid collation, HnswNeighborArray * neighbors, HnswCandidate * newCandidate, HnswCandidate * *pruned, bool sortCandidates) { List *r = NIL; List *w = list_copy(c); HnswCandidate **wd; int wdlen = 0; int wdoff = 0; - HnswNeighborArray *neighbors = HnswGetNeighbors(base, e2, lc); bool mustCalculate = !neighbors->closerSet; List *added = NIL; bool removedAny = false; @@ -1139,7 +1138,7 @@ HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, int lm c = lappend(c, 
&currentNeighbors->items[i]); c = lappend(c, &hc2); - SelectNeighbors(base, c, lm, lc, procinfo, collation, hce, &hc2, &pruned, true); + SelectNeighbors(base, c, lm, procinfo, collation, currentNeighbors, &hc2, &pruned, true); /* Should not happen */ if (pruned == NULL) @@ -1278,7 +1277,7 @@ HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint * sortCandidates to true for in-memory builds to enable closer * caching, but there does not seem to be a difference in performance. */ - neighbors = SelectNeighbors(base, lw, lm, lc, procinfo, collation, element, NULL, NULL, false); + neighbors = SelectNeighbors(base, lw, lm, procinfo, collation, HnswGetNeighbors(base, element, lc), NULL, NULL, false); AddConnections(base, element, neighbors, lc); From 5ce367e18b11089eb4fb1569d3a44221809e57cd Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 29 Sep 2024 18:18:42 -0700 Subject: [PATCH 04/27] Removed lc from HnswUpdateConnection [skip ci] --- src/hnsw.h | 2 +- src/hnswbuild.c | 2 +- src/hnswinsert.c | 2 +- src/hnswutils.c | 3 +-- 4 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/hnsw.h b/src/hnsw.h index cfbcd3f..caec2e4 100644 --- a/src/hnsw.h +++ b/src/hnsw.h @@ -392,7 +392,7 @@ void HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collatio void HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHeaptids, bool loadVec); void HnswLoadElement(HnswElement element, double *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, double *maxDistance); void HnswSetElementTuple(char *base, HnswElementTuple etup, HnswElement element); -void HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, int lm, int lc, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation); +void HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, HnswNeighborArray * currentNeighbors, int lm, int *updateIdx, Relation index, FmgrInfo *procinfo, 
Oid collation); void HnswInitLockTranche(void); const HnswTypeInfo *HnswGetTypeInfo(Relation index); PGDLLEXPORT void HnswParallelBuildMain(dsm_segment *seg, shm_toc *toc); diff --git a/src/hnswbuild.c b/src/hnswbuild.c index 498b5d9..10bda20 100644 --- a/src/hnswbuild.c +++ b/src/hnswbuild.c @@ -388,7 +388,7 @@ UpdateNeighborsInMemory(char *base, FmgrInfo *procinfo, Oid collation, HnswEleme Assert(neighborElement); LWLockAcquire(&neighborElement->lock, LW_EXCLUSIVE); - HnswUpdateConnection(base, e, hc, lm, lc, NULL, NULL, procinfo, collation); + HnswUpdateConnection(base, e, hc, HnswGetNeighbors(base, neighborElement, lc), lm, NULL, NULL, procinfo, collation); LWLockRelease(&neighborElement->lock); } } diff --git a/src/hnswinsert.c b/src/hnswinsert.c index 42d7378..6456005 100644 --- a/src/hnswinsert.c +++ b/src/hnswinsert.c @@ -458,7 +458,7 @@ HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, Hns */ /* Select neighbors */ - HnswUpdateConnection(NULL, e, hc, lm, lc, &idx, index, procinfo, collation); + HnswUpdateConnection(NULL, e, hc, HnswGetNeighbors(base, neighborElement, lc), lm, &idx, index, procinfo, collation); /* New element was not selected as a neighbor */ if (idx == -1) diff --git a/src/hnswutils.c b/src/hnswutils.c index c2f1128..0659609 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -1078,10 +1078,9 @@ AddConnections(char *base, HnswElement element, List *neighbors, int lc) * Update connections */ void -HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, int lm, int lc, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation) +HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, HnswNeighborArray * currentNeighbors, int lm, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation) { HnswElement hce = HnswPtrAccess(base, hc->element); - HnswNeighborArray *currentNeighbors = HnswGetNeighbors(base, hce, lc); HnswCandidate hc2; HnswPtrStore(base, hc2.element, 
element); From ee43ee9b1699d41527443b30f803a6a332cc66fc Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 29 Sep 2024 18:52:12 -0700 Subject: [PATCH 05/27] Use HnswLoadNeighborTids for inserts --- src/hnsw.h | 2 ++ src/hnswinsert.c | 60 +++++++++++------------------------------------- src/hnswutils.c | 4 ++-- 3 files changed, 18 insertions(+), 48 deletions(-) diff --git a/src/hnsw.h b/src/hnsw.h index caec2e4..1b3f2ac 100644 --- a/src/hnsw.h +++ b/src/hnsw.h @@ -386,6 +386,7 @@ HnswSearchCandidate *HnswEntryCandidate(char *base, HnswElement em, Datum q, Rel void HnswUpdateMetaPage(Relation index, int updateEntry, HnswElement entryPoint, BlockNumber insertPage, ForkNumber forkNum, bool building); void HnswSetNeighborTuple(char *base, HnswNeighborTuple ntup, HnswElement e, int m); void HnswAddHeapTid(HnswElement element, ItemPointer heaptid); +HnswNeighborArray *HnswInitNeighborArray(int lm, HnswAllocator * allocator); void HnswInitNeighbors(char *base, HnswElement element, int m, HnswAllocator * alloc); bool HnswInsertTupleOnDisk(Relation index, Datum value, Datum *values, bool *isnull, ItemPointer heap_tid, bool building); void HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m, bool checkExisting, bool building); @@ -393,6 +394,7 @@ void HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool void HnswLoadElement(HnswElement element, double *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, double *maxDistance); void HnswSetElementTuple(char *base, HnswElementTuple etup, HnswElement element); void HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, HnswNeighborArray * currentNeighbors, int lm, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation); +bool HnswLoadNeighborTids(HnswElement element, ItemPointerData *indextids, Relation index, int m, int lm, int lc); void HnswInitLockTranche(void); const HnswTypeInfo 
*HnswGetTypeInfo(Relation index); PGDLLEXPORT void HnswParallelBuildMain(dsm_segment *seg, shm_toc *toc); diff --git a/src/hnswinsert.c b/src/hnswinsert.c index 6456005..712eaa1 100644 --- a/src/hnswinsert.c +++ b/src/hnswinsert.c @@ -335,66 +335,33 @@ AddElementOnDisk(Relation index, HnswElement e, int m, BlockNumber insertPage, B } /* - * Load neighbors from page + * Load neighbors */ -static void -LoadNeighborsFromPage(HnswElement element, Relation index, Page page, int m) +static HnswNeighborArray * +HnswLoadNeighbors(HnswElement element, Relation index, int m, int lm, int lc) { char *base = NULL; + HnswNeighborArray *neighbors = HnswInitNeighborArray(lm, NULL); + ItemPointerData indextids[HNSW_MAX_M * 2]; - HnswNeighborTuple ntup = (HnswNeighborTuple) PageGetItem(page, PageGetItemId(page, element->neighborOffno)); - int neighborCount = (element->level + 2) * m; + if (!HnswLoadNeighborTids(element, indextids, index, m, lm, lc)) + return neighbors; - Assert(HnswIsNeighborTuple(ntup)); - - HnswInitNeighbors(base, element, m, NULL); - - /* Ensure expected neighbors */ - if (ntup->count != neighborCount) - return; - - for (int i = 0; i < neighborCount; i++) + for (int i = 0; i < lm; i++) { + ItemPointer indextid = &indextids[i]; HnswElement e; - int level; HnswCandidate *hc; - ItemPointer indextid; - HnswNeighborArray *neighbors; - - indextid = &ntup->indextids[i]; if (!ItemPointerIsValid(indextid)) - continue; + break; e = HnswInitElementFromBlock(ItemPointerGetBlockNumber(indextid), ItemPointerGetOffsetNumber(indextid)); - - /* Calculate level based on offset */ - level = element->level - i / m; - if (level < 0) - level = 0; - - neighbors = HnswGetNeighbors(base, element, level); hc = &neighbors->items[neighbors->length++]; HnswPtrStore(base, hc->element, e); } -} -/* - * Load neighbors - */ -static void -HnswLoadNeighbors(HnswElement element, Relation index, int m) -{ - Buffer buf; - Page page; - - buf = ReadBuffer(index, element->neighborPage); - 
LockBuffer(buf, BUFFER_LOCK_SHARE); - page = BufferGetPage(buf); - - LoadNeighborsFromPage(element, index, page, m); - - UnlockReleaseBuffer(buf); + return neighbors; } /* @@ -441,6 +408,7 @@ HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, Hns int startIdx; HnswElement neighborElement = HnswPtrAccess(base, hc->element); OffsetNumber offno = neighborElement->neighborOffno; + HnswNeighborArray *neighborNeighbors; /* * Get latest neighbors since they may have changed. Do not lock @@ -448,7 +416,7 @@ HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, Hns * optimistic locking to retry if another update occurs before * getting exclusive lock. */ - HnswLoadNeighbors(neighborElement, index, m); + neighborNeighbors = HnswLoadNeighbors(neighborElement, index, m, lm, lc); /* * Could improve performance for vacuuming by checking neighbors @@ -458,7 +426,7 @@ HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, Hns */ /* Select neighbors */ - HnswUpdateConnection(NULL, e, hc, HnswGetNeighbors(base, neighborElement, lc), lm, &idx, index, procinfo, collation); + HnswUpdateConnection(NULL, e, hc, neighborNeighbors, lm, &idx, index, procinfo, collation); /* New element was not selected as a neighbor */ if (idx == -1) diff --git a/src/hnswutils.c b/src/hnswutils.c index 0659609..3a75672 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -197,7 +197,7 @@ HnswInitPage(Buffer buf, Page page) /* * Allocate a neighbor array */ -static HnswNeighborArray * +HnswNeighborArray * HnswInitNeighborArray(int lm, HnswAllocator * allocator) { HnswNeighborArray *a = HnswAlloc(allocator, HNSW_NEIGHBOR_ARRAY_SIZE(lm)); @@ -682,7 +682,7 @@ HnswLoadUnvisitedFromMemory(char *base, HnswElement element, HnswUnvisited * unv /* * Load neighbor index TIDs */ -static bool +bool HnswLoadNeighborTids(HnswElement element, ItemPointerData *indextids, Relation index, int m, int lm, int lc) { Buffer buf; From 
648dd8af7810116da1758795600924a2c5e77160 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 29 Sep 2024 19:12:38 -0700 Subject: [PATCH 06/27] Moved LoadElementsForInsert to separate function and removed unused code path --- src/hnswutils.c | 53 +++++++++++++++++++++++++------------------------ 1 file changed, 27 insertions(+), 26 deletions(-) diff --git a/src/hnswutils.c b/src/hnswutils.c index 3a75672..6b83d7c 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -1074,6 +1074,32 @@ AddConnections(char *base, HnswElement element, List *neighbors, int lc) a->items[a->length++] = *((HnswCandidate *) lfirst(lc2)); } +/* + * Load elements for insert + */ +static void +LoadElementsForInsert(HnswNeighborArray * neighbors, Datum q, HnswCandidate * *pruned, Relation index, FmgrInfo *procinfo, Oid collation) +{ + char *base = NULL; + + for (int i = 0; i < neighbors->length; i++) + { + HnswCandidate *hc = &neighbors->items[i]; + HnswElement element = HnswPtrAccess(base, hc->element); + double distance; + + HnswLoadElement(element, &distance, &q, index, procinfo, collation, true, NULL); + hc->distance = distance; + + /* Prune element if being deleted */ + if (element->heaptidsLength == 0) + { + *pruned = &neighbors->items[i]; + break; + } + } +} + /* * Update connections */ @@ -1101,32 +1127,7 @@ HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, HnswNe /* Load elements on insert */ if (index != NULL) - { - Datum q = HnswGetValue(base, hce); - - for (int i = 0; i < currentNeighbors->length; i++) - { - HnswCandidate *hc3 = &currentNeighbors->items[i]; - HnswElement hc3Element = HnswPtrAccess(base, hc3->element); - - if (HnswPtrIsNull(base, hc3Element->value)) - { - double distance; - - HnswLoadElement(hc3Element, &distance, &q, index, procinfo, collation, true, NULL); - hc3->distance = distance; - } - else - hc3->distance = GetElementDistance(base, hc3Element, q, procinfo, collation); - - /* Prune element if being deleted */ - if 
(hc3Element->heaptidsLength == 0) - { - pruned = &currentNeighbors->items[i]; - break; - } - } - } + LoadElementsForInsert(currentNeighbors, HnswGetValue(base, hce), &pruned, index, procinfo, collation); if (pruned == NULL) { From 4ac86f62a1dcd5db6c758eaef4619550a968f721 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 29 Sep 2024 19:22:35 -0700 Subject: [PATCH 07/27] Improved variable names [skip ci] --- src/hnsw.h | 2 +- src/hnswutils.c | 28 ++++++++++++++-------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/hnsw.h b/src/hnsw.h index 1b3f2ac..e3974c3 100644 --- a/src/hnsw.h +++ b/src/hnsw.h @@ -393,7 +393,7 @@ void HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collatio void HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHeaptids, bool loadVec); void HnswLoadElement(HnswElement element, double *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, double *maxDistance); void HnswSetElementTuple(char *base, HnswElementTuple etup, HnswElement element); -void HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, HnswNeighborArray * currentNeighbors, int lm, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation); +void HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, HnswNeighborArray * neighbors, int lm, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation); bool HnswLoadNeighborTids(HnswElement element, ItemPointerData *indextids, Relation index, int m, int lm, int lc); void HnswInitLockTranche(void); const HnswTypeInfo *HnswGetTypeInfo(Relation index); diff --git a/src/hnswutils.c b/src/hnswutils.c index 6b83d7c..6016cc8 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -1104,17 +1104,17 @@ LoadElementsForInsert(HnswNeighborArray * neighbors, Datum q, HnswCandidate * *p * Update connections */ void -HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, 
HnswNeighborArray * currentNeighbors, int lm, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation) +HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, HnswNeighborArray * neighbors, int lm, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation) { HnswElement hce = HnswPtrAccess(base, hc->element); - HnswCandidate hc2; + HnswCandidate newHc; - HnswPtrStore(base, hc2.element, element); - hc2.distance = hc->distance; + HnswPtrStore(base, newHc.element, element); + newHc.distance = hc->distance; - if (currentNeighbors->length < lm) + if (neighbors->length < lm) { - currentNeighbors->items[currentNeighbors->length++] = hc2; + neighbors->items[neighbors->length++] = newHc; /* Track update */ if (updateIdx != NULL) @@ -1127,18 +1127,18 @@ HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, HnswNe /* Load elements on insert */ if (index != NULL) - LoadElementsForInsert(currentNeighbors, HnswGetValue(base, hce), &pruned, index, procinfo, collation); + LoadElementsForInsert(neighbors, HnswGetValue(base, hce), &pruned, index, procinfo, collation); if (pruned == NULL) { List *c = NIL; /* Add candidates */ - for (int i = 0; i < currentNeighbors->length; i++) - c = lappend(c, &currentNeighbors->items[i]); - c = lappend(c, &hc2); + for (int i = 0; i < neighbors->length; i++) + c = lappend(c, &neighbors->items[i]); + c = lappend(c, &newHc); - SelectNeighbors(base, c, lm, procinfo, collation, currentNeighbors, &hc2, &pruned, true); + SelectNeighbors(base, c, lm, procinfo, collation, neighbors, &newHc, &pruned, true); /* Should not happen */ if (pruned == NULL) @@ -1146,11 +1146,11 @@ HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, HnswNe } /* Find and replace the pruned element */ - for (int i = 0; i < currentNeighbors->length; i++) + for (int i = 0; i < neighbors->length; i++) { - if (HnswPtrEqual(base, currentNeighbors->items[i].element, pruned->element)) + if (HnswPtrEqual(base, 
neighbors->items[i].element, pruned->element)) { - currentNeighbors->items[i] = hc2; + neighbors->items[i] = newHc; /* Track update */ if (updateIdx != NULL) From 4c72f912068b1ed870d1660e242c216ae9b9b02a Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 29 Sep 2024 19:26:15 -0700 Subject: [PATCH 08/27] Improved variable name [skip ci] --- src/hnswutils.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hnswutils.c b/src/hnswutils.c index 6016cc8..019e230 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -1104,12 +1104,12 @@ LoadElementsForInsert(HnswNeighborArray * neighbors, Datum q, HnswCandidate * *p * Update connections */ void -HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, HnswNeighborArray * neighbors, int lm, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation) +HnswUpdateConnection(char *base, HnswElement newElement, HnswCandidate * hc, HnswNeighborArray * neighbors, int lm, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation) { HnswElement hce = HnswPtrAccess(base, hc->element); HnswCandidate newHc; - HnswPtrStore(base, newHc.element, element); + HnswPtrStore(base, newHc.element, newElement); newHc.distance = hc->distance; if (neighbors->length < lm) From 8eb8cdf0f3052af85587b4bf420acfdc558940c3 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 29 Sep 2024 19:44:11 -0700 Subject: [PATCH 09/27] Moved insert-specific code to hnswinsert.c --- src/hnswinsert.c | 38 +++++++++++++++++++++++++++++++++- src/hnswutils.c | 53 ++++++++---------------------------------------- 2 files changed, 46 insertions(+), 45 deletions(-) diff --git a/src/hnswinsert.c b/src/hnswinsert.c index 712eaa1..106428b 100644 --- a/src/hnswinsert.c +++ b/src/hnswinsert.c @@ -364,6 +364,32 @@ HnswLoadNeighbors(HnswElement element, Relation index, int m, int lm, int lc) return neighbors; } +/* + * Load elements for insert + */ +static void +LoadElementsForInsert(HnswNeighborArray * neighbors, Datum q, 
int *idx, Relation index, FmgrInfo *procinfo, Oid collation) +{ + char *base = NULL; + + for (int i = 0; i < neighbors->length; i++) + { + HnswCandidate *hc = &neighbors->items[i]; + HnswElement element = HnswPtrAccess(base, hc->element); + double distance; + + HnswLoadElement(element, &distance, &q, index, procinfo, collation, true, NULL); + hc->distance = distance; + + /* Prune element if being deleted */ + if (element->heaptidsLength == 0) + { + *idx = i; + break; + } + } +} + /* * Check if connection already exists */ @@ -426,7 +452,17 @@ HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, Hns */ /* Select neighbors */ - HnswUpdateConnection(NULL, e, hc, neighborNeighbors, lm, &idx, index, procinfo, collation); + if (neighbors->length < lm) + idx = -2; + else + { + Datum q = HnswGetValue(base, neighborElement); + + LoadElementsForInsert(neighborNeighbors, q, &idx, index, procinfo, collation); + + if (idx == -1) + HnswUpdateConnection(base, e, hc, neighborNeighbors, lm, &idx, index, procinfo, collation); + } /* New element was not selected as a neighbor */ if (idx == -1) diff --git a/src/hnswutils.c b/src/hnswutils.c index 019e230..8ac9c08 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -1074,39 +1074,12 @@ AddConnections(char *base, HnswElement element, List *neighbors, int lc) a->items[a->length++] = *((HnswCandidate *) lfirst(lc2)); } -/* - * Load elements for insert - */ -static void -LoadElementsForInsert(HnswNeighborArray * neighbors, Datum q, HnswCandidate * *pruned, Relation index, FmgrInfo *procinfo, Oid collation) -{ - char *base = NULL; - - for (int i = 0; i < neighbors->length; i++) - { - HnswCandidate *hc = &neighbors->items[i]; - HnswElement element = HnswPtrAccess(base, hc->element); - double distance; - - HnswLoadElement(element, &distance, &q, index, procinfo, collation, true, NULL); - hc->distance = distance; - - /* Prune element if being deleted */ - if (element->heaptidsLength == 0) - { - *pruned = 
&neighbors->items[i]; - break; - } - } -} - /* * Update connections */ void HnswUpdateConnection(char *base, HnswElement newElement, HnswCandidate * hc, HnswNeighborArray * neighbors, int lm, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation) { - HnswElement hce = HnswPtrAccess(base, hc->element); HnswCandidate newHc; HnswPtrStore(base, newHc.element, newElement); @@ -1123,27 +1096,19 @@ HnswUpdateConnection(char *base, HnswElement newElement, HnswCandidate * hc, Hns else { /* Shrink connections */ + List *c = NIL; HnswCandidate *pruned = NULL; - /* Load elements on insert */ - if (index != NULL) - LoadElementsForInsert(neighbors, HnswGetValue(base, hce), &pruned, index, procinfo, collation); + /* Add candidates */ + for (int i = 0; i < neighbors->length; i++) + c = lappend(c, &neighbors->items[i]); + c = lappend(c, &newHc); + SelectNeighbors(base, c, lm, procinfo, collation, neighbors, &newHc, &pruned, true); + + /* Should not happen */ if (pruned == NULL) - { - List *c = NIL; - - /* Add candidates */ - for (int i = 0; i < neighbors->length; i++) - c = lappend(c, &neighbors->items[i]); - c = lappend(c, &newHc); - - SelectNeighbors(base, c, lm, procinfo, collation, neighbors, &newHc, &pruned, true); - - /* Should not happen */ - if (pruned == NULL) - return; - } + return; /* Find and replace the pruned element */ for (int i = 0; i < neighbors->length; i++) From 525e3b81e1cedf02bb5ef32ac093310113ac9cca Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 29 Sep 2024 19:47:25 -0700 Subject: [PATCH 10/27] Improved HnswUpdateConnection parameters [skip ci] --- src/hnsw.h | 2 +- src/hnswbuild.c | 2 +- src/hnswinsert.c | 2 +- src/hnswutils.c | 4 ++-- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/hnsw.h b/src/hnsw.h index e3974c3..c58ced1 100644 --- a/src/hnsw.h +++ b/src/hnsw.h @@ -393,7 +393,7 @@ void HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collatio void HnswLoadElementFromTuple(HnswElement element, 
HnswElementTuple etup, bool loadHeaptids, bool loadVec); void HnswLoadElement(HnswElement element, double *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, double *maxDistance); void HnswSetElementTuple(char *base, HnswElementTuple etup, HnswElement element); -void HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, HnswNeighborArray * neighbors, int lm, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation); +void HnswUpdateConnection(char *base, HnswNeighborArray * neighbors, HnswElement newElement, float distance, int lm, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation); bool HnswLoadNeighborTids(HnswElement element, ItemPointerData *indextids, Relation index, int m, int lm, int lc); void HnswInitLockTranche(void); const HnswTypeInfo *HnswGetTypeInfo(Relation index); diff --git a/src/hnswbuild.c b/src/hnswbuild.c index 10bda20..82c981b 100644 --- a/src/hnswbuild.c +++ b/src/hnswbuild.c @@ -388,7 +388,7 @@ UpdateNeighborsInMemory(char *base, FmgrInfo *procinfo, Oid collation, HnswEleme Assert(neighborElement); LWLockAcquire(&neighborElement->lock, LW_EXCLUSIVE); - HnswUpdateConnection(base, e, hc, HnswGetNeighbors(base, neighborElement, lc), lm, NULL, NULL, procinfo, collation); + HnswUpdateConnection(base, HnswGetNeighbors(base, neighborElement, lc), e, hc->distance, lm, NULL, NULL, procinfo, collation); LWLockRelease(&neighborElement->lock); } } diff --git a/src/hnswinsert.c b/src/hnswinsert.c index 106428b..3d73f2a 100644 --- a/src/hnswinsert.c +++ b/src/hnswinsert.c @@ -461,7 +461,7 @@ HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, Hns LoadElementsForInsert(neighborNeighbors, q, &idx, index, procinfo, collation); if (idx == -1) - HnswUpdateConnection(base, e, hc, neighborNeighbors, lm, &idx, index, procinfo, collation); + HnswUpdateConnection(base, neighborNeighbors, e, hc->distance, lm, &idx, index, procinfo, collation); } /* New element was 
not selected as a neighbor */ diff --git a/src/hnswutils.c b/src/hnswutils.c index 8ac9c08..f8f627f 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -1078,12 +1078,12 @@ AddConnections(char *base, HnswElement element, List *neighbors, int lc) * Update connections */ void -HnswUpdateConnection(char *base, HnswElement newElement, HnswCandidate * hc, HnswNeighborArray * neighbors, int lm, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation) +HnswUpdateConnection(char *base, HnswNeighborArray * neighbors, HnswElement newElement, float distance, int lm, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation) { HnswCandidate newHc; HnswPtrStore(base, newHc.element, newElement); - newHc.distance = hc->distance; + newHc.distance = distance; if (neighbors->length < lm) { From 7ba593c492b5029a490d424f3c467ccdb677aed2 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 29 Sep 2024 23:03:02 -0700 Subject: [PATCH 11/27] Improved SelectNeighbors signature [skip ci] --- src/hnswutils.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/hnswutils.c b/src/hnswutils.c index f8f627f..3d0b484 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -959,14 +959,14 @@ CheckElementCloser(char *base, HnswCandidate * e, List *r, FmgrInfo *procinfo, O * Algorithm 4 from paper */ static List * -SelectNeighbors(char *base, List *c, int lm, FmgrInfo *procinfo, Oid collation, HnswNeighborArray * neighbors, HnswCandidate * newCandidate, HnswCandidate * *pruned, bool sortCandidates) +SelectNeighbors(char *base, List *c, int lm, FmgrInfo *procinfo, Oid collation, bool *closerSet, HnswCandidate * newCandidate, HnswCandidate * *pruned, bool sortCandidates) { List *r = NIL; List *w = list_copy(c); HnswCandidate **wd; int wdlen = 0; int wdoff = 0; - bool mustCalculate = !neighbors->closerSet; + bool mustCalculate = !(*closerSet); List *added = NIL; bool removedAny = false; @@ -1043,7 +1043,7 @@ SelectNeighbors(char *base, List *c, int lm, 
FmgrInfo *procinfo, Oid collation, } /* Cached value can only be used in future if sorted deterministically */ - neighbors->closerSet = sortCandidates; + *closerSet = sortCandidates; /* Keep pruned connections */ while (wdoff < wdlen && list_length(r) < lm) @@ -1104,7 +1104,7 @@ HnswUpdateConnection(char *base, HnswNeighborArray * neighbors, HnswElement newE c = lappend(c, &neighbors->items[i]); c = lappend(c, &newHc); - SelectNeighbors(base, c, lm, procinfo, collation, neighbors, &newHc, &pruned, true); + SelectNeighbors(base, c, lm, procinfo, collation, &neighbors->closerSet, &newHc, &pruned, true); /* Should not happen */ if (pruned == NULL) @@ -1242,7 +1242,7 @@ HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint * sortCandidates to true for in-memory builds to enable closer * caching, but there does not seem to be a difference in performance. */ - neighbors = SelectNeighbors(base, lw, lm, procinfo, collation, HnswGetNeighbors(base, element, lc), NULL, NULL, false); + neighbors = SelectNeighbors(base, lw, lm, procinfo, collation, &HnswGetNeighbors(base, element, lc)->closerSet, NULL, NULL, false); AddConnections(base, element, neighbors, lc); From 658d74e2f6e4c68e71d77c65e421988361ce1131 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 29 Sep 2024 23:48:58 -0700 Subject: [PATCH 12/27] Use Size for memory [skip ci] --- src/hnsw.h | 4 ++-- src/hnswbuild.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/hnsw.h b/src/hnsw.h index c58ced1..116d9bc 100644 --- a/src/hnsw.h +++ b/src/hnsw.h @@ -185,8 +185,8 @@ typedef struct HnswGraph /* Allocations state */ LWLock allocatorLock; - long memoryUsed; - long memoryTotal; + Size memoryUsed; + Size memoryTotal; /* Flushed state */ LWLock flushLock; diff --git a/src/hnswbuild.c b/src/hnswbuild.c index 82c981b..87d4823 100644 --- a/src/hnswbuild.c +++ b/src/hnswbuild.c @@ -607,7 +607,7 @@ BuildCallback(Relation index, ItemPointer tid, Datum *values, * Initialize the 
graph */ static void -InitGraph(HnswGraph * graph, char *base, long memoryTotal) +InitGraph(HnswGraph * graph, char *base, Size memoryTotal) { /* Initialize the lock tranche if needed */ HnswInitLockTranche(); @@ -708,7 +708,7 @@ InitBuildState(HnswBuildState * buildstate, Relation heap, Relation index, Index buildstate->normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC); buildstate->collation = index->rd_indcollation[0]; - InitGraph(&buildstate->graphData, NULL, maintenance_work_mem * 1024L); + InitGraph(&buildstate->graphData, NULL, (Size) maintenance_work_mem * 1024L); buildstate->graph = &buildstate->graphData; buildstate->ml = HnswGetMl(buildstate->m); buildstate->maxLevel = HnswGetMaxLevel(buildstate->m); From d148b4e61b4c2464c66a13ac8bb0f2583524a22e Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 30 Sep 2024 09:59:12 -0700 Subject: [PATCH 13/27] Fixed insert logic --- src/hnswinsert.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hnswinsert.c b/src/hnswinsert.c index 3d73f2a..8e3a27c 100644 --- a/src/hnswinsert.c +++ b/src/hnswinsert.c @@ -452,7 +452,7 @@ HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, Hns */ /* Select neighbors */ - if (neighbors->length < lm) + if (neighborNeighbors->length < lm) idx = -2; else { From a8b4b6675a947974f021476b2e51dc657283d7ea Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 30 Sep 2024 10:14:52 -0700 Subject: [PATCH 14/27] Moved logic to get update index to separate function --- src/hnswinsert.c | 70 ++++++++++++++++++++++++++++-------------------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/src/hnswinsert.c b/src/hnswinsert.c index 8e3a27c..c198c93 100644 --- a/src/hnswinsert.c +++ b/src/hnswinsert.c @@ -390,6 +390,45 @@ LoadElementsForInsert(HnswNeighborArray * neighbors, Datum q, int *idx, Relation } } +/* + * Get update index + */ +static int +GetUpdateIndex(HnswElement element, HnswElement newElement, float distance, int m, int 
lm, int lc, Relation index, FmgrInfo *procinfo, Oid collation) +{ + char *base = NULL; + int idx = -1; + HnswNeighborArray *neighbors; + + /* + * Get latest neighbors since they may have changed. Do not lock yet since + * selecting neighbors can take time. Could use optimistic locking to + * retry if another update occurs before getting exclusive lock. + */ + neighbors = HnswLoadNeighbors(element, index, m, lm, lc); + + /* + * Could improve performance for vacuuming by checking neighbors against + * list of elements being deleted to find index. It's important to exclude + * already deleted elements for this since they can be replaced at any + * time. + */ + + if (neighbors->length < lm) + idx = -2; + else + { + Datum q = HnswGetValue(base, element); + + LoadElementsForInsert(neighbors, q, &idx, index, procinfo, collation); + + if (idx == -1) + HnswUpdateConnection(base, neighbors, newElement, distance, lm, &idx, index, procinfo, collation); + } + + return idx; +} + /* * Check if connection already exists */ @@ -430,39 +469,12 @@ HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, Hns Page page; GenericXLogState *state; HnswNeighborTuple ntup; - int idx = -1; + int idx; int startIdx; HnswElement neighborElement = HnswPtrAccess(base, hc->element); OffsetNumber offno = neighborElement->neighborOffno; - HnswNeighborArray *neighborNeighbors; - /* - * Get latest neighbors since they may have changed. Do not lock - * yet since selecting neighbors can take time. Could use - * optimistic locking to retry if another update occurs before - * getting exclusive lock. - */ - neighborNeighbors = HnswLoadNeighbors(neighborElement, index, m, lm, lc); - - /* - * Could improve performance for vacuuming by checking neighbors - * against list of elements being deleted to find index. It's - * important to exclude already deleted elements for this since - * they can be replaced at any time. 
- */ - - /* Select neighbors */ - if (neighborNeighbors->length < lm) - idx = -2; - else - { - Datum q = HnswGetValue(base, neighborElement); - - LoadElementsForInsert(neighborNeighbors, q, &idx, index, procinfo, collation); - - if (idx == -1) - HnswUpdateConnection(base, neighborNeighbors, e, hc->distance, lm, &idx, index, procinfo, collation); - } + idx = GetUpdateIndex(neighborElement, e, hc->distance, m, lm, lc, index, procinfo, collation); /* New element was not selected as a neighbor */ if (idx == -1) From ff6da4fceab43305e4892ac76764a37417e4cada Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 30 Sep 2024 10:30:01 -0700 Subject: [PATCH 15/27] Moved logic to get update neighbor on disk to separate function --- src/hnswinsert.c | 138 +++++++++++++++++++++++++---------------------- 1 file changed, 74 insertions(+), 64 deletions(-) diff --git a/src/hnswinsert.c b/src/hnswinsert.c index c198c93..a3949b2 100644 --- a/src/hnswinsert.c +++ b/src/hnswinsert.c @@ -449,6 +449,78 @@ ConnectionExists(HnswElement e, HnswNeighborTuple ntup, int startIdx, int lm) return false; } +/* + * Update neighbor + */ +static void +UpdateNeighborOnDisk(HnswElement element, HnswElement newElement, int idx, int m, int lm, int lc, Relation index, bool checkExisting, bool building) +{ + Buffer buf; + Page page; + GenericXLogState *state; + HnswNeighborTuple ntup; + int startIdx; + OffsetNumber offno = element->neighborOffno; + + /* Register page */ + buf = ReadBuffer(index, element->neighborPage); + LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); + if (building) + { + state = NULL; + page = BufferGetPage(buf); + } + else + { + state = GenericXLogStart(index); + page = GenericXLogRegisterBuffer(state, buf, 0); + } + + /* Get tuple */ + ntup = (HnswNeighborTuple) PageGetItem(page, PageGetItemId(page, offno)); + + /* Calculate index for update */ + startIdx = (element->level - lc) * m; + + /* Check for existing connection */ + if (checkExisting && ConnectionExists(newElement, ntup, startIdx, 
lm)) + idx = -1; + else if (idx == -2) + { + /* Find free offset if still exists */ + /* TODO Retry updating connections if not */ + for (int j = 0; j < lm; j++) + { + if (!ItemPointerIsValid(&ntup->indextids[startIdx + j])) + { + idx = startIdx + j; + break; + } + } + } + else + idx += startIdx; + + /* Make robust to issues */ + if (idx >= 0 && idx < ntup->count) + { + ItemPointer indextid = &ntup->indextids[idx]; + + /* Update neighbor on the buffer */ + ItemPointerSet(indextid, newElement->blkno, newElement->offno); + + /* Commit */ + if (building) + MarkBufferDirty(buf); + else + GenericXLogFinish(state); + } + else if (!building) + GenericXLogAbort(state); + + UnlockReleaseBuffer(buf); +} + /* * Update neighbors */ @@ -465,14 +537,8 @@ HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, Hns for (int i = 0; i < neighbors->length; i++) { HnswCandidate *hc = &neighbors->items[i]; - Buffer buf; - Page page; - GenericXLogState *state; - HnswNeighborTuple ntup; - int idx; - int startIdx; HnswElement neighborElement = HnswPtrAccess(base, hc->element); - OffsetNumber offno = neighborElement->neighborOffno; + int idx; idx = GetUpdateIndex(neighborElement, e, hc->distance, m, lm, lc, index, procinfo, collation); @@ -480,63 +546,7 @@ HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, Hns if (idx == -1) continue; - /* Register page */ - buf = ReadBuffer(index, neighborElement->neighborPage); - LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE); - if (building) - { - state = NULL; - page = BufferGetPage(buf); - } - else - { - state = GenericXLogStart(index); - page = GenericXLogRegisterBuffer(state, buf, 0); - } - - /* Get tuple */ - ntup = (HnswNeighborTuple) PageGetItem(page, PageGetItemId(page, offno)); - - /* Calculate index for update */ - startIdx = (neighborElement->level - lc) * m; - - /* Check for existing connection */ - if (checkExisting && ConnectionExists(e, ntup, startIdx, lm)) - idx = -1; - else if (idx == -2) - { - /* 
Find free offset if still exists */ - /* TODO Retry updating connections if not */ - for (int j = 0; j < lm; j++) - { - if (!ItemPointerIsValid(&ntup->indextids[startIdx + j])) - { - idx = startIdx + j; - break; - } - } - } - else - idx += startIdx; - - /* Make robust to issues */ - if (idx >= 0 && idx < ntup->count) - { - ItemPointer indextid = &ntup->indextids[idx]; - - /* Update neighbor on the buffer */ - ItemPointerSet(indextid, e->blkno, e->offno); - - /* Commit */ - if (building) - MarkBufferDirty(buf); - else - GenericXLogFinish(state); - } - else if (!building) - GenericXLogAbort(state); - - UnlockReleaseBuffer(buf); + UpdateNeighborOnDisk(neighborElement, e, idx, m, lm, lc, index, checkExisting, building); } } } From 57248ba128f1f4b3773d0575de8a8c936e3eee59 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Mon, 30 Sep 2024 11:15:27 -0700 Subject: [PATCH 16/27] Use separate memory context for updating neighbors, which improves performance around 10% for larger vectors --- CHANGELOG.md | 1 + src/hnswinsert.c | 16 ++++++++++++++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e5a17b3..a7d9924 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ - Added casts for arrays to `sparsevec` - Improved cost estimation +- Improved performance of HNSW inserts and on-disk index builds - Reduced memory usage for HNSW index scans - Dropped support for Postgres 12 diff --git a/src/hnswinsert.c b/src/hnswinsert.c index a3949b2..2f18b12 100644 --- a/src/hnswinsert.c +++ b/src/hnswinsert.c @@ -394,11 +394,12 @@ LoadElementsForInsert(HnswNeighborArray * neighbors, Datum q, int *idx, Relation * Get update index */ static int -GetUpdateIndex(HnswElement element, HnswElement newElement, float distance, int m, int lm, int lc, Relation index, FmgrInfo *procinfo, Oid collation) +GetUpdateIndex(HnswElement element, HnswElement newElement, float distance, int m, int lm, int lc, Relation index, FmgrInfo *procinfo, Oid 
collation, MemoryContext updateCtx) { char *base = NULL; int idx = -1; HnswNeighborArray *neighbors; + MemoryContext oldCtx = MemoryContextSwitchTo(updateCtx); /* * Get latest neighbors since they may have changed. Do not lock yet since @@ -426,6 +427,9 @@ GetUpdateIndex(HnswElement element, HnswElement newElement, float distance, int HnswUpdateConnection(base, neighbors, newElement, distance, lm, &idx, index, procinfo, collation); } + MemoryContextSwitchTo(oldCtx); + MemoryContextReset(updateCtx); + return idx; } @@ -529,6 +533,14 @@ HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, Hns { char *base = NULL; + /* Use separate memory context to improve performance for larger vectors */ + MemoryContext updateCtx = GenerationContextCreate(CurrentMemoryContext, + "Hnsw insert update context", +#if PG_VERSION_NUM >= 150000 + 128 * 1024, 128 * 1024, +#endif + 128 * 1024); + for (int lc = e->level; lc >= 0; lc--) { int lm = HnswGetLayerM(m, lc); @@ -540,7 +552,7 @@ HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, Hns HnswElement neighborElement = HnswPtrAccess(base, hc->element); int idx; - idx = GetUpdateIndex(neighborElement, e, hc->distance, m, lm, lc, index, procinfo, collation); + idx = GetUpdateIndex(neighborElement, e, hc->distance, m, lm, lc, index, procinfo, collation, updateCtx); /* New element was not selected as a neighbor */ if (idx == -1) From d5f4a0e43515caed7a7ef287e317e1bd238acddd Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 8 Oct 2024 12:21:26 -0700 Subject: [PATCH 17/27] Fixed memory context leak in HnswUpdateNeighborsOnDisk - fixes #692 --- src/hnswinsert.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/hnswinsert.c b/src/hnswinsert.c index 2f18b12..2dfd8d3 100644 --- a/src/hnswinsert.c +++ b/src/hnswinsert.c @@ -561,6 +561,8 @@ HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, Hns UpdateNeighborOnDisk(neighborElement, e, idx, m, lm, lc, index, 
checkExisting, building); } } + + MemoryContextDelete(updateCtx); } /* From 77688b43093c3461ed97f7232e268c8c3b638600 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Tue, 8 Oct 2024 12:42:03 -0700 Subject: [PATCH 18/27] Improve total cost for cost estimation (#686) --- src/hnsw.c | 40 +++++++++++++++++++++--------------- src/ivfflat.c | 36 +++++++++++++++----------------- test/t/017_hnsw_filtering.pl | 6 ++---- test/t/039_hnsw_cost.pl | 13 ++++++++++-- 4 files changed, 53 insertions(+), 42 deletions(-) diff --git a/src/hnsw.c b/src/hnsw.c index 0b6640a..c2579c1 100644 --- a/src/hnsw.c +++ b/src/hnsw.c @@ -100,10 +100,8 @@ hnswcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, { GenericCosts costs; int m; - int entryLevel; - int layer0TuplesMax; - double layer0Selectivity; - double scalingFactor = 0.55; + double ratio; + double startupPages; double spc_seq_page_cost; Relation index; @@ -120,6 +118,8 @@ hnswcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, MemSet(&costs, 0, sizeof(costs)); + genericcostestimate(root, path, loop_count, &costs); + index = index_open(path->indexinfo->indexoid, NoLock); HnswGetMetaPageInfo(index, &m, NULL); index_close(index, NoLock); @@ -151,30 +151,38 @@ hnswcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, * at L0, accounting for previously visited tuples, multiplied by the * "scalingFactor" (currently hardcoded). 
*/ - entryLevel = (int) (log(path->indexinfo->tuples + 1) * HnswGetMl(m)); - layer0TuplesMax = HnswGetLayerM(m, 0) * hnsw_ef_search; - layer0Selectivity = (scalingFactor * log(path->indexinfo->tuples + 1)) / - (log(m) * (1 + log(hnsw_ef_search))); + if (path->indexinfo->tuples > 0) + { + double scalingFactor = 0.55; + int entryLevel = (int) (log(path->indexinfo->tuples) * HnswGetMl(m)); + int layer0TuplesMax = HnswGetLayerM(m, 0) * hnsw_ef_search; + double layer0Selectivity = scalingFactor * log(path->indexinfo->tuples) / (log(m) * (1 + log(hnsw_ef_search))); - costs.numIndexTuples = (entryLevel * m) + - (layer0TuplesMax * layer0Selectivity); + ratio = (entryLevel * m + layer0TuplesMax * layer0Selectivity) / path->indexinfo->tuples; - genericcostestimate(root, path, loop_count, &costs); + if (ratio > 1) + ratio = 1; + } + else + ratio = 1; get_tablespace_page_costs(path->indexinfo->reltablespace, NULL, &spc_seq_page_cost); + /* Startup cost is cost before returning the first row */ + costs.indexStartupCost = costs.indexTotalCost * ratio; + /* Adjust cost if needed since TOAST not included in seq scan cost */ - if (costs.numIndexPages > path->indexinfo->rel->pages) + startupPages = costs.numIndexPages * ratio; + if (startupPages > path->indexinfo->rel->pages && ratio < 0.5) { /* Change all page cost from random to sequential */ - costs.indexTotalCost -= costs.numIndexPages * (costs.spc_random_page_cost - spc_seq_page_cost); + costs.indexStartupCost -= startupPages * (costs.spc_random_page_cost - spc_seq_page_cost); /* Remove cost of extra pages */ - costs.indexTotalCost -= (costs.numIndexPages - path->indexinfo->rel->pages) * spc_seq_page_cost; + costs.indexStartupCost -= (startupPages - path->indexinfo->rel->pages) * spc_seq_page_cost; } - /* Use total cost since most work happens before first tuple is returned */ - *indexStartupCost = costs.indexTotalCost; + *indexStartupCost = costs.indexStartupCost; *indexTotalCost = costs.indexTotalCost; *indexSelectivity = 
costs.indexSelectivity; *indexCorrelation = costs.indexCorrelation; diff --git a/src/ivfflat.c b/src/ivfflat.c index 986e19d..395040d 100644 --- a/src/ivfflat.c +++ b/src/ivfflat.c @@ -69,6 +69,8 @@ ivfflatcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, GenericCosts costs; int lists; double ratio; + double sequentialRatio = 0.5; + double startupPages; double spc_seq_page_cost; Relation index; @@ -85,6 +87,8 @@ ivfflatcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, MemSet(&costs, 0, sizeof(costs)); + genericcostestimate(root, path, loop_count, &costs); + index = index_open(path->indexinfo->indexoid, NoLock); IvfflatGetMetaPageInfo(index, &lists, NULL); index_close(index, NoLock); @@ -94,34 +98,26 @@ ivfflatcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, if (ratio > 1.0) ratio = 1.0; - /* - * This gives us the subset of tuples to visit. This value is passed into - * the generic cost estimator to determine the number of pages to visit - * during the index scan. 
- */ - costs.numIndexTuples = path->indexinfo->tuples * ratio; - - genericcostestimate(root, path, loop_count, &costs); - get_tablespace_page_costs(path->indexinfo->reltablespace, NULL, &spc_seq_page_cost); + /* Change some page cost from random to sequential */ + costs.indexTotalCost -= sequentialRatio * costs.numIndexPages * (costs.spc_random_page_cost - spc_seq_page_cost); + + /* Startup cost is cost before returning the first row */ + costs.indexStartupCost = costs.indexTotalCost * ratio; + /* Adjust cost if needed since TOAST not included in seq scan cost */ - if (costs.numIndexPages > path->indexinfo->rel->pages && ratio < 0.5) + startupPages = costs.numIndexPages * ratio; + if (startupPages > path->indexinfo->rel->pages && ratio < 0.5) { - /* Change all page cost from random to sequential */ - costs.indexTotalCost -= costs.numIndexPages * (costs.spc_random_page_cost - spc_seq_page_cost); + /* Change rest of page cost from random to sequential */ + costs.indexStartupCost -= (1 - sequentialRatio) * startupPages * (costs.spc_random_page_cost - spc_seq_page_cost); /* Remove cost of extra pages */ - costs.indexTotalCost -= (costs.numIndexPages - path->indexinfo->rel->pages) * spc_seq_page_cost; - } - else - { - /* Change some page cost from random to sequential */ - costs.indexTotalCost -= 0.5 * costs.numIndexPages * (costs.spc_random_page_cost - spc_seq_page_cost); + costs.indexStartupCost -= (startupPages - path->indexinfo->rel->pages) * spc_seq_page_cost; } - /* Use total cost since most work happens before first tuple is returned */ - *indexStartupCost = costs.indexTotalCost; + *indexStartupCost = costs.indexStartupCost; *indexTotalCost = costs.indexTotalCost; *indexSelectivity = costs.indexSelectivity; *indexCorrelation = costs.indexCorrelation; diff --git a/test/t/017_hnsw_filtering.pl b/test/t/017_hnsw_filtering.pl index 9dbdcf3..afa2a1c 100644 --- a/test/t/017_hnsw_filtering.pl +++ b/test/t/017_hnsw_filtering.pl @@ -41,8 +41,7 @@ my $c = int(rand() * 
$nc); my $explain = $node->safe_psql("postgres", qq( EXPLAIN ANALYZE SELECT i FROM tst WHERE c = $c ORDER BY v <-> '$query' LIMIT $limit; )); -# TODO Do not use index -like($explain, qr/Index Scan using idx/); +like($explain, qr/Seq Scan/); # Test attribute filtering with few rows removed $explain = $node->safe_psql("postgres", qq( @@ -60,8 +59,7 @@ like($explain, qr/Index Scan using idx/); $explain = $node->safe_psql("postgres", qq( EXPLAIN ANALYZE SELECT i FROM tst WHERE c < 1 ORDER BY v <-> '$query' LIMIT $limit; )); -# TODO Do not use index -like($explain, qr/Index Scan using idx/); +like($explain, qr/Seq Scan/); # Test attribute filtering with few rows removed like $explain = $node->safe_psql("postgres", qq( diff --git a/test/t/039_hnsw_cost.pl b/test/t/039_hnsw_cost.pl index a26c09a..97ea5e7 100644 --- a/test/t/039_hnsw_cost.pl +++ b/test/t/039_hnsw_cost.pl @@ -17,12 +17,11 @@ $node->safe_psql("postgres", "CREATE EXTENSION vector;"); for my $dim (@dims) { my $array_sql = join(",", ('random()') x $dim); - my $n = $dim == 384 ? 
3000 : 1000; # Create table and index $node->safe_psql("postgres", "CREATE TABLE tst (i int4, v vector($dim));"); $node->safe_psql("postgres", - "INSERT INTO tst SELECT i, ARRAY[$array_sql] FROM generate_series(1, $n) i;" + "INSERT INTO tst SELECT i, ARRAY[$array_sql] FROM generate_series(1, 2000) i;" ); $node->safe_psql("postgres", "CREATE INDEX idx ON tst USING hnsw (v vector_l2_ops);"); $node->safe_psql("postgres", "ANALYZE tst;"); @@ -40,6 +39,16 @@ for my $dim (@dims) )); like($explain, qr/Index Scan using idx/); + # 3x the rows are needed for distance filters + # since the planner uses DEFAULT_INEQ_SEL for the selectivity (should be 1) + # Recreate index for performance + $node->safe_psql("postgres", "DROP INDEX idx;"); + $node->safe_psql("postgres", + "INSERT INTO tst SELECT i, ARRAY[$array_sql] FROM generate_series(2001, 6000) i;" + ); + $node->safe_psql("postgres", "CREATE INDEX idx ON tst USING hnsw (v vector_l2_ops);"); + $node->safe_psql("postgres", "ANALYZE tst;"); + $explain = $node->safe_psql("postgres", qq( EXPLAIN ANALYZE SELECT i FROM tst WHERE v <-> '$query' < 1 ORDER BY v <-> '$query' LIMIT $limit; )); From f4b67b078f8dc158b4d88a125d7043cc6cc6ac65 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 9 Oct 2024 17:01:49 -0700 Subject: [PATCH 19/27] DRY HNSW distance calculations --- src/hnswutils.c | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) diff --git a/src/hnswutils.c b/src/hnswutils.c index 3d0b484..a7eb819 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -482,6 +482,15 @@ HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHe } } +/* + * Calculate the distance between values + */ +static inline float +HnswGetDistance(Datum a, Datum b, FmgrInfo *procinfo, Oid collation) +{ + return DatumGetFloat8(FunctionCall2Coll(procinfo, collation, a, b)); +} + /* * Load an element and optionally get its distance from q */ @@ -507,7 +516,7 @@ HnswLoadElementImpl(BlockNumber blkno, 
OffsetNumber offno, double *distance, Dat if (DatumGetPointer(*q) == NULL) *distance = 0; else - *distance = DatumGetFloat8(FunctionCall2Coll(procinfo, collation, *q, PointerGetDatum(&etup->data))); + *distance = HnswGetDistance(*q, PointerGetDatum(&etup->data), procinfo, collation); } /* Load element */ @@ -539,7 +548,7 @@ GetElementDistance(char *base, HnswElement element, Datum q, FmgrInfo *procinfo, { Datum value = HnswGetValue(base, element); - return DatumGetFloat8(FunctionCall2Coll(procinfo, collation, q, value)); + return HnswGetDistance(q, value, procinfo, collation); } /* @@ -921,18 +930,6 @@ CompareCandidateDistancesOffset(const ListCell *a, const ListCell *b) return 0; } -/* - * Calculate the distance between elements - */ -static float -HnswGetDistance(char *base, HnswElement a, HnswElement b, FmgrInfo *procinfo, Oid collation) -{ - Datum aValue = HnswGetValue(base, a); - Datum bValue = HnswGetValue(base, b); - - return DatumGetFloat8(FunctionCall2Coll(procinfo, collation, aValue, bValue)); -} - /* * Check if an element is closer to q than any element from R */ @@ -940,13 +937,15 @@ static bool CheckElementCloser(char *base, HnswCandidate * e, List *r, FmgrInfo *procinfo, Oid collation) { HnswElement eElement = HnswPtrAccess(base, e->element); + Datum eValue = HnswGetValue(base, eElement); ListCell *lc2; foreach(lc2, r) { HnswCandidate *ri = lfirst(lc2); HnswElement riElement = HnswPtrAccess(base, ri->element); - float distance = HnswGetDistance(base, eElement, riElement, procinfo, collation); + Datum riValue = HnswGetValue(base, riElement); + float distance = HnswGetDistance(eValue, riValue, procinfo, collation); if (distance <= e->distance) return false; From 3126fbdb6f24fe32fe6ace7a83d72c4fdabab69c Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 9 Oct 2024 17:04:25 -0700 Subject: [PATCH 20/27] Use double for distance [skip ci] --- src/hnswutils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/hnswutils.c 
b/src/hnswutils.c index a7eb819..856c309 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -485,7 +485,7 @@ HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHe /* * Calculate the distance between values */ -static inline float +static inline double HnswGetDistance(Datum a, Datum b, FmgrInfo *procinfo, Oid collation) { return DatumGetFloat8(FunctionCall2Coll(procinfo, collation, a, b)); From 57c05c59a2c5ba0f786d55a650da7517170fdf7e Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 9 Oct 2024 20:50:17 -0700 Subject: [PATCH 21/27] DRY code for forming index value --- src/hnsw.h | 3 ++- src/hnswbuild.c | 24 ++++++------------------ src/hnswinsert.c | 29 ++++++++--------------------- src/hnswutils.c | 27 +++++++++++++++++++++++++++ 4 files changed, 43 insertions(+), 40 deletions(-) diff --git a/src/hnsw.h b/src/hnsw.h index 116d9bc..f9e5621 100644 --- a/src/hnsw.h +++ b/src/hnsw.h @@ -388,10 +388,11 @@ void HnswSetNeighborTuple(char *base, HnswNeighborTuple ntup, HnswElement e, in void HnswAddHeapTid(HnswElement element, ItemPointer heaptid); HnswNeighborArray *HnswInitNeighborArray(int lm, HnswAllocator * allocator); void HnswInitNeighbors(char *base, HnswElement element, int m, HnswAllocator * alloc); -bool HnswInsertTupleOnDisk(Relation index, Datum value, Datum *values, bool *isnull, ItemPointer heap_tid, bool building); +bool HnswInsertTupleOnDisk(Relation index, Datum value, ItemPointer heaptid, bool building); void HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m, bool checkExisting, bool building); void HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHeaptids, bool loadVec); void HnswLoadElement(HnswElement element, double *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, double *maxDistance); +bool HnswFormIndexValue(Datum *out, Datum *values, bool *isnull, const HnswTypeInfo * typeInfo, FmgrInfo *normprocinfo, 
Oid collation); void HnswSetElementTuple(char *base, HnswElementTuple etup, HnswElement element); void HnswUpdateConnection(char *base, HnswNeighborArray * neighbors, HnswElement newElement, float distance, int lm, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation); bool HnswLoadNeighborTids(HnswElement element, ItemPointerData *indextids, Relation index, int m, int lm, int lc); diff --git a/src/hnswbuild.c b/src/hnswbuild.c index 87d4823..02e1749 100644 --- a/src/hnswbuild.c +++ b/src/hnswbuild.c @@ -473,7 +473,6 @@ InsertTupleInMemory(HnswBuildState * buildstate, HnswElement element) static bool InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heaptid, HnswBuildState * buildstate) { - const HnswTypeInfo *typeInfo = buildstate->typeInfo; HnswGraph *graph = buildstate->graph; HnswElement element; HnswAllocator *allocator = &buildstate->allocator; @@ -481,22 +480,11 @@ InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heaptid, Hn Pointer valuePtr; LWLock *flushLock = &graph->flushLock; char *base = buildstate->hnswarea; + Datum value; - /* Detoast once for all calls */ - Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0])); - - /* Check value */ - if (typeInfo->checkValue != NULL) - typeInfo->checkValue(DatumGetPointer(value)); - - /* Normalize if needed */ - if (buildstate->normprocinfo != NULL) - { - if (!HnswCheckNorm(buildstate->normprocinfo, buildstate->collation, value)) - return false; - - value = HnswNormValue(typeInfo, buildstate->collation, value); - } + /* Form index value */ + if (!HnswFormIndexValue(&value, values, isnull, buildstate->typeInfo, buildstate->normprocinfo, buildstate->collation)) + return false; /* Get datum size */ valueSize = VARSIZE_ANY(DatumGetPointer(value)); @@ -509,7 +497,7 @@ InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heaptid, Hn { LWLockRelease(flushLock); - return HnswInsertTupleOnDisk(index, value, values, isnull, heaptid, true); + return 
HnswInsertTupleOnDisk(index, value, heaptid, true); } /* @@ -541,7 +529,7 @@ InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heaptid, Hn LWLockRelease(flushLock); - return HnswInsertTupleOnDisk(index, value, values, isnull, heaptid, true); + return HnswInsertTupleOnDisk(index, value, heaptid, true); } /* Ok, we can proceed to allocate the element */ diff --git a/src/hnswinsert.c b/src/hnswinsert.c index 2dfd8d3..9c5b190 100644 --- a/src/hnswinsert.c +++ b/src/hnswinsert.c @@ -679,7 +679,7 @@ UpdateGraphOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement * Insert a tuple into the index */ bool -HnswInsertTupleOnDisk(Relation index, Datum value, Datum *values, bool *isnull, ItemPointer heap_tid, bool building) +HnswInsertTupleOnDisk(Relation index, Datum value, ItemPointer heaptid, bool building) { HnswElement entryPoint; HnswElement element; @@ -701,7 +701,7 @@ HnswInsertTupleOnDisk(Relation index, Datum value, Datum *values, bool *isnull, HnswGetMetaPageInfo(index, &m, &entryPoint); /* Create an element */ - element = HnswInitElement(base, heap_tid, m, HnswGetMl(m), HnswGetMaxLevel(m), NULL); + element = HnswInitElement(base, heaptid, m, HnswGetMl(m), HnswGetMaxLevel(m), NULL); HnswPtrStore(base, element->value, DatumGetPointer(value)); /* Prevent concurrent inserts when likely updating entry point */ @@ -734,31 +734,18 @@ HnswInsertTupleOnDisk(Relation index, Datum value, Datum *values, bool *isnull, * Insert a tuple into the index */ static void -HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid) +HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heaptid) { Datum value; const HnswTypeInfo *typeInfo = HnswGetTypeInfo(index); - FmgrInfo *normprocinfo; + FmgrInfo *normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC); Oid collation = index->rd_indcollation[0]; - /* Detoast once for all calls */ - value = PointerGetDatum(PG_DETOAST_DATUM(values[0])); + /* Form index 
value */ + if (!HnswFormIndexValue(&value, values, isnull, typeInfo, normprocinfo, collation)) + return; - /* Check value */ - if (typeInfo->checkValue != NULL) - typeInfo->checkValue(DatumGetPointer(value)); - - /* Normalize if needed */ - normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC); - if (normprocinfo != NULL) - { - if (!HnswCheckNorm(normprocinfo, collation, value)) - return; - - value = HnswNormValue(typeInfo, collation, value); - } - - HnswInsertTupleOnDisk(index, value, values, isnull, heap_tid, false); + HnswInsertTupleOnDisk(index, value, heaptid, false); } /* diff --git a/src/hnswutils.c b/src/hnswutils.c index 856c309..743fa87 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -394,6 +394,33 @@ HnswUpdateMetaPage(Relation index, int updateEntry, HnswElement entryPoint, Bloc UnlockReleaseBuffer(buf); } +/* + * Form index value + */ +bool +HnswFormIndexValue(Datum *out, Datum *values, bool *isnull, const HnswTypeInfo * typeInfo, FmgrInfo *normprocinfo, Oid collation) +{ + /* Detoast once for all calls */ + Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0])); + + /* Check value */ + if (typeInfo->checkValue != NULL) + typeInfo->checkValue(DatumGetPointer(value)); + + /* Normalize if needed */ + if (normprocinfo != NULL) + { + if (!HnswCheckNorm(normprocinfo, collation, value)) + return false; + + value = HnswNormValue(typeInfo, collation, value); + } + + *out = value; + + return true; +} + /* * Set element tuple, except for neighbor info */ From a98534e5ab4735c4a33adb8ea63aef5b832ae5c0 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 9 Oct 2024 21:03:18 -0700 Subject: [PATCH 22/27] DRY HNSW procinfo --- src/hnsw.h | 1 + src/hnswbuild.c | 4 +--- src/hnswinsert.c | 6 ++++-- src/hnswscan.c | 4 +--- src/hnswutils.c | 13 +++++++++++++ src/hnswvacuum.c | 4 ++-- 6 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/hnsw.h b/src/hnsw.h index f9e5621..10cc04a 100644 --- a/src/hnsw.h +++ b/src/hnsw.h @@ -370,6 +370,7 @@ 
typedef struct HnswVacuumState int HnswGetM(Relation index); int HnswGetEfConstruction(Relation index); FmgrInfo *HnswOptionalProcInfo(Relation index, uint16 procnum); +void HnswSetProcinfo(Relation index, FmgrInfo **procinfo, FmgrInfo **normprocinfo, Oid *collation); Datum HnswNormValue(const HnswTypeInfo * typeInfo, Oid collation, Datum value); bool HnswCheckNorm(FmgrInfo *procinfo, Oid collation, Datum value); Buffer HnswNewBuffer(Relation index, ForkNumber forkNum); diff --git a/src/hnswbuild.c b/src/hnswbuild.c index 02e1749..12a2169 100644 --- a/src/hnswbuild.c +++ b/src/hnswbuild.c @@ -692,9 +692,7 @@ InitBuildState(HnswBuildState * buildstate, Relation heap, Relation index, Index buildstate->indtuples = 0; /* Get support functions */ - buildstate->procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC); - buildstate->normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC); - buildstate->collation = index->rd_indcollation[0]; + HnswSetProcinfo(index, &buildstate->procinfo, &buildstate->normprocinfo, &buildstate->collation); InitGraph(&buildstate->graphData, NULL, (Size) maintenance_work_mem * 1024L); buildstate->graph = &buildstate->graphData; diff --git a/src/hnswinsert.c b/src/hnswinsert.c index 9c5b190..b916521 100644 --- a/src/hnswinsert.c +++ b/src/hnswinsert.c @@ -685,11 +685,13 @@ HnswInsertTupleOnDisk(Relation index, Datum value, ItemPointer heaptid, bool bui HnswElement element; int m; int efConstruction = HnswGetEfConstruction(index); - FmgrInfo *procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC); - Oid collation = index->rd_indcollation[0]; + FmgrInfo *procinfo; + Oid collation; LOCKMODE lockmode = ShareLock; char *base = NULL; + HnswSetProcinfo(index, &procinfo, NULL, &collation); + /* * Get a shared lock. This allows vacuum to ensure no in-flight inserts * before repairing graph. 
Use a page lock so it does not interfere with diff --git a/src/hnswscan.c b/src/hnswscan.c index 30815af..88ecf68 100644 --- a/src/hnswscan.c +++ b/src/hnswscan.c @@ -86,9 +86,7 @@ hnswbeginscan(Relation index, int nkeys, int norderbys) ALLOCSET_DEFAULT_SIZES); /* Set support functions */ - so->procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC); - so->normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC); - so->collation = index->rd_indcollation[0]; + HnswSetProcinfo(index, &so->procinfo, &so->normprocinfo, &so->collation); scan->opaque = so; diff --git a/src/hnswutils.c b/src/hnswutils.c index 743fa87..198e438 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -153,6 +153,19 @@ HnswOptionalProcInfo(Relation index, uint16 procnum) return index_getprocinfo(index, 1, procnum); } +/* + * Set procinfo + */ +void +HnswSetProcinfo(Relation index, FmgrInfo **procinfo, FmgrInfo **normprocinfo, Oid *collation) +{ + *procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC); + *collation = index->rd_indcollation[0]; + + if (normprocinfo != NULL) + *normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC); +} + /* * Normalize value */ diff --git a/src/hnswvacuum.c b/src/hnswvacuum.c index 67cc645..7931f85 100644 --- a/src/hnswvacuum.c +++ b/src/hnswvacuum.c @@ -573,13 +573,13 @@ InitVacuumState(HnswVacuumState * vacuumstate, IndexVacuumInfo *info, IndexBulkD vacuumstate->callback_state = callback_state; vacuumstate->efConstruction = HnswGetEfConstruction(index); vacuumstate->bas = GetAccessStrategy(BAS_BULKREAD); - vacuumstate->procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC); - vacuumstate->collation = index->rd_indcollation[0]; vacuumstate->ntup = palloc0(HNSW_TUPLE_ALLOC_SIZE); vacuumstate->tmpCtx = AllocSetContextCreate(CurrentMemoryContext, "Hnsw vacuum temporary context", ALLOCSET_DEFAULT_SIZES); + HnswSetProcinfo(index, &vacuumstate->procinfo, NULL, &vacuumstate->collation); + /* Get m from metapage */ HnswGetMetaPageInfo(index, 
&vacuumstate->m, NULL); From 17266ed409f6075569e886851a19f79a49606ed6 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 9 Oct 2024 21:49:32 -0700 Subject: [PATCH 23/27] Use inMemory for conditionals --- src/hnswutils.c | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/src/hnswutils.c b/src/hnswutils.c index 198e438..03c033e 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -598,9 +598,10 @@ HnswSearchCandidate * HnswEntryCandidate(char *base, HnswElement entryPoint, Datum q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec) { HnswSearchCandidate *sc = palloc(sizeof(HnswSearchCandidate)); + bool inMemory = index == NULL; HnswPtrStore(base, sc->element, entryPoint); - if (index == NULL) + if (inMemory) sc->distance = GetElementDistance(base, entryPoint, q, procinfo, collation); else HnswLoadElement(entryPoint, &sc->distance, &q, index, procinfo, collation, loadVec, NULL); @@ -644,9 +645,9 @@ CompareFurthestCandidates(const pairingheap_node *a, const pairingheap_node *b, * Init visited */ static inline void -InitVisited(char *base, visited_hash * v, Relation index, int ef, int m) +InitVisited(char *base, visited_hash * v, bool inMemory, int ef, int m) { - if (index != NULL) + if (!inMemory) v->tids = tidhash_create(CurrentMemoryContext, ef * m * 2, NULL); else if (base != NULL) v->offsets = offsethash_create(CurrentMemoryContext, ef * m * 2, NULL); @@ -658,9 +659,9 @@ InitVisited(char *base, visited_hash * v, Relation index, int ef, int m) * Add to visited */ static inline void -AddToVisited(char *base, visited_hash * v, HnswElementPtr elementPtr, Relation index, bool *found) +AddToVisited(char *base, visited_hash * v, HnswElementPtr elementPtr, bool inMemory, bool *found) { - if (index != NULL) + if (!inMemory) { HnswElement element = HnswPtrAccess(base, elementPtr); ItemPointerData indextid; @@ -721,7 +722,7 @@ HnswLoadUnvisitedFromMemory(char *base, HnswElement element, HnswUnvisited * unv 
HnswCandidate *hc = &localNeighborhood->items[i]; bool found; - AddToVisited(base, v, hc->element, NULL, &found); + AddToVisited(base, v, hc->element, true, &found); if (!found) unvisited[(*unvisitedLength)++].element = HnswPtrAccess(base, hc->element); @@ -805,11 +806,12 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F int lm = HnswGetLayerM(m, lc); HnswUnvisited *unvisited = palloc(lm * sizeof(HnswUnvisited)); int unvisitedLength; + bool inMemory = index == NULL; - InitVisited(base, &v, index, ef, m); + InitVisited(base, &v, inMemory, ef, m); /* Create local memory for neighborhood if needed */ - if (index == NULL) + if (inMemory) { neighborhoodSize = HNSW_NEIGHBOR_ARRAY_SIZE(lm); localNeighborhood = palloc(neighborhoodSize); @@ -821,7 +823,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F HnswSearchCandidate *sc = (HnswSearchCandidate *) lfirst(lc2); bool found; - AddToVisited(base, &v, sc->element, index, &found); + AddToVisited(base, &v, sc->element, inMemory, &found); pairingheap_add(C, &sc->c_node); pairingheap_add(W, &sc->w_node); @@ -846,7 +848,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F cElement = HnswPtrAccess(base, c->element); - if (index == NULL) + if (inMemory) HnswLoadUnvisitedFromMemory(base, cElement, unvisited, &unvisitedLength, &v, lc, localNeighborhood, neighborhoodSize); else HnswLoadUnvisitedFromDisk(cElement, unvisited, &unvisitedLength, &v, index, m, lm, lc); @@ -860,7 +862,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F f = HnswGetSearchCandidate(w_node, pairingheap_first(W)); - if (index == NULL) + if (inMemory) { eElement = unvisited[i].element; eDistance = GetElementDistance(base, eElement, q, procinfo, collation); @@ -1222,9 +1224,10 @@ HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint int entryLevel; Datum q = HnswGetValue(base, element); HnswElement 
skipElement = existing ? element : NULL; + bool inMemory = index == NULL; /* Precompute hash */ - if (index == NULL) + if (inMemory) PrecomputeHash(base, element); /* No neighbors if no entry point */ @@ -1273,7 +1276,7 @@ HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint /* Elements being deleted or skipped can help with search */ /* but should be removed before selecting neighbors */ - if (index != NULL) + if (!inMemory) lw = RemoveElements(base, lw, skipElement); /* From 45a6eef9e0cd9298dd0800941e9c6b5eaf159d17 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 9 Oct 2024 21:52:10 -0700 Subject: [PATCH 24/27] Improved variable name [skip ci] --- src/hnswscan.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/hnswscan.c b/src/hnswscan.c index 88ecf68..c8af889 100644 --- a/src/hnswscan.c +++ b/src/hnswscan.c @@ -166,8 +166,8 @@ hnswgettuple(IndexScanDesc scan, ScanDirection dir) while (list_length(so->w) > 0) { char *base = NULL; - HnswSearchCandidate *hc = llast(so->w); - HnswElement element = HnswPtrAccess(base, hc->element); + HnswSearchCandidate *sc = llast(so->w); + HnswElement element = HnswPtrAccess(base, sc->element); ItemPointer heaptid; /* Move to next element if no valid heap TIDs */ From 064db12de7d0ca1625a77152d2ad13304f003639 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 9 Oct 2024 21:59:21 -0700 Subject: [PATCH 25/27] Moved procinfo initialization for inserts [skip ci] --- src/hnsw.h | 2 +- src/hnswbuild.c | 8 +++++--- src/hnswinsert.c | 15 +++++++-------- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/src/hnsw.h b/src/hnsw.h index 10cc04a..364e03e 100644 --- a/src/hnsw.h +++ b/src/hnsw.h @@ -389,7 +389,7 @@ void HnswSetNeighborTuple(char *base, HnswNeighborTuple ntup, HnswElement e, in void HnswAddHeapTid(HnswElement element, ItemPointer heaptid); HnswNeighborArray *HnswInitNeighborArray(int lm, HnswAllocator * allocator); void HnswInitNeighbors(char *base, 
HnswElement element, int m, HnswAllocator * alloc); -bool HnswInsertTupleOnDisk(Relation index, Datum value, ItemPointer heaptid, bool building); +bool HnswInsertTupleOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, Datum value, ItemPointer heaptid, bool building); void HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m, bool checkExisting, bool building); void HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHeaptids, bool loadVec); void HnswLoadElement(HnswElement element, double *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, double *maxDistance); diff --git a/src/hnswbuild.c b/src/hnswbuild.c index 12a2169..6fab132 100644 --- a/src/hnswbuild.c +++ b/src/hnswbuild.c @@ -476,6 +476,8 @@ InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heaptid, Hn HnswGraph *graph = buildstate->graph; HnswElement element; HnswAllocator *allocator = &buildstate->allocator; + FmgrInfo *procinfo = buildstate->procinfo; + Oid collation = buildstate->collation; Size valueSize; Pointer valuePtr; LWLock *flushLock = &graph->flushLock; @@ -483,7 +485,7 @@ InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heaptid, Hn Datum value; /* Form index value */ - if (!HnswFormIndexValue(&value, values, isnull, buildstate->typeInfo, buildstate->normprocinfo, buildstate->collation)) + if (!HnswFormIndexValue(&value, values, isnull, buildstate->typeInfo, buildstate->normprocinfo, collation)) return false; /* Get datum size */ @@ -497,7 +499,7 @@ InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heaptid, Hn { LWLockRelease(flushLock); - return HnswInsertTupleOnDisk(index, value, heaptid, true); + return HnswInsertTupleOnDisk(index, procinfo, collation, value, heaptid, true); } /* @@ -529,7 +531,7 @@ InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heaptid, Hn LWLockRelease(flushLock); - return 
HnswInsertTupleOnDisk(index, value, heaptid, true); + return HnswInsertTupleOnDisk(index, procinfo, collation, value, heaptid, true); } /* Ok, we can proceed to allocate the element */ diff --git a/src/hnswinsert.c b/src/hnswinsert.c index b916521..45530b9 100644 --- a/src/hnswinsert.c +++ b/src/hnswinsert.c @@ -679,19 +679,15 @@ UpdateGraphOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement * Insert a tuple into the index */ bool -HnswInsertTupleOnDisk(Relation index, Datum value, ItemPointer heaptid, bool building) +HnswInsertTupleOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, Datum value, ItemPointer heaptid, bool building) { HnswElement entryPoint; HnswElement element; int m; int efConstruction = HnswGetEfConstruction(index); - FmgrInfo *procinfo; - Oid collation; LOCKMODE lockmode = ShareLock; char *base = NULL; - HnswSetProcinfo(index, &procinfo, NULL, &collation); - /* * Get a shared lock. This allows vacuum to ensure no in-flight inserts * before repairing graph. 
Use a page lock so it does not interfere with @@ -740,14 +736,17 @@ HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heaptid { Datum value; const HnswTypeInfo *typeInfo = HnswGetTypeInfo(index); - FmgrInfo *normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC); - Oid collation = index->rd_indcollation[0]; + FmgrInfo *procinfo; + FmgrInfo *normprocinfo; + Oid collation; + + HnswSetProcinfo(index, &procinfo, &normprocinfo, &collation); /* Form index value */ if (!HnswFormIndexValue(&value, values, isnull, typeInfo, normprocinfo, collation)) return; - HnswInsertTupleOnDisk(index, value, heaptid, false); + HnswInsertTupleOnDisk(index, procinfo, collation, value, heaptid, false); } /* From 32ab27d72a36f4aced101a5d8f42e1fc78d42024 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 9 Oct 2024 23:10:26 -0700 Subject: [PATCH 26/27] Added HnswSupport struct for support functions --- src/hnsw.h | 38 ++++++++++++----------- src/hnswbuild.c | 26 ++++++++-------- src/hnswinsert.c | 34 ++++++++++---------- src/hnswscan.c | 15 +++++---- src/hnswutils.c | 80 +++++++++++++++++++++++------------------------- src/hnswvacuum.c | 14 ++++----- 6 files changed, 101 insertions(+), 106 deletions(-) diff --git a/src/hnsw.h b/src/hnsw.h index 364e03e..bfc12f6 100644 --- a/src/hnsw.h +++ b/src/hnsw.h @@ -237,6 +237,13 @@ typedef struct HnswTypeInfo void (*checkValue) (Pointer v); } HnswTypeInfo; +typedef struct HnswSupport +{ + FmgrInfo *procinfo; + FmgrInfo *normprocinfo; + Oid collation; +} HnswSupport; + typedef struct HnswBuildState { /* Info */ @@ -256,9 +263,7 @@ typedef struct HnswBuildState double reltuples; /* Support functions */ - FmgrInfo *procinfo; - FmgrInfo *normprocinfo; - Oid collation; + HnswSupport support; /* Variables */ HnswGraph graphData; @@ -333,9 +338,7 @@ typedef struct HnswScanOpaqueData MemoryContext tmpCtx; /* Support functions */ - FmgrInfo *procinfo; - FmgrInfo *normprocinfo; - Oid collation; + HnswSupport support; } 
HnswScanOpaqueData; typedef HnswScanOpaqueData * HnswScanOpaque; @@ -353,8 +356,7 @@ typedef struct HnswVacuumState int efConstruction; /* Support functions */ - FmgrInfo *procinfo; - Oid collation; + HnswSupport support; /* Variables */ struct tidhash_hash *deleted; @@ -370,32 +372,32 @@ typedef struct HnswVacuumState int HnswGetM(Relation index); int HnswGetEfConstruction(Relation index); FmgrInfo *HnswOptionalProcInfo(Relation index, uint16 procnum); -void HnswSetProcinfo(Relation index, FmgrInfo **procinfo, FmgrInfo **normprocinfo, Oid *collation); +void HnswInitSupport(HnswSupport * support, Relation index); Datum HnswNormValue(const HnswTypeInfo * typeInfo, Oid collation, Datum value); -bool HnswCheckNorm(FmgrInfo *procinfo, Oid collation, Datum value); +bool HnswCheckNorm(HnswSupport * support, Datum value); Buffer HnswNewBuffer(Relation index, ForkNumber forkNum); void HnswInitPage(Buffer buf, Page page); void HnswInit(void); -List *HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation, int m, bool inserting, HnswElement skipElement); +List *HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, HnswSupport * support, int m, bool inserting, HnswElement skipElement); HnswElement HnswGetEntryPoint(Relation index); void HnswGetMetaPageInfo(Relation index, int *m, HnswElement * entryPoint); void *HnswAlloc(HnswAllocator * allocator, Size size); HnswElement HnswInitElement(char *base, ItemPointer tid, int m, double ml, int maxLevel, HnswAllocator * alloc); HnswElement HnswInitElementFromBlock(BlockNumber blkno, OffsetNumber offno); -void HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint, Relation index, FmgrInfo *procinfo, Oid collation, int m, int efConstruction, bool existing); -HnswSearchCandidate *HnswEntryCandidate(char *base, HnswElement em, Datum q, Relation rel, FmgrInfo *procinfo, Oid collation, bool loadVec); +void 
HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint, Relation index, HnswSupport * support, int m, int efConstruction, bool existing); +HnswSearchCandidate *HnswEntryCandidate(char *base, HnswElement em, Datum q, Relation rel, HnswSupport * support, bool loadVec); void HnswUpdateMetaPage(Relation index, int updateEntry, HnswElement entryPoint, BlockNumber insertPage, ForkNumber forkNum, bool building); void HnswSetNeighborTuple(char *base, HnswNeighborTuple ntup, HnswElement e, int m); void HnswAddHeapTid(HnswElement element, ItemPointer heaptid); HnswNeighborArray *HnswInitNeighborArray(int lm, HnswAllocator * allocator); void HnswInitNeighbors(char *base, HnswElement element, int m, HnswAllocator * alloc); -bool HnswInsertTupleOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, Datum value, ItemPointer heaptid, bool building); -void HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m, bool checkExisting, bool building); +bool HnswInsertTupleOnDisk(Relation index, HnswSupport * support, Datum value, ItemPointer heaptid, bool building); +void HnswUpdateNeighborsOnDisk(Relation index, HnswSupport * support, HnswElement e, int m, bool checkExisting, bool building); void HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHeaptids, bool loadVec); -void HnswLoadElement(HnswElement element, double *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, double *maxDistance); -bool HnswFormIndexValue(Datum *out, Datum *values, bool *isnull, const HnswTypeInfo * typeInfo, FmgrInfo *normprocinfo, Oid collation); +void HnswLoadElement(HnswElement element, double *distance, Datum *q, Relation index, HnswSupport * support, bool loadVec, double *maxDistance); +bool HnswFormIndexValue(Datum *out, Datum *values, bool *isnull, const HnswTypeInfo * typeInfo, HnswSupport * support); void HnswSetElementTuple(char *base, HnswElementTuple etup, 
HnswElement element); -void HnswUpdateConnection(char *base, HnswNeighborArray * neighbors, HnswElement newElement, float distance, int lm, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation); +void HnswUpdateConnection(char *base, HnswNeighborArray * neighbors, HnswElement newElement, float distance, int lm, int *updateIdx, Relation index, HnswSupport * support); bool HnswLoadNeighborTids(HnswElement element, ItemPointerData *indextids, Relation index, int m, int lm, int lc); void HnswInitLockTranche(void); const HnswTypeInfo *HnswGetTypeInfo(Relation index); diff --git a/src/hnswbuild.c b/src/hnswbuild.c index 6fab132..b667478 100644 --- a/src/hnswbuild.c +++ b/src/hnswbuild.c @@ -366,7 +366,7 @@ AddElementInMemory(char *base, HnswGraph * graph, HnswElement element) * Update neighbors */ static void -UpdateNeighborsInMemory(char *base, FmgrInfo *procinfo, Oid collation, HnswElement e, int m) +UpdateNeighborsInMemory(char *base, HnswSupport * support, HnswElement e, int m) { for (int lc = e->level; lc >= 0; lc--) { @@ -388,7 +388,7 @@ UpdateNeighborsInMemory(char *base, FmgrInfo *procinfo, Oid collation, HnswEleme Assert(neighborElement); LWLockAcquire(&neighborElement->lock, LW_EXCLUSIVE); - HnswUpdateConnection(base, HnswGetNeighbors(base, neighborElement, lc), e, hc->distance, lm, NULL, NULL, procinfo, collation); + HnswUpdateConnection(base, HnswGetNeighbors(base, neighborElement, lc), e, hc->distance, lm, NULL, NULL, support); LWLockRelease(&neighborElement->lock); } } @@ -398,7 +398,7 @@ UpdateNeighborsInMemory(char *base, FmgrInfo *procinfo, Oid collation, HnswEleme * Update graph in memory */ static void -UpdateGraphInMemory(FmgrInfo *procinfo, Oid collation, HnswElement element, int m, int efConstruction, HnswElement entryPoint, HnswBuildState * buildstate) +UpdateGraphInMemory(HnswSupport * support, HnswElement element, int m, int efConstruction, HnswElement entryPoint, HnswBuildState * buildstate) { HnswGraph *graph = buildstate->graph; 
char *base = buildstate->hnswarea; @@ -411,7 +411,7 @@ UpdateGraphInMemory(FmgrInfo *procinfo, Oid collation, HnswElement element, int AddElementInMemory(base, graph, element); /* Update neighbors */ - UpdateNeighborsInMemory(base, procinfo, collation, element, m); + UpdateNeighborsInMemory(base, support, element, m); /* Update entry point if needed (already have lock) */ if (entryPoint == NULL || element->level > entryPoint->level) @@ -424,9 +424,8 @@ UpdateGraphInMemory(FmgrInfo *procinfo, Oid collation, HnswElement element, int static void InsertTupleInMemory(HnswBuildState * buildstate, HnswElement element) { - FmgrInfo *procinfo = buildstate->procinfo; - Oid collation = buildstate->collation; HnswGraph *graph = buildstate->graph; + HnswSupport *support = &buildstate->support; HnswElement entryPoint; LWLock *entryLock = &graph->entryLock; LWLock *entryWaitLock = &graph->entryWaitLock; @@ -458,10 +457,10 @@ InsertTupleInMemory(HnswBuildState * buildstate, HnswElement element) } /* Find neighbors for element */ - HnswFindElementNeighbors(base, element, entryPoint, NULL, procinfo, collation, m, efConstruction, false); + HnswFindElementNeighbors(base, element, entryPoint, NULL, support, m, efConstruction, false); /* Update graph in memory */ - UpdateGraphInMemory(procinfo, collation, element, m, efConstruction, entryPoint, buildstate); + UpdateGraphInMemory(support, element, m, efConstruction, entryPoint, buildstate); /* Release entry lock */ LWLockRelease(entryLock); @@ -476,8 +475,7 @@ InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heaptid, Hn HnswGraph *graph = buildstate->graph; HnswElement element; HnswAllocator *allocator = &buildstate->allocator; - FmgrInfo *procinfo = buildstate->procinfo; - Oid collation = buildstate->collation; + HnswSupport *support = &buildstate->support; Size valueSize; Pointer valuePtr; LWLock *flushLock = &graph->flushLock; @@ -485,7 +483,7 @@ InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer 
heaptid, Hn Datum value; /* Form index value */ - if (!HnswFormIndexValue(&value, values, isnull, buildstate->typeInfo, buildstate->normprocinfo, collation)) + if (!HnswFormIndexValue(&value, values, isnull, buildstate->typeInfo, support)) return false; /* Get datum size */ @@ -499,7 +497,7 @@ InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heaptid, Hn { LWLockRelease(flushLock); - return HnswInsertTupleOnDisk(index, procinfo, collation, value, heaptid, true); + return HnswInsertTupleOnDisk(index, support, value, heaptid, true); } /* @@ -531,7 +529,7 @@ InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heaptid, Hn LWLockRelease(flushLock); - return HnswInsertTupleOnDisk(index, procinfo, collation, value, heaptid, true); + return HnswInsertTupleOnDisk(index, support, value, heaptid, true); } /* Ok, we can proceed to allocate the element */ @@ -694,7 +692,7 @@ InitBuildState(HnswBuildState * buildstate, Relation heap, Relation index, Index buildstate->indtuples = 0; /* Get support functions */ - HnswSetProcinfo(index, &buildstate->procinfo, &buildstate->normprocinfo, &buildstate->collation); + HnswInitSupport(&buildstate->support, index); InitGraph(&buildstate->graphData, NULL, (Size) maintenance_work_mem * 1024L); buildstate->graph = &buildstate->graphData; diff --git a/src/hnswinsert.c b/src/hnswinsert.c index 45530b9..87204ca 100644 --- a/src/hnswinsert.c +++ b/src/hnswinsert.c @@ -368,7 +368,7 @@ HnswLoadNeighbors(HnswElement element, Relation index, int m, int lm, int lc) * Load elements for insert */ static void -LoadElementsForInsert(HnswNeighborArray * neighbors, Datum q, int *idx, Relation index, FmgrInfo *procinfo, Oid collation) +LoadElementsForInsert(HnswNeighborArray * neighbors, Datum q, int *idx, Relation index, HnswSupport * support) { char *base = NULL; @@ -378,7 +378,7 @@ LoadElementsForInsert(HnswNeighborArray * neighbors, Datum q, int *idx, Relation HnswElement element = HnswPtrAccess(base, hc->element); 
double distance; - HnswLoadElement(element, &distance, &q, index, procinfo, collation, true, NULL); + HnswLoadElement(element, &distance, &q, index, support, true, NULL); hc->distance = distance; /* Prune element if being deleted */ @@ -394,7 +394,7 @@ LoadElementsForInsert(HnswNeighborArray * neighbors, Datum q, int *idx, Relation * Get update index */ static int -GetUpdateIndex(HnswElement element, HnswElement newElement, float distance, int m, int lm, int lc, Relation index, FmgrInfo *procinfo, Oid collation, MemoryContext updateCtx) +GetUpdateIndex(HnswElement element, HnswElement newElement, float distance, int m, int lm, int lc, Relation index, HnswSupport * support, MemoryContext updateCtx) { char *base = NULL; int idx = -1; @@ -421,10 +421,10 @@ GetUpdateIndex(HnswElement element, HnswElement newElement, float distance, int { Datum q = HnswGetValue(base, element); - LoadElementsForInsert(neighbors, q, &idx, index, procinfo, collation); + LoadElementsForInsert(neighbors, q, &idx, index, support); if (idx == -1) - HnswUpdateConnection(base, neighbors, newElement, distance, lm, &idx, index, procinfo, collation); + HnswUpdateConnection(base, neighbors, newElement, distance, lm, &idx, index, support); } MemoryContextSwitchTo(oldCtx); @@ -529,7 +529,7 @@ UpdateNeighborOnDisk(HnswElement element, HnswElement newElement, int idx, int m * Update neighbors */ void -HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m, bool checkExisting, bool building) +HnswUpdateNeighborsOnDisk(Relation index, HnswSupport * support, HnswElement e, int m, bool checkExisting, bool building) { char *base = NULL; @@ -552,7 +552,7 @@ HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, Hns HnswElement neighborElement = HnswPtrAccess(base, hc->element); int idx; - idx = GetUpdateIndex(neighborElement, e, hc->distance, m, lm, lc, index, procinfo, collation, updateCtx); + idx = GetUpdateIndex(neighborElement, e, 
hc->distance, m, lm, lc, index, support, updateCtx); /* New element was not selected as a neighbor */ if (idx == -1) @@ -652,7 +652,7 @@ FindDuplicateOnDisk(Relation index, HnswElement element, bool building) * Update graph on disk */ static void -UpdateGraphOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement element, int m, int efConstruction, HnswElement entryPoint, bool building) +UpdateGraphOnDisk(Relation index, HnswSupport * support, HnswElement element, int m, int efConstruction, HnswElement entryPoint, bool building) { BlockNumber newInsertPage = InvalidBlockNumber; @@ -668,7 +668,7 @@ UpdateGraphOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement HnswUpdateMetaPage(index, 0, NULL, newInsertPage, MAIN_FORKNUM, building); /* Update neighbors */ - HnswUpdateNeighborsOnDisk(index, procinfo, collation, element, m, false, building); + HnswUpdateNeighborsOnDisk(index, support, element, m, false, building); /* Update entry point if needed */ if (entryPoint == NULL || element->level > entryPoint->level) @@ -679,7 +679,7 @@ UpdateGraphOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement * Insert a tuple into the index */ bool -HnswInsertTupleOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, Datum value, ItemPointer heaptid, bool building) +HnswInsertTupleOnDisk(Relation index, HnswSupport * support, Datum value, ItemPointer heaptid, bool building) { HnswElement entryPoint; HnswElement element; @@ -717,10 +717,10 @@ HnswInsertTupleOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, Datum v } /* Find neighbors for element */ - HnswFindElementNeighbors(base, element, entryPoint, index, procinfo, collation, m, efConstruction, false); + HnswFindElementNeighbors(base, element, entryPoint, index, support, m, efConstruction, false); /* Update graph on disk */ - UpdateGraphOnDisk(index, procinfo, collation, element, m, efConstruction, entryPoint, building); + UpdateGraphOnDisk(index, support, element, m, 
efConstruction, entryPoint, building); /* Release lock */ UnlockPage(index, HNSW_UPDATE_LOCK, lockmode); @@ -736,17 +736,15 @@ HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heaptid { Datum value; const HnswTypeInfo *typeInfo = HnswGetTypeInfo(index); - FmgrInfo *procinfo; - FmgrInfo *normprocinfo; - Oid collation; + HnswSupport support; - HnswSetProcinfo(index, &procinfo, &normprocinfo, &collation); + HnswInitSupport(&support, index); /* Form index value */ - if (!HnswFormIndexValue(&value, values, isnull, typeInfo, normprocinfo, collation)) + if (!HnswFormIndexValue(&value, values, isnull, typeInfo, &support)) return; - HnswInsertTupleOnDisk(index, procinfo, collation, value, heaptid, false); + HnswInsertTupleOnDisk(index, &support, value, heaptid, false); } /* diff --git a/src/hnswscan.c b/src/hnswscan.c index c8af889..e3aaced 100644 --- a/src/hnswscan.c +++ b/src/hnswscan.c @@ -15,8 +15,7 @@ GetScanItems(IndexScanDesc scan, Datum q) { HnswScanOpaque so = (HnswScanOpaque) scan->opaque; Relation index = scan->indexRelation; - FmgrInfo *procinfo = so->procinfo; - Oid collation = so->collation; + HnswSupport *support = &so->support; List *ep; List *w; int m; @@ -29,15 +28,15 @@ GetScanItems(IndexScanDesc scan, Datum q) if (entryPoint == NULL) return NIL; - ep = list_make1(HnswEntryCandidate(base, entryPoint, q, index, procinfo, collation, false)); + ep = list_make1(HnswEntryCandidate(base, entryPoint, q, index, support, false)); for (int lc = entryPoint->level; lc >= 1; lc--) { - w = HnswSearchLayer(base, q, ep, 1, lc, index, procinfo, collation, m, false, NULL); + w = HnswSearchLayer(base, q, ep, 1, lc, index, support, m, false, NULL); ep = w; } - return HnswSearchLayer(base, q, ep, hnsw_ef_search, 0, index, procinfo, collation, m, false, NULL); + return HnswSearchLayer(base, q, ep, hnsw_ef_search, 0, index, support, m, false, NULL); } /* @@ -60,8 +59,8 @@ GetScanValue(IndexScanDesc scan) 
Assert(!VARATT_IS_EXTENDED(DatumGetPointer(value))); /* Normalize if needed */ - if (so->normprocinfo != NULL) - value = HnswNormValue(so->typeInfo, so->collation, value); + if (so->support.normprocinfo != NULL) + value = HnswNormValue(so->typeInfo, so->support.collation, value); } return value; @@ -86,7 +85,7 @@ hnswbeginscan(Relation index, int nkeys, int norderbys) ALLOCSET_DEFAULT_SIZES); /* Set support functions */ - HnswSetProcinfo(index, &so->procinfo, &so->normprocinfo, &so->collation); + HnswInitSupport(&so->support, index); scan->opaque = so; diff --git a/src/hnswutils.c b/src/hnswutils.c index 03c033e..7fa0720 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -154,16 +154,14 @@ HnswOptionalProcInfo(Relation index, uint16 procnum) } /* - * Set procinfo + * Init support functions */ void -HnswSetProcinfo(Relation index, FmgrInfo **procinfo, FmgrInfo **normprocinfo, Oid *collation) +HnswInitSupport(HnswSupport * support, Relation index) { - *procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC); - *collation = index->rd_indcollation[0]; - - if (normprocinfo != NULL) - *normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC); + support->procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC); + support->collation = index->rd_indcollation[0]; + support->normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC); } /* @@ -179,9 +177,9 @@ HnswNormValue(const HnswTypeInfo * typeInfo, Oid collation, Datum value) * Check if non-zero norm */ bool -HnswCheckNorm(FmgrInfo *procinfo, Oid collation, Datum value) +HnswCheckNorm(HnswSupport * support, Datum value) { - return DatumGetFloat8(FunctionCall1Coll(procinfo, collation, value)) > 0; + return DatumGetFloat8(FunctionCall1Coll(support->normprocinfo, support->collation, value)) > 0; } /* @@ -411,7 +409,7 @@ HnswUpdateMetaPage(Relation index, int updateEntry, HnswElement entryPoint, Bloc * Form index value */ bool -HnswFormIndexValue(Datum *out, Datum *values, bool *isnull, const HnswTypeInfo * 
typeInfo, FmgrInfo *normprocinfo, Oid collation) +HnswFormIndexValue(Datum *out, Datum *values, bool *isnull, const HnswTypeInfo * typeInfo, HnswSupport * support) { /* Detoast once for all calls */ Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0])); @@ -421,12 +419,12 @@ HnswFormIndexValue(Datum *out, Datum *values, bool *isnull, const HnswTypeInfo * typeInfo->checkValue(DatumGetPointer(value)); /* Normalize if needed */ - if (normprocinfo != NULL) + if (support->normprocinfo != NULL) { - if (!HnswCheckNorm(normprocinfo, collation, value)) + if (!HnswCheckNorm(support, value)) return false; - value = HnswNormValue(typeInfo, collation, value); + value = HnswNormValue(typeInfo, support->collation, value); } *out = value; @@ -526,16 +524,16 @@ HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHe * Calculate the distance between values */ static inline double -HnswGetDistance(Datum a, Datum b, FmgrInfo *procinfo, Oid collation) +HnswGetDistance(Datum a, Datum b, HnswSupport * support) { - return DatumGetFloat8(FunctionCall2Coll(procinfo, collation, a, b)); + return DatumGetFloat8(FunctionCall2Coll(support->procinfo, support->collation, a, b)); } /* * Load an element and optionally get its distance from q */ static void -HnswLoadElementImpl(BlockNumber blkno, OffsetNumber offno, double *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, double *maxDistance, HnswElement * element) +HnswLoadElementImpl(BlockNumber blkno, OffsetNumber offno, double *distance, Datum *q, Relation index, HnswSupport * support, bool loadVec, double *maxDistance, HnswElement * element) { Buffer buf; Page page; @@ -556,7 +554,7 @@ HnswLoadElementImpl(BlockNumber blkno, OffsetNumber offno, double *distance, Dat if (DatumGetPointer(*q) == NULL) *distance = 0; else - *distance = HnswGetDistance(*q, PointerGetDatum(&etup->data), procinfo, collation); + *distance = HnswGetDistance(*q, PointerGetDatum(&etup->data), support); } /* 
Load element */ @@ -575,36 +573,36 @@ HnswLoadElementImpl(BlockNumber blkno, OffsetNumber offno, double *distance, Dat * Load an element and optionally get its distance from q */ void -HnswLoadElement(HnswElement element, double *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, double *maxDistance) +HnswLoadElement(HnswElement element, double *distance, Datum *q, Relation index, HnswSupport * support, bool loadVec, double *maxDistance) { - HnswLoadElementImpl(element->blkno, element->offno, distance, q, index, procinfo, collation, loadVec, maxDistance, &element); + HnswLoadElementImpl(element->blkno, element->offno, distance, q, index, support, loadVec, maxDistance, &element); } /* * Get the distance for an element */ static double -GetElementDistance(char *base, HnswElement element, Datum q, FmgrInfo *procinfo, Oid collation) +GetElementDistance(char *base, HnswElement element, Datum q, HnswSupport * support) { Datum value = HnswGetValue(base, element); - return HnswGetDistance(q, value, procinfo, collation); + return HnswGetDistance(q, value, support); } /* * Create a candidate for the entry point */ HnswSearchCandidate * -HnswEntryCandidate(char *base, HnswElement entryPoint, Datum q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec) +HnswEntryCandidate(char *base, HnswElement entryPoint, Datum q, Relation index, HnswSupport * support, bool loadVec) { HnswSearchCandidate *sc = palloc(sizeof(HnswSearchCandidate)); bool inMemory = index == NULL; HnswPtrStore(base, sc->element, entryPoint); if (inMemory) - sc->distance = GetElementDistance(base, entryPoint, q, procinfo, collation); + sc->distance = GetElementDistance(base, entryPoint, q, support); else - HnswLoadElement(entryPoint, &sc->distance, &q, index, procinfo, collation, loadVec, NULL); + HnswLoadElement(entryPoint, &sc->distance, &q, index, support, loadVec, NULL); return sc; } @@ -793,7 +791,7 @@ HnswLoadUnvisitedFromDisk(HnswElement element, HnswUnvisited 
* unvisited, int *u * Algorithm 2 from paper */ List * -HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation, int m, bool inserting, HnswElement skipElement) +HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, HnswSupport * support, int m, bool inserting, HnswElement skipElement) { List *w = NIL; pairingheap *C = pairingheap_allocate(CompareNearestCandidates, NULL); @@ -865,7 +863,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F if (inMemory) { eElement = unvisited[i].element; - eDistance = GetElementDistance(base, eElement, q, procinfo, collation); + eDistance = GetElementDistance(base, eElement, q, support); } else { @@ -875,7 +873,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F /* Avoid any allocations if not adding */ eElement = NULL; - HnswLoadElementImpl(blkno, offno, &eDistance, &q, index, procinfo, collation, inserting, alwaysAdd ? NULL : &f->distance, &eElement); + HnswLoadElementImpl(blkno, offno, &eDistance, &q, index, support, inserting, alwaysAdd ? 
NULL : &f->distance, &eElement); if (eElement == NULL) continue; @@ -976,7 +974,7 @@ CompareCandidateDistancesOffset(const ListCell *a, const ListCell *b) * Check if an element is closer to q than any element from R */ static bool -CheckElementCloser(char *base, HnswCandidate * e, List *r, FmgrInfo *procinfo, Oid collation) +CheckElementCloser(char *base, HnswCandidate * e, List *r, HnswSupport * support) { HnswElement eElement = HnswPtrAccess(base, e->element); Datum eValue = HnswGetValue(base, eElement); @@ -987,7 +985,7 @@ CheckElementCloser(char *base, HnswCandidate * e, List *r, FmgrInfo *procinfo, O HnswCandidate *ri = lfirst(lc2); HnswElement riElement = HnswPtrAccess(base, ri->element); Datum riValue = HnswGetValue(base, riElement); - float distance = HnswGetDistance(eValue, riValue, procinfo, collation); + float distance = HnswGetDistance(eValue, riValue, support); if (distance <= e->distance) return false; @@ -1000,7 +998,7 @@ CheckElementCloser(char *base, HnswCandidate * e, List *r, FmgrInfo *procinfo, O * Algorithm 4 from paper */ static List * -SelectNeighbors(char *base, List *c, int lm, FmgrInfo *procinfo, Oid collation, bool *closerSet, HnswCandidate * newCandidate, HnswCandidate * *pruned, bool sortCandidates) +SelectNeighbors(char *base, List *c, int lm, HnswSupport * support, bool *closerSet, HnswCandidate * newCandidate, HnswCandidate * *pruned, bool sortCandidates) { List *r = NIL; List *w = list_copy(c); @@ -1034,7 +1032,7 @@ SelectNeighbors(char *base, List *c, int lm, FmgrInfo *procinfo, Oid collation, /* Use previous state of r and wd to skip work when possible */ if (mustCalculate) - e->closer = CheckElementCloser(base, e, r, procinfo, collation); + e->closer = CheckElementCloser(base, e, r, support); else if (list_length(added) > 0) { /* Keep Valgrind happy for in-memory, parallel builds */ @@ -1047,7 +1045,7 @@ SelectNeighbors(char *base, List *c, int lm, FmgrInfo *procinfo, Oid collation, */ if (e->closer) { - e->closer = 
CheckElementCloser(base, e, added, procinfo, collation); + e->closer = CheckElementCloser(base, e, added, support); if (!e->closer) removedAny = true; @@ -1060,7 +1058,7 @@ SelectNeighbors(char *base, List *c, int lm, FmgrInfo *procinfo, Oid collation, */ if (removedAny) { - e->closer = CheckElementCloser(base, e, r, procinfo, collation); + e->closer = CheckElementCloser(base, e, r, support); if (e->closer) added = lappend(added, e); } @@ -1068,7 +1066,7 @@ SelectNeighbors(char *base, List *c, int lm, FmgrInfo *procinfo, Oid collation, } else if (e == newCandidate) { - e->closer = CheckElementCloser(base, e, r, procinfo, collation); + e->closer = CheckElementCloser(base, e, r, support); if (e->closer) added = lappend(added, e); } @@ -1119,7 +1117,7 @@ AddConnections(char *base, HnswElement element, List *neighbors, int lc) * Update connections */ void -HnswUpdateConnection(char *base, HnswNeighborArray * neighbors, HnswElement newElement, float distance, int lm, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation) +HnswUpdateConnection(char *base, HnswNeighborArray * neighbors, HnswElement newElement, float distance, int lm, int *updateIdx, Relation index, HnswSupport * support) { HnswCandidate newHc; @@ -1145,7 +1143,7 @@ HnswUpdateConnection(char *base, HnswNeighborArray * neighbors, HnswElement newE c = lappend(c, &neighbors->items[i]); c = lappend(c, &newHc); - SelectNeighbors(base, c, lm, procinfo, collation, &neighbors->closerSet, &newHc, &pruned, true); + SelectNeighbors(base, c, lm, support, &neighbors->closerSet, &newHc, &pruned, true); /* Should not happen */ if (pruned == NULL) @@ -1216,7 +1214,7 @@ PrecomputeHash(char *base, HnswElement element) * Algorithm 1 from paper */ void -HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint, Relation index, FmgrInfo *procinfo, Oid collation, int m, int efConstruction, bool existing) +HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint, 
Relation index, HnswSupport * support, int m, int efConstruction, bool existing) { List *ep; List *w; @@ -1235,13 +1233,13 @@ HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint return; /* Get entry point and level */ - ep = list_make1(HnswEntryCandidate(base, entryPoint, q, index, procinfo, collation, true)); + ep = list_make1(HnswEntryCandidate(base, entryPoint, q, index, support, true)); entryLevel = entryPoint->level; /* 1st phase: greedy search to insert level */ for (int lc = entryLevel; lc >= level + 1; lc--) { - w = HnswSearchLayer(base, q, ep, 1, lc, index, procinfo, collation, m, true, skipElement); + w = HnswSearchLayer(base, q, ep, 1, lc, index, support, m, true, skipElement); ep = w; } @@ -1260,7 +1258,7 @@ HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint List *lw = NIL; ListCell *lc2; - w = HnswSearchLayer(base, q, ep, efConstruction, lc, index, procinfo, collation, m, true, skipElement); + w = HnswSearchLayer(base, q, ep, efConstruction, lc, index, support, m, true, skipElement); /* Convert search candidates to candidates */ foreach(lc2, w) @@ -1284,7 +1282,7 @@ HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint * sortCandidates to true for in-memory builds to enable closer * caching, but there does not seem to be a difference in performance. 
*/ - neighbors = SelectNeighbors(base, lw, lm, procinfo, collation, &HnswGetNeighbors(base, element, lc)->closerSet, NULL, NULL, false); + neighbors = SelectNeighbors(base, lw, lm, support, &HnswGetNeighbors(base, element, lc)->closerSet, NULL, NULL, false); AddConnections(base, element, neighbors, lc); diff --git a/src/hnswvacuum.c b/src/hnswvacuum.c index 7931f85..d3cdf68 100644 --- a/src/hnswvacuum.c +++ b/src/hnswvacuum.c @@ -184,13 +184,12 @@ static void RepairGraphElement(HnswVacuumState * vacuumstate, HnswElement element, HnswElement entryPoint) { Relation index = vacuumstate->index; + HnswSupport *support = &vacuumstate->support; Buffer buf; Page page; GenericXLogState *state; int m = vacuumstate->m; int efConstruction = vacuumstate->efConstruction; - FmgrInfo *procinfo = vacuumstate->procinfo; - Oid collation = vacuumstate->collation; BufferAccessStrategy bas = vacuumstate->bas; HnswNeighborTuple ntup = vacuumstate->ntup; Size ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(element->level, m); @@ -205,7 +204,7 @@ RepairGraphElement(HnswVacuumState * vacuumstate, HnswElement element, HnswEleme element->heaptidsLength = 0; /* Find neighbors for element, skipping itself */ - HnswFindElementNeighbors(base, element, entryPoint, index, procinfo, collation, m, efConstruction, true); + HnswFindElementNeighbors(base, element, entryPoint, index, support, m, efConstruction, true); /* Zero memory for each element */ MemSet(ntup, 0, HNSW_TUPLE_ALLOC_SIZE); @@ -229,7 +228,7 @@ RepairGraphElement(HnswVacuumState * vacuumstate, HnswElement element, HnswEleme UnlockReleaseBuffer(buf); /* Update neighbors */ - HnswUpdateNeighborsOnDisk(index, procinfo, collation, element, m, true, false); + HnswUpdateNeighborsOnDisk(index, support, element, m, true, false); } /* @@ -239,6 +238,7 @@ static void RepairGraphEntryPoint(HnswVacuumState * vacuumstate) { Relation index = vacuumstate->index; + HnswSupport *support = &vacuumstate->support; HnswElement highestPoint = &vacuumstate->highestPoint; 
HnswElement entryPoint; MemoryContext oldCtx = MemoryContextSwitchTo(vacuumstate->tmpCtx); @@ -256,7 +256,7 @@ RepairGraphEntryPoint(HnswVacuumState * vacuumstate) LockPage(index, HNSW_UPDATE_LOCK, ShareLock); /* Load element */ - HnswLoadElement(highestPoint, NULL, NULL, index, vacuumstate->procinfo, vacuumstate->collation, true, NULL); + HnswLoadElement(highestPoint, NULL, NULL, index, support, true, NULL); /* Repair if needed */ if (NeedsUpdated(vacuumstate, highestPoint)) @@ -294,7 +294,7 @@ RepairGraphEntryPoint(HnswVacuumState * vacuumstate) * is outdated, this can remove connections at higher levels in * the graph until they are repaired, but this should be fine. */ - HnswLoadElement(entryPoint, NULL, NULL, index, vacuumstate->procinfo, vacuumstate->collation, true, NULL); + HnswLoadElement(entryPoint, NULL, NULL, index, support, true, NULL); if (NeedsUpdated(vacuumstate, entryPoint)) { @@ -578,7 +578,7 @@ InitVacuumState(HnswVacuumState * vacuumstate, IndexVacuumInfo *info, IndexBulkD "Hnsw vacuum temporary context", ALLOCSET_DEFAULT_SIZES); - HnswSetProcinfo(index, &vacuumstate->procinfo, NULL, &vacuumstate->collation); + HnswInitSupport(&vacuumstate->support, index); /* Get m from metapage */ HnswGetMetaPageInfo(index, &vacuumstate->m, NULL); From fa6782985ab9fd6c3f289196b7cf3053c1d46eef Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 9 Oct 2024 23:45:47 -0700 Subject: [PATCH 27/27] Added HnswQuery struct for query data --- src/hnsw.h | 11 ++++++++--- src/hnswinsert.c | 10 ++++++---- src/hnswscan.c | 11 +++++++---- src/hnswutils.c | 30 ++++++++++++++++-------------- 4 files changed, 37 insertions(+), 25 deletions(-) diff --git a/src/hnsw.h b/src/hnsw.h index bfc12f6..e034068 100644 --- a/src/hnsw.h +++ b/src/hnsw.h @@ -244,6 +244,11 @@ typedef struct HnswSupport Oid collation; } HnswSupport; +typedef struct HnswQuery +{ + Datum value; +} HnswQuery; + typedef struct HnswBuildState { /* Info */ @@ -378,14 +383,14 @@ bool HnswCheckNorm(HnswSupport * 
support, Datum value); Buffer HnswNewBuffer(Relation index, ForkNumber forkNum); void HnswInitPage(Buffer buf, Page page); void HnswInit(void); -List *HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, HnswSupport * support, int m, bool inserting, HnswElement skipElement); +List *HnswSearchLayer(char *base, HnswQuery * q, List *ep, int ef, int lc, Relation index, HnswSupport * support, int m, bool inserting, HnswElement skipElement); HnswElement HnswGetEntryPoint(Relation index); void HnswGetMetaPageInfo(Relation index, int *m, HnswElement * entryPoint); void *HnswAlloc(HnswAllocator * allocator, Size size); HnswElement HnswInitElement(char *base, ItemPointer tid, int m, double ml, int maxLevel, HnswAllocator * alloc); HnswElement HnswInitElementFromBlock(BlockNumber blkno, OffsetNumber offno); void HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint, Relation index, HnswSupport * support, int m, int efConstruction, bool existing); -HnswSearchCandidate *HnswEntryCandidate(char *base, HnswElement em, Datum q, Relation rel, HnswSupport * support, bool loadVec); +HnswSearchCandidate *HnswEntryCandidate(char *base, HnswElement em, HnswQuery * q, Relation rel, HnswSupport * support, bool loadVec); void HnswUpdateMetaPage(Relation index, int updateEntry, HnswElement entryPoint, BlockNumber insertPage, ForkNumber forkNum, bool building); void HnswSetNeighborTuple(char *base, HnswNeighborTuple ntup, HnswElement e, int m); void HnswAddHeapTid(HnswElement element, ItemPointer heaptid); @@ -394,7 +399,7 @@ void HnswInitNeighbors(char *base, HnswElement element, int m, HnswAllocator * bool HnswInsertTupleOnDisk(Relation index, HnswSupport * support, Datum value, ItemPointer heaptid, bool building); void HnswUpdateNeighborsOnDisk(Relation index, HnswSupport * support, HnswElement e, int m, bool checkExisting, bool building); void HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHeaptids, bool 
loadVec); -void HnswLoadElement(HnswElement element, double *distance, Datum *q, Relation index, HnswSupport * support, bool loadVec, double *maxDistance); +void HnswLoadElement(HnswElement element, double *distance, HnswQuery * q, Relation index, HnswSupport * support, bool loadVec, double *maxDistance); bool HnswFormIndexValue(Datum *out, Datum *values, bool *isnull, const HnswTypeInfo * typeInfo, HnswSupport * support); void HnswSetElementTuple(char *base, HnswElementTuple etup, HnswElement element); void HnswUpdateConnection(char *base, HnswNeighborArray * neighbors, HnswElement newElement, float distance, int lm, int *updateIdx, Relation index, HnswSupport * support); diff --git a/src/hnswinsert.c b/src/hnswinsert.c index 87204ca..84eb1d4 100644 --- a/src/hnswinsert.c +++ b/src/hnswinsert.c @@ -368,7 +368,7 @@ HnswLoadNeighbors(HnswElement element, Relation index, int m, int lm, int lc) * Load elements for insert */ static void -LoadElementsForInsert(HnswNeighborArray * neighbors, Datum q, int *idx, Relation index, HnswSupport * support) +LoadElementsForInsert(HnswNeighborArray * neighbors, HnswQuery * q, int *idx, Relation index, HnswSupport * support) { char *base = NULL; @@ -378,7 +378,7 @@ LoadElementsForInsert(HnswNeighborArray * neighbors, Datum q, int *idx, Relation HnswElement element = HnswPtrAccess(base, hc->element); double distance; - HnswLoadElement(element, &distance, &q, index, support, true, NULL); + HnswLoadElement(element, &distance, q, index, support, true, NULL); hc->distance = distance; /* Prune element if being deleted */ @@ -419,9 +419,11 @@ GetUpdateIndex(HnswElement element, HnswElement newElement, float distance, int idx = -2; else { - Datum q = HnswGetValue(base, element); + HnswQuery q; - LoadElementsForInsert(neighbors, q, &idx, index, support); + q.value = HnswGetValue(base, element); + + LoadElementsForInsert(neighbors, &q, &idx, index, support); if (idx == -1) HnswUpdateConnection(base, neighbors, newElement, distance, lm, &idx, 
index, support); diff --git a/src/hnswscan.c b/src/hnswscan.c index e3aaced..2c6a454 100644 --- a/src/hnswscan.c +++ b/src/hnswscan.c @@ -11,7 +11,7 @@ * Algorithm 5 from paper */ static List * -GetScanItems(IndexScanDesc scan, Datum q) +GetScanItems(IndexScanDesc scan, Datum value) { HnswScanOpaque so = (HnswScanOpaque) scan->opaque; Relation index = scan->indexRelation; @@ -21,6 +21,9 @@ GetScanItems(IndexScanDesc scan, Datum q) int m; HnswElement entryPoint; char *base = NULL; + HnswQuery q; + + q.value = value; /* Get m and entry point */ HnswGetMetaPageInfo(index, &m, &entryPoint); @@ -28,15 +31,15 @@ GetScanItems(IndexScanDesc scan, Datum q) if (entryPoint == NULL) return NIL; - ep = list_make1(HnswEntryCandidate(base, entryPoint, q, index, support, false)); + ep = list_make1(HnswEntryCandidate(base, entryPoint, &q, index, support, false)); for (int lc = entryPoint->level; lc >= 1; lc--) { - w = HnswSearchLayer(base, q, ep, 1, lc, index, support, m, false, NULL); + w = HnswSearchLayer(base, &q, ep, 1, lc, index, support, m, false, NULL); ep = w; } - return HnswSearchLayer(base, q, ep, hnsw_ef_search, 0, index, support, m, false, NULL); + return HnswSearchLayer(base, &q, ep, hnsw_ef_search, 0, index, support, m, false, NULL); } /* diff --git a/src/hnswutils.c b/src/hnswutils.c index 7fa0720..fe2b16e 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -533,7 +533,7 @@ HnswGetDistance(Datum a, Datum b, HnswSupport * support) * Load an element and optionally get its distance from q */ static void -HnswLoadElementImpl(BlockNumber blkno, OffsetNumber offno, double *distance, Datum *q, Relation index, HnswSupport * support, bool loadVec, double *maxDistance, HnswElement * element) +HnswLoadElementImpl(BlockNumber blkno, OffsetNumber offno, double *distance, HnswQuery * q, Relation index, HnswSupport * support, bool loadVec, double *maxDistance, HnswElement * element) { Buffer buf; Page page; @@ -551,10 +551,10 @@ HnswLoadElementImpl(BlockNumber blkno, OffsetNumber 
offno, double *distance, Dat /* Calculate distance */ if (distance != NULL) { - if (DatumGetPointer(*q) == NULL) + if (DatumGetPointer(q->value) == NULL) *distance = 0; else - *distance = HnswGetDistance(*q, PointerGetDatum(&etup->data), support); + *distance = HnswGetDistance(q->value, PointerGetDatum(&etup->data), support); } /* Load element */ @@ -573,7 +573,7 @@ HnswLoadElementImpl(BlockNumber blkno, OffsetNumber offno, double *distance, Dat * Load an element and optionally get its distance from q */ void -HnswLoadElement(HnswElement element, double *distance, Datum *q, Relation index, HnswSupport * support, bool loadVec, double *maxDistance) +HnswLoadElement(HnswElement element, double *distance, HnswQuery * q, Relation index, HnswSupport * support, bool loadVec, double *maxDistance) { HnswLoadElementImpl(element->blkno, element->offno, distance, q, index, support, loadVec, maxDistance, &element); } @@ -582,18 +582,18 @@ HnswLoadElement(HnswElement element, double *distance, Datum *q, Relation index, * Get the distance for an element */ static double -GetElementDistance(char *base, HnswElement element, Datum q, HnswSupport * support) +GetElementDistance(char *base, HnswElement element, HnswQuery * q, HnswSupport * support) { Datum value = HnswGetValue(base, element); - return HnswGetDistance(q, value, support); + return HnswGetDistance(q->value, value, support); } /* * Create a candidate for the entry point */ HnswSearchCandidate * -HnswEntryCandidate(char *base, HnswElement entryPoint, Datum q, Relation index, HnswSupport * support, bool loadVec) +HnswEntryCandidate(char *base, HnswElement entryPoint, HnswQuery * q, Relation index, HnswSupport * support, bool loadVec) { HnswSearchCandidate *sc = palloc(sizeof(HnswSearchCandidate)); bool inMemory = index == NULL; @@ -602,7 +602,7 @@ HnswEntryCandidate(char *base, HnswElement entryPoint, Datum q, Relation index, if (inMemory) sc->distance = GetElementDistance(base, entryPoint, q, support); else - 
HnswLoadElement(entryPoint, &sc->distance, &q, index, support, loadVec, NULL); + HnswLoadElement(entryPoint, &sc->distance, q, index, support, loadVec, NULL); return sc; } @@ -791,7 +791,7 @@ HnswLoadUnvisitedFromDisk(HnswElement element, HnswUnvisited * unvisited, int *u * Algorithm 2 from paper */ List * -HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, HnswSupport * support, int m, bool inserting, HnswElement skipElement) +HnswSearchLayer(char *base, HnswQuery * q, List *ep, int ef, int lc, Relation index, HnswSupport * support, int m, bool inserting, HnswElement skipElement) { List *w = NIL; pairingheap *C = pairingheap_allocate(CompareNearestCandidates, NULL); @@ -873,7 +873,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, H /* Avoid any allocations if not adding */ eElement = NULL; - HnswLoadElementImpl(blkno, offno, &eDistance, &q, index, support, inserting, alwaysAdd ? NULL : &f->distance, &eElement); + HnswLoadElementImpl(blkno, offno, &eDistance, q, index, support, inserting, alwaysAdd ? NULL : &f->distance, &eElement); if (eElement == NULL) continue; @@ -1220,10 +1220,12 @@ HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint List *w; int level = element->level; int entryLevel; - Datum q = HnswGetValue(base, element); + HnswQuery q; HnswElement skipElement = existing ? 
element : NULL; bool inMemory = index == NULL; + q.value = HnswGetValue(base, element); + /* Precompute hash */ if (inMemory) PrecomputeHash(base, element); @@ -1233,13 +1235,13 @@ HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint return; /* Get entry point and level */ - ep = list_make1(HnswEntryCandidate(base, entryPoint, q, index, support, true)); + ep = list_make1(HnswEntryCandidate(base, entryPoint, &q, index, support, true)); entryLevel = entryPoint->level; /* 1st phase: greedy search to insert level */ for (int lc = entryLevel; lc >= level + 1; lc--) { - w = HnswSearchLayer(base, q, ep, 1, lc, index, support, m, true, skipElement); + w = HnswSearchLayer(base, &q, ep, 1, lc, index, support, m, true, skipElement); ep = w; } @@ -1258,7 +1260,7 @@ HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint List *lw = NIL; ListCell *lc2; - w = HnswSearchLayer(base, q, ep, efConstruction, lc, index, support, m, true, skipElement); + w = HnswSearchLayer(base, &q, ep, efConstruction, lc, index, support, m, true, skipElement); /* Convert search candidates to candidates */ foreach(lc2, w)