From 7412ee6cee6ce008eb62f3d3f76e23e09fcdd3a0 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sun, 22 Sep 2024 00:00:02 -0700 Subject: [PATCH] Use smaller batch size for better performance --- src/hnsw.h | 7 +++++-- src/hnswscan.c | 48 +++++++++++++++++++----------------------------- src/hnswutils.c | 30 +++++++++++++++++++++++------- 3 files changed, 47 insertions(+), 38 deletions(-) diff --git a/src/hnsw.h b/src/hnsw.h index 442a9ea..e6bfde5 100644 --- a/src/hnsw.h +++ b/src/hnsw.h @@ -185,6 +185,9 @@ typedef struct HnswSearchCandidate float distance; } HnswSearchCandidate; +#define HnswGetSearchCandidate(membername, ptr) pairingheap_container(HnswSearchCandidate, membername, ptr) +#define HnswGetSearchCandidateConst(membername, ptr) pairingheap_const_container(HnswSearchCandidate, membername, ptr) + /* HNSW index options */ typedef struct HnswOptions { @@ -360,7 +363,7 @@ typedef struct HnswScanOpaqueData bool first; List *w; visited_hash v; - List *discarded; + pairingheap *discarded; Datum q; int m; int64 tuples; @@ -409,7 +412,7 @@ bool HnswCheckNorm(FmgrInfo *procinfo, Oid collation, Datum value); Buffer HnswNewBuffer(Relation index, ForkNumber forkNum); void HnswInitPage(Buffer buf, Page page); void HnswInit(void); -List *HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation, int m, bool inserting, HnswElement skipElement, visited_hash * v, List **discarded, bool initVisited); +List *HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation, int m, bool inserting, HnswElement skipElement, visited_hash * v, pairingheap **discarded, bool initVisited); HnswElement HnswGetEntryPoint(Relation index); void HnswGetMetaPageInfo(Relation index, int *m, HnswElement * entryPoint); void *HnswAlloc(HnswAllocator * allocator, Size size); diff --git a/src/hnswscan.c b/src/hnswscan.c index 926e872..b7e05a6 100644 --- a/src/hnswscan.c +++ b/src/hnswscan.c @@ 
-53,14 +53,24 @@ ResumeScanItems(IndexScanDesc scan) Relation index = scan->indexRelation; FmgrInfo *procinfo = so->procinfo; Oid collation = so->collation; - List *ep; + List *ep = NIL; char *base = NULL; - if (list_length(so->discarded) == 0) + if (pairingheap_is_empty(so->discarded)) return NIL; - ep = so->discarded; - so->discarded = NIL; + for (int i = 0; i < hnsw_ef_search; i++) + { + HnswSearchCandidate *hc; + + if (pairingheap_is_empty(so->discarded)) + break; + + hc = HnswGetSearchCandidate(w_node, pairingheap_remove_first(so->discarded)); + + ep = lappend(ep, hc); + } + return HnswSearchLayer(base, so->q, ep, hnsw_ef_search, 0, index, procinfo, collation, so->m, false, NULL, &so->v, &so->discarded, false); } @@ -128,9 +138,11 @@ hnswrescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int no HnswScanOpaque so = (HnswScanOpaque) scan->opaque; if (!so->first) + { + pairingheap_reset(so->discarded); tidhash_reset(so->v.tids); + } so->first = true; - so->discarded = NIL; so->tuples = 0; MemoryContextReset(so->tmpCtx); @@ -141,24 +153,6 @@ hnswrescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int no memmove(scan->orderByData, orderbys, scan->numberOfOrderBys * sizeof(ScanKeyData)); } -/* - * Compare search candidate distances - */ -static int -CompareSearchCandidateDistances(const ListCell *a, const ListCell *b) -{ - HnswSearchCandidate *hca = lfirst(a); - HnswSearchCandidate *hcb = lfirst(b); - - if (hca->distance < hcb->distance) - return 1; - - if (hca->distance > hcb->distance) - return -1; - - return 0; -} - /* * Fetch the next tuple in the given scan */ @@ -225,7 +219,7 @@ hnswgettuple(IndexScanDesc scan, ScanDirection dir) if (MemoryContextMemAllocated(so->tmpCtx, false) > (work_mem * 1024L)) { - if (list_length(so->discarded) == 0) + if (pairingheap_is_empty(so->discarded)) { ereport(NOTICE, (errmsg("hnsw iterative search exceeded work_mem after " INT64_FORMAT " tuples", so->tuples), @@ -235,11 +229,7 @@ 
hnswgettuple(IndexScanDesc scan, ScanDirection dir) } /* Return remaining tuples */ - so->w = so->discarded; - so->discarded = NIL; - - /* Sort in reverse order since results are removed from end */ - list_sort(so->w, CompareSearchCandidateDistances); + so->w = lappend(so->w, HnswGetSearchCandidate(w_node, pairingheap_remove_first(so->discarded))); } else { diff --git a/src/hnswutils.c b/src/hnswutils.c index da7d8bc..bdec9bd 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -619,9 +619,6 @@ HnswEntryCandidate(char *base, HnswElement entryPoint, Datum q, Relation index, return hc; } -#define HnswGetSearchCandidate(membername, ptr) pairingheap_container(HnswSearchCandidate, membername, ptr) -#define HnswGetSearchCandidateConst(membername, ptr) pairingheap_const_container(HnswSearchCandidate, membername, ptr) - /* * Compare candidate distances */ @@ -637,6 +634,21 @@ CompareNearestCandidates(const pairingheap_node *a, const pairingheap_node *b, v return 0; } +/* + * Compare discarded candidate distances (returns 1 if a has the smaller distance, -1 if a has the larger distance, 0 otherwise) + */ +static int +CompareNearestDiscardedCandidates(const pairingheap_node *a, const pairingheap_node *b, void *arg) +{ + if (HnswGetSearchCandidateConst(w_node, a)->distance < HnswGetSearchCandidateConst(w_node, b)->distance) + return 1; + + if (HnswGetSearchCandidateConst(w_node, a)->distance > HnswGetSearchCandidateConst(w_node, b)->distance) + return -1; + + return 0; +} + /* * Compare candidate distances */ @@ -795,7 +807,7 @@ HnswLoadUnvisitedFromDisk(HnswElement element, HnswUnvisited * unvisited, int *u * Algorithm 2 from paper */ List * -HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation, int m, bool inserting, HnswElement skipElement, visited_hash * v, List **discarded, bool initVisited) +HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation, int m, bool inserting, HnswElement skipElement, visited_hash * v, pairingheap **discarded, 
bool initVisited) { List *w = NIL; pairingheap *C = pairingheap_allocate(CompareNearestCandidates, NULL); @@ -816,8 +828,13 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F } if (initVisited) + { InitVisited(base, v, index, ef, m); + if (discarded != NULL) + *discarded = pairingheap_allocate(CompareNearestDiscardedCandidates, NULL); + } + /* Create local memory for neighborhood if needed */ if (index == NULL) { @@ -895,8 +912,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F e = palloc(sizeof(HnswSearchCandidate)); HnswPtrStore(base, e->element, eElement); e->distance = eDistance; - - *discarded = lappend(*discarded, e); + pairingheap_add(*discarded, &e->w_node); } continue; @@ -928,7 +944,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F HnswSearchCandidate *d = HnswGetSearchCandidate(w_node, pairingheap_remove_first(W)); if (discarded != NULL) - *discarded = lappend(*discarded, d); + pairingheap_add(*discarded, &d->w_node); } } }