Use smaller batch size for better performance

This commit is contained in:
Andrew Kane
2024-09-22 00:00:02 -07:00
parent ba0196ba10
commit 7412ee6cee
3 changed files with 47 additions and 38 deletions

View File

@@ -185,6 +185,9 @@ typedef struct HnswSearchCandidate
float distance;
} HnswSearchCandidate;
/*
 * Convert a pointer to a pairing heap node into a pointer to the
 * HnswSearchCandidate that contains it. membername is the name of the
 * node field within HnswSearchCandidate that ptr points at. Use the
 * Const variant when ptr is a const node pointer.
 */
#define HnswGetSearchCandidate(membername, ptr) pairingheap_container(HnswSearchCandidate, membername, ptr)
#define HnswGetSearchCandidateConst(membername, ptr) pairingheap_const_container(HnswSearchCandidate, membername, ptr)
/* HNSW index options */
typedef struct HnswOptions
{
@@ -360,7 +363,7 @@ typedef struct HnswScanOpaqueData
bool first;
List *w;
visited_hash v;
List *discarded;
pairingheap *discarded;
Datum q;
int m;
int64 tuples;
@@ -409,7 +412,7 @@ bool HnswCheckNorm(FmgrInfo *procinfo, Oid collation, Datum value);
Buffer HnswNewBuffer(Relation index, ForkNumber forkNum);
void HnswInitPage(Buffer buf, Page page);
void HnswInit(void);
List *HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation, int m, bool inserting, HnswElement skipElement, visited_hash * v, List **discarded, bool initVisited);
List *HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation, int m, bool inserting, HnswElement skipElement, visited_hash * v, pairingheap **discarded, bool initVisited);
HnswElement HnswGetEntryPoint(Relation index);
void HnswGetMetaPageInfo(Relation index, int *m, HnswElement * entryPoint);
void *HnswAlloc(HnswAllocator * allocator, Size size);

View File

@@ -53,14 +53,24 @@ ResumeScanItems(IndexScanDesc scan)
Relation index = scan->indexRelation;
FmgrInfo *procinfo = so->procinfo;
Oid collation = so->collation;
List *ep;
List *ep = NIL;
char *base = NULL;
if (list_length(so->discarded) == 0)
if (pairingheap_is_empty(so->discarded))
return NIL;
ep = so->discarded;
so->discarded = NIL;
for (int i = 0; i < hnsw_ef_search; i++)
{
HnswSearchCandidate *hc;
if (pairingheap_is_empty(so->discarded))
break;
hc = HnswGetSearchCandidate(w_node, pairingheap_remove_first(so->discarded));
ep = lappend(ep, hc);
}
return HnswSearchLayer(base, so->q, ep, hnsw_ef_search, 0, index, procinfo, collation, so->m, false, NULL, &so->v, &so->discarded, false);
}
@@ -128,9 +138,11 @@ hnswrescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int no
HnswScanOpaque so = (HnswScanOpaque) scan->opaque;
if (!so->first)
{
pairingheap_reset(so->discarded);
tidhash_reset(so->v.tids);
}
so->first = true;
so->discarded = NIL;
so->tuples = 0;
MemoryContextReset(so->tmpCtx);
@@ -141,24 +153,6 @@ hnswrescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int no
memmove(scan->orderByData, orderbys, scan->numberOfOrderBys * sizeof(ScanKeyData));
}
/*
 * Order search candidates by descending distance
 *
 * Returns 1 when a is closer than b, -1 when a is farther than b, and 0
 * when neither distance is less than the other.
 */
static int
CompareSearchCandidateDistances(const ListCell *a, const ListCell *b)
{
	HnswSearchCandidate *first = lfirst(a);
	HnswSearchCandidate *second = lfirst(b);
	float		firstDistance = first->distance;
	float		secondDistance = second->distance;

	return (firstDistance < secondDistance) ? 1 : ((firstDistance > secondDistance) ? -1 : 0);
}
/*
* Fetch the next tuple in the given scan
*/
@@ -225,7 +219,7 @@ hnswgettuple(IndexScanDesc scan, ScanDirection dir)
if (MemoryContextMemAllocated(so->tmpCtx, false) > (work_mem * 1024L))
{
if (list_length(so->discarded) == 0)
if (pairingheap_is_empty(so->discarded))
{
ereport(NOTICE,
(errmsg("hnsw iterative search exceeded work_mem after " INT64_FORMAT " tuples", so->tuples),
@@ -235,11 +229,7 @@ hnswgettuple(IndexScanDesc scan, ScanDirection dir)
}
/* Return remaining tuples */
so->w = so->discarded;
so->discarded = NIL;
/* Sort in reverse order since results are removed from end */
list_sort(so->w, CompareSearchCandidateDistances);
so->w = lappend(so->w, HnswGetSearchCandidate(w_node, pairingheap_remove_first(so->discarded)));
}
else
{

View File

@@ -619,9 +619,6 @@ HnswEntryCandidate(char *base, HnswElement entryPoint, Datum q, Relation index,
return hc;
}
/*
 * Convert a pointer to a pairing heap node into a pointer to the
 * HnswSearchCandidate that contains it. membername is the name of the
 * node field within HnswSearchCandidate that ptr points at. Use the
 * Const variant when ptr is a const node pointer.
 */
#define HnswGetSearchCandidate(membername, ptr) pairingheap_container(HnswSearchCandidate, membername, ptr)
#define HnswGetSearchCandidateConst(membername, ptr) pairingheap_const_container(HnswSearchCandidate, membername, ptr)
/*
* Compare candidate distances
*/
@@ -637,6 +634,21 @@ CompareNearestCandidates(const pairingheap_node *a, const pairingheap_node *b, v
return 0;
}
/*
* Compare discarded candidate distances
*/
static int
CompareNearestDiscardedCandidates(const pairingheap_node *a, const pairingheap_node *b, void *arg)
{
if (HnswGetSearchCandidateConst(w_node, a)->distance < HnswGetSearchCandidateConst(w_node, b)->distance)
return 1;
if (HnswGetSearchCandidateConst(w_node, a)->distance > HnswGetSearchCandidateConst(w_node, b)->distance)
return -1;
return 0;
}
/*
* Compare candidate distances
*/
@@ -795,7 +807,7 @@ HnswLoadUnvisitedFromDisk(HnswElement element, HnswUnvisited * unvisited, int *u
* Algorithm 2 from paper
*/
List *
HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation, int m, bool inserting, HnswElement skipElement, visited_hash * v, List **discarded, bool initVisited)
HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation, int m, bool inserting, HnswElement skipElement, visited_hash * v, pairingheap **discarded, bool initVisited)
{
List *w = NIL;
pairingheap *C = pairingheap_allocate(CompareNearestCandidates, NULL);
@@ -816,8 +828,13 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F
}
if (initVisited)
{
InitVisited(base, v, index, ef, m);
if (discarded != NULL)
*discarded = pairingheap_allocate(CompareNearestDiscardedCandidates, NULL);
}
/* Create local memory for neighborhood if needed */
if (index == NULL)
{
@@ -895,8 +912,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F
e = palloc(sizeof(HnswSearchCandidate));
HnswPtrStore(base, e->element, eElement);
e->distance = eDistance;
*discarded = lappend(*discarded, e);
pairingheap_add(*discarded, &e->w_node);
}
continue;
@@ -928,7 +944,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F
HnswSearchCandidate *d = HnswGetSearchCandidate(w_node, pairingheap_remove_first(W));
if (discarded != NULL)
*discarded = lappend(*discarded, d);
pairingheap_add(*discarded, &d->w_node);
}
}
}