mirror of
https://github.com/pgvector/pgvector.git
synced 2026-07-01 02:02:10 +08:00
Use smaller batch size for better performance
This commit is contained in:
@@ -185,6 +185,9 @@ typedef struct HnswSearchCandidate
|
||||
float distance;
|
||||
} HnswSearchCandidate;
|
||||
|
||||
/*
 * Map a pairing heap node pointer back to the HnswSearchCandidate it belongs
 * to. "membername" names the node field inside HnswSearchCandidate (callers
 * in view pass w_node) and "ptr" is the node pointer, for example the value
 * returned by pairingheap_remove_first(). The Const variant goes through
 * pairingheap_const_container and is what the comparators in view apply to
 * their const node arguments.
 */
#define HnswGetSearchCandidate(membername, ptr) pairingheap_container(HnswSearchCandidate, membername, ptr)
|
||||
#define HnswGetSearchCandidateConst(membername, ptr) pairingheap_const_container(HnswSearchCandidate, membername, ptr)
|
||||
|
||||
/* HNSW index options */
|
||||
typedef struct HnswOptions
|
||||
{
|
||||
@@ -360,7 +363,7 @@ typedef struct HnswScanOpaqueData
|
||||
bool first;
|
||||
List *w;
|
||||
visited_hash v;
|
||||
List *discarded;
|
||||
pairingheap *discarded;
|
||||
Datum q;
|
||||
int m;
|
||||
int64 tuples;
|
||||
@@ -409,7 +412,7 @@ bool HnswCheckNorm(FmgrInfo *procinfo, Oid collation, Datum value);
|
||||
Buffer HnswNewBuffer(Relation index, ForkNumber forkNum);
|
||||
void HnswInitPage(Buffer buf, Page page);
|
||||
void HnswInit(void);
|
||||
List *HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation, int m, bool inserting, HnswElement skipElement, visited_hash * v, List **discarded, bool initVisited);
|
||||
List *HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation, int m, bool inserting, HnswElement skipElement, visited_hash * v, pairingheap **discarded, bool initVisited);
|
||||
HnswElement HnswGetEntryPoint(Relation index);
|
||||
void HnswGetMetaPageInfo(Relation index, int *m, HnswElement * entryPoint);
|
||||
void *HnswAlloc(HnswAllocator * allocator, Size size);
|
||||
|
||||
@@ -53,14 +53,24 @@ ResumeScanItems(IndexScanDesc scan)
|
||||
Relation index = scan->indexRelation;
|
||||
FmgrInfo *procinfo = so->procinfo;
|
||||
Oid collation = so->collation;
|
||||
List *ep;
|
||||
List *ep = NIL;
|
||||
char *base = NULL;
|
||||
|
||||
if (list_length(so->discarded) == 0)
|
||||
if (pairingheap_is_empty(so->discarded))
|
||||
return NIL;
|
||||
|
||||
ep = so->discarded;
|
||||
so->discarded = NIL;
|
||||
for (int i = 0; i < hnsw_ef_search; i++)
|
||||
{
|
||||
HnswSearchCandidate *hc;
|
||||
|
||||
if (pairingheap_is_empty(so->discarded))
|
||||
break;
|
||||
|
||||
hc = HnswGetSearchCandidate(w_node, pairingheap_remove_first(so->discarded));
|
||||
|
||||
ep = lappend(ep, hc);
|
||||
}
|
||||
|
||||
return HnswSearchLayer(base, so->q, ep, hnsw_ef_search, 0, index, procinfo, collation, so->m, false, NULL, &so->v, &so->discarded, false);
|
||||
}
|
||||
|
||||
@@ -128,9 +138,11 @@ hnswrescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int no
|
||||
HnswScanOpaque so = (HnswScanOpaque) scan->opaque;
|
||||
|
||||
if (!so->first)
|
||||
{
|
||||
pairingheap_reset(so->discarded);
|
||||
tidhash_reset(so->v.tids);
|
||||
}
|
||||
so->first = true;
|
||||
so->discarded = NIL;
|
||||
so->tuples = 0;
|
||||
MemoryContextReset(so->tmpCtx);
|
||||
|
||||
@@ -141,24 +153,6 @@ hnswrescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int no
|
||||
memmove(scan->orderByData, orderbys, scan->numberOfOrderBys * sizeof(ScanKeyData));
|
||||
}
|
||||
|
||||
/*
|
||||
* Compare search candidate distances
|
||||
*/
|
||||
/*
 * List comparator over two cells whose values are HnswSearchCandidate
 * pointers.
 *
 * Returns 1 when a's distance is smaller than b's, -1 when it is larger, and
 * 0 otherwise. The sign is the reverse of an ascending comparator: the
 * visible caller hands this to list_sort() so that the list ends up in
 * reverse order, because results are removed from the end.
 */
static int
|
||||
CompareSearchCandidateDistances(const ListCell *a, const ListCell *b)
|
||||
{
|
||||
HnswSearchCandidate *hca = lfirst(a);
|
||||
HnswSearchCandidate *hcb = lfirst(b);
|
||||
|
||||
/* Closer candidate compares as "greater", so it sorts toward the end */
if (hca->distance < hcb->distance)
|
||||
return 1;
|
||||
|
||||
/* Farther candidate compares as "less", so it sorts toward the front */
if (hca->distance > hcb->distance)
|
||||
return -1;
|
||||
|
||||
/* Neither comparison held: treat the two candidates as tied */
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Fetch the next tuple in the given scan
|
||||
*/
|
||||
@@ -225,7 +219,7 @@ hnswgettuple(IndexScanDesc scan, ScanDirection dir)
|
||||
|
||||
if (MemoryContextMemAllocated(so->tmpCtx, false) > (work_mem * 1024L))
|
||||
{
|
||||
if (list_length(so->discarded) == 0)
|
||||
if (pairingheap_is_empty(so->discarded))
|
||||
{
|
||||
ereport(NOTICE,
|
||||
(errmsg("hnsw iterative search exceeded work_mem after " INT64_FORMAT " tuples", so->tuples),
|
||||
@@ -235,11 +229,7 @@ hnswgettuple(IndexScanDesc scan, ScanDirection dir)
|
||||
}
|
||||
|
||||
/* Return remaining tuples */
|
||||
so->w = so->discarded;
|
||||
so->discarded = NIL;
|
||||
|
||||
/* Sort in reverse order since results are removed from end */
|
||||
list_sort(so->w, CompareSearchCandidateDistances);
|
||||
so->w = lappend(so->w, HnswGetSearchCandidate(w_node, pairingheap_remove_first(so->discarded)));
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
@@ -619,9 +619,6 @@ HnswEntryCandidate(char *base, HnswElement entryPoint, Datum q, Relation index,
|
||||
return hc;
|
||||
}
|
||||
|
||||
#define HnswGetSearchCandidate(membername, ptr) pairingheap_container(HnswSearchCandidate, membername, ptr)
|
||||
#define HnswGetSearchCandidateConst(membername, ptr) pairingheap_const_container(HnswSearchCandidate, membername, ptr)
|
||||
|
||||
/*
|
||||
* Compare candidate distances
|
||||
*/
|
||||
@@ -637,6 +634,21 @@ CompareNearestCandidates(const pairingheap_node *a, const pairingheap_node *b, v
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Compare discarded candidate distances
|
||||
*/
|
||||
/*
 * Pairing heap comparator for discarded search candidates; it is the
 * comparator passed to pairingheap_allocate() when the discarded heap is
 * created in HnswSearchLayer.
 *
 * Returns 1 when a's distance is smaller than b's, -1 when it is larger, and
 * 0 otherwise. "arg" is not used.
 *
 * NOTE(review): assuming the pairing heap hands back the node that compares
 * greatest first, this ordering makes pairingheap_remove_first() yield the
 * nearest discarded candidate - confirm against lib/pairingheap.h.
 */
static int
|
||||
CompareNearestDiscardedCandidates(const pairingheap_node *a, const pairingheap_node *b, void *arg)
|
||||
{
|
||||
/* a is closer than b */
if (HnswGetSearchCandidateConst(w_node, a)->distance < HnswGetSearchCandidateConst(w_node, b)->distance)
|
||||
return 1;
|
||||
|
||||
/* a is farther than b */
if (HnswGetSearchCandidateConst(w_node, a)->distance > HnswGetSearchCandidateConst(w_node, b)->distance)
|
||||
return -1;
|
||||
|
||||
/* Neither comparison held: treat the two candidates as tied */
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Compare candidate distances
|
||||
*/
|
||||
@@ -795,7 +807,7 @@ HnswLoadUnvisitedFromDisk(HnswElement element, HnswUnvisited * unvisited, int *u
|
||||
* Algorithm 2 from paper
|
||||
*/
|
||||
List *
|
||||
HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation, int m, bool inserting, HnswElement skipElement, visited_hash * v, List **discarded, bool initVisited)
|
||||
HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation, int m, bool inserting, HnswElement skipElement, visited_hash * v, pairingheap **discarded, bool initVisited)
|
||||
{
|
||||
List *w = NIL;
|
||||
pairingheap *C = pairingheap_allocate(CompareNearestCandidates, NULL);
|
||||
@@ -816,8 +828,13 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F
|
||||
}
|
||||
|
||||
if (initVisited)
|
||||
{
|
||||
InitVisited(base, v, index, ef, m);
|
||||
|
||||
if (discarded != NULL)
|
||||
*discarded = pairingheap_allocate(CompareNearestDiscardedCandidates, NULL);
|
||||
}
|
||||
|
||||
/* Create local memory for neighborhood if needed */
|
||||
if (index == NULL)
|
||||
{
|
||||
@@ -895,8 +912,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F
|
||||
e = palloc(sizeof(HnswSearchCandidate));
|
||||
HnswPtrStore(base, e->element, eElement);
|
||||
e->distance = eDistance;
|
||||
|
||||
*discarded = lappend(*discarded, e);
|
||||
pairingheap_add(*discarded, &e->w_node);
|
||||
}
|
||||
|
||||
continue;
|
||||
@@ -928,7 +944,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F
|
||||
HnswSearchCandidate *d = HnswGetSearchCandidate(w_node, pairingheap_remove_first(W));
|
||||
|
||||
if (discarded != NULL)
|
||||
*discarded = lappend(*discarded, d);
|
||||
pairingheap_add(*discarded, &d->w_node);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user