mirror of
https://github.com/pgvector/pgvector.git
synced 2026-06-06 14:01:31 +08:00
Speed up HNSW index build
This commit is contained in:
@@ -110,12 +110,14 @@ typedef struct HnswCandidate
|
||||
{
|
||||
HnswElement element;
|
||||
float distance;
|
||||
bool closer;
|
||||
} HnswCandidate;
|
||||
|
||||
/*
 * Neighbor candidates held for one element; indexed per layer as
 * element->neighbors[lc] by HnswInitNeighbors and SelectNeighbors.
 */
typedef struct HnswNeighborArray
|
||||
{
|
||||
int length; /* set to 0 by HnswInitNeighbors; presumably the number of used entries in items -- confirm */
|
||||
HnswCandidate *items; /* buffer palloc'd by HnswInitNeighbors, sized in units of sizeof(HnswCandidate) */
|
||||
bool closerSet; /* false after init; SelectNeighbors sets it true after its selection loop and, once true, reuses stored closer flags until it reaches the new candidate */
|
||||
} HnswNeighborArray;
|
||||
|
||||
typedef struct HnswPairingHeapNode
|
||||
|
||||
@@ -139,6 +139,7 @@ HnswInitNeighbors(HnswElement element, int m)
|
||||
a = &element->neighbors[lc];
|
||||
a->length = 0;
|
||||
a->items = palloc(sizeof(HnswCandidate) * lm);
|
||||
a->closerSet = false;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -748,11 +749,12 @@ CheckElementCloser(HnswCandidate * e, List *r, int lc, FmgrInfo *procinfo, Oid c
|
||||
* Algorithm 4 from paper
|
||||
*/
|
||||
static List *
|
||||
SelectNeighbors(List *c, int m, int lc, FmgrInfo *procinfo, Oid collation, HnswCandidate * *pruned)
|
||||
SelectNeighbors(List *c, int m, int lc, FmgrInfo *procinfo, Oid collation, HnswElement e2, HnswCandidate * newCandidate, HnswCandidate * *pruned)
|
||||
{
|
||||
List *r = NIL;
|
||||
List *w = list_copy(c);
|
||||
pairingheap *wd;
|
||||
bool mustCalculate = !e2->neighbors[lc].closerSet;
|
||||
|
||||
if (list_length(w) <= m)
|
||||
return w;
|
||||
@@ -763,18 +765,29 @@ SelectNeighbors(List *c, int m, int lc, FmgrInfo *procinfo, Oid collation, HnswC
|
||||
{
|
||||
/* Assumes w is already ordered desc */
|
||||
HnswCandidate *e = llast(w);
|
||||
bool closer;
|
||||
|
||||
w = list_delete_last(w);
|
||||
|
||||
closer = CheckElementCloser(e, r, lc, procinfo, collation);
|
||||
/*
|
||||
* r and wd will be the same as previous calls until the new
|
||||
* candidate, so can skip distance calculations
|
||||
*/
|
||||
if (mustCalculate)
|
||||
e->closer = CheckElementCloser(e, r, lc, procinfo, collation);
|
||||
else if (e == newCandidate)
|
||||
{
|
||||
e->closer = CheckElementCloser(e, r, lc, procinfo, collation);
|
||||
mustCalculate = true;
|
||||
}
|
||||
|
||||
if (closer)
|
||||
if (e->closer)
|
||||
r = lappend(r, e);
|
||||
else
|
||||
pairingheap_add(wd, &(CreatePairingHeapNode(e)->ph_node));
|
||||
}
|
||||
|
||||
e2->neighbors[lc].closerSet = true;
|
||||
|
||||
/* Keep pruned connections */
|
||||
while (!pairingheap_is_empty(wd) && list_length(r) < m)
|
||||
r = lappend(r, ((HnswPairingHeapNode *) pairingheap_remove_first(wd))->inner);
|
||||
@@ -909,7 +922,7 @@ HnswUpdateConnection(HnswElement element, HnswCandidate * hc, int m, int lc, int
|
||||
c = lappend(c, &hc2);
|
||||
list_sort(c, CompareCandidateDistances);
|
||||
|
||||
SelectNeighbors(c, m, lc, procinfo, collation, &pruned);
|
||||
SelectNeighbors(c, m, lc, procinfo, collation, hc->element, &hc2, &pruned);
|
||||
|
||||
/* Should not happen */
|
||||
if (pruned == NULL)
|
||||
@@ -1008,7 +1021,7 @@ HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, F
|
||||
else
|
||||
lw = w;
|
||||
|
||||
neighbors = SelectNeighbors(lw, lm, lc, procinfo, collation, NULL);
|
||||
neighbors = SelectNeighbors(lw, lm, lc, procinfo, collation, element, NULL, NULL);
|
||||
|
||||
AddConnections(element, neighbors, lm, lc);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user