mirror of
https://github.com/pgvector/pgvector.git
synced 2026-07-01 18:21:16 +08:00
Improved construction code
This commit is contained in:
@@ -258,7 +258,7 @@ List *HnswSearchLayer(Datum q, List *ep, int ef, int lc, Relation index, Fmgr
|
||||
HnswElement HnswGetEntryPoint(Relation index);
|
||||
HnswElement HnswInitElement(ItemPointer tid, int m, double ml, int maxLevel);
|
||||
void HnswFreeElement(HnswElement element);
|
||||
HnswElement HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, FmgrInfo *procinfo, Oid collation, int m, int efConstruction, List ***updateNeighbors, bool vacuuming);
|
||||
HnswElement HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, FmgrInfo *procinfo, Oid collation, int m, int efConstruction, bool vacuuming);
|
||||
HnswCandidate *HnswEntryCandidate(HnswElement em, Datum q, Relation rel, FmgrInfo *procinfo, Oid collation, bool loadvec);
|
||||
void HnswUpdateMetaPage(Relation index, bool updateEntry, HnswElement entryPoint, BlockNumber insertPage, ForkNumber forkNum);
|
||||
void HnswSetNeighborTuple(HnswNeighborTuple ntup, HnswElement e, int m);
|
||||
|
||||
@@ -288,7 +288,17 @@ InsertTuple(Relation index, Datum *values, HnswElement element, HnswBuildState *
|
||||
memcpy(element->vec, DatumGetVector(value), VECTOR_SIZE(buildstate->dimensions));
|
||||
|
||||
/* Insert element in graph */
|
||||
*dup = HnswInsertElement(element, entryPoint, NULL, procinfo, collation, m, efConstruction, NULL, false);
|
||||
*dup = HnswInsertElement(element, entryPoint, NULL, procinfo, collation, m, efConstruction, false);
|
||||
|
||||
/* Update neighbors */
|
||||
for (int lc = element->level; lc >= 0; lc--)
|
||||
{
|
||||
int lm = HnswGetLayerM(m, lc);
|
||||
HnswNeighborArray *neighbors = &element->neighbors[lc];
|
||||
|
||||
for (int i = 0; i < neighbors->length; i++)
|
||||
HnswUpdateConnection(element, &neighbors->items[i], lm, lc, NULL, NULL, procinfo, collation);
|
||||
}
|
||||
|
||||
/* Update entry point if needed */
|
||||
if (*dup == NULL && (entryPoint == NULL || element->level > entryPoint->level))
|
||||
|
||||
@@ -273,17 +273,16 @@ WriteNewElementPages(Relation index, HnswElement e, int m)
|
||||
* Update neighbors
|
||||
*/
|
||||
static void
|
||||
UpdateNeighborPages(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m, List **neighbors)
|
||||
UpdateNeighborPages(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m)
|
||||
{
|
||||
for (int lc = e->level; lc >= 0; lc--)
|
||||
{
|
||||
int lm = HnswGetLayerM(m, lc);
|
||||
List *levelNeighbors = neighbors[lc];
|
||||
ListCell *lc2;
|
||||
HnswNeighborArray *neighbors = &e->neighbors[lc];
|
||||
|
||||
foreach(lc2, levelNeighbors)
|
||||
for (int i = 0; i < neighbors->length; i++)
|
||||
{
|
||||
HnswCandidate *hc = lfirst(lc2);
|
||||
HnswCandidate *hc = &neighbors->items[i];
|
||||
Buffer buf;
|
||||
Page page;
|
||||
GenericXLogState *state;
|
||||
@@ -393,7 +392,7 @@ HnswAddDuplicate(Relation index, HnswElement element, HnswElement dup)
|
||||
* Write changes to disk
|
||||
*/
|
||||
static void
|
||||
WriteElement(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement element, int m, List **neighbors, HnswElement dup, HnswElement entryPoint)
|
||||
WriteElement(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement element, int m, HnswElement dup, HnswElement entryPoint)
|
||||
{
|
||||
/* Try to add to existing page */
|
||||
if (dup != NULL)
|
||||
@@ -404,7 +403,7 @@ WriteElement(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement elem
|
||||
|
||||
/* If fails, take this path */
|
||||
WriteNewElementPages(index, element, m);
|
||||
UpdateNeighborPages(index, procinfo, collation, element, m, neighbors);
|
||||
UpdateNeighborPages(index, procinfo, collation, element, m);
|
||||
|
||||
/* Update metapage if needed */
|
||||
if (entryPoint == NULL || element->level > entryPoint->level)
|
||||
@@ -426,7 +425,6 @@ HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_ti
|
||||
double ml = HnswGetMl(m);
|
||||
FmgrInfo *procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC);
|
||||
Oid collation = index->rd_indcollation[0];
|
||||
List **neighbors;
|
||||
HnswElement dup;
|
||||
|
||||
/* Detoast once for all calls */
|
||||
@@ -448,10 +446,10 @@ HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_ti
|
||||
entryPoint = HnswGetEntryPoint(index);
|
||||
|
||||
/* Insert element in graph */
|
||||
dup = HnswInsertElement(element, entryPoint, index, procinfo, collation, m, efConstruction, &neighbors, false);
|
||||
dup = HnswInsertElement(element, entryPoint, index, procinfo, collation, m, efConstruction, false);
|
||||
|
||||
/* Write to disk */
|
||||
WriteElement(index, procinfo, collation, element, m, neighbors, dup, entryPoint);
|
||||
WriteElement(index, procinfo, collation, element, m, dup, entryPoint);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -744,13 +744,13 @@ SelectNeighbors(List *c, int m, int lc, FmgrInfo *procinfo, Oid collation, HnswC
|
||||
* Find duplicate element
|
||||
*/
|
||||
static HnswElement
|
||||
HnswFindDuplicate(HnswElement e, List *neighbors)
|
||||
HnswFindDuplicate(HnswElement e)
|
||||
{
|
||||
ListCell *lc;
|
||||
HnswNeighborArray *neighbors = &e->neighbors[0];
|
||||
|
||||
foreach(lc, neighbors)
|
||||
for (int i = 0; i < neighbors->length; i++)
|
||||
{
|
||||
HnswCandidate *neighbor = lfirst(lc);
|
||||
HnswCandidate *neighbor = &neighbors->items[i];
|
||||
|
||||
/* Exit early since ordered by distance */
|
||||
if (vector_cmp_internal(e->vec, neighbor->element->vec) != 0)
|
||||
@@ -885,13 +885,12 @@ HnswUpdateConnection(HnswElement element, HnswCandidate * hc, int m, int lc, int
|
||||
* Algorithm 1 from paper
|
||||
*/
|
||||
HnswElement
|
||||
HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, FmgrInfo *procinfo, Oid collation, int m, int efConstruction, List ***updateNeighbors, bool vacuuming)
|
||||
HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, FmgrInfo *procinfo, Oid collation, int m, int efConstruction, bool vacuuming)
|
||||
{
|
||||
List *ep = NIL;
|
||||
List *w;
|
||||
int level = element->level;
|
||||
int entryLevel;
|
||||
List **neighbors = palloc(sizeof(List *) * (level + 1));
|
||||
Datum q = PointerGetDatum(element->vec);
|
||||
HnswElement dup;
|
||||
BlockNumber *skipPage = vacuuming ? &element->neighborPage : NULL;
|
||||
@@ -920,16 +919,14 @@ HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, F
|
||||
ep = w;
|
||||
}
|
||||
|
||||
while (level > entryLevel)
|
||||
{
|
||||
neighbors[level] = NIL;
|
||||
level--;
|
||||
}
|
||||
if (level > entryLevel)
|
||||
level = entryLevel;
|
||||
|
||||
/* 2nd phase */
|
||||
for (int lc = level; lc >= 0; lc--)
|
||||
{
|
||||
int lm = HnswGetLayerM(m, lc);
|
||||
List *neighbors;
|
||||
|
||||
w = HnswSearchLayer(q, ep, efConstruction, lc, index, procinfo, collation, true, skipPage, skipOffno);
|
||||
|
||||
@@ -937,30 +934,18 @@ HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, F
|
||||
if (removeEntryPoint)
|
||||
w = list_delete_ptr(w, entryCandidate);
|
||||
|
||||
neighbors[lc] = SelectNeighbors(w, lm, lc, procinfo, collation, NULL);
|
||||
neighbors = SelectNeighbors(w, lm, lc, procinfo, collation, NULL);
|
||||
|
||||
AddConnections(element, neighbors[lc], lm, lc);
|
||||
|
||||
/* Update connections */
|
||||
if (!vacuuming && updateNeighbors == NULL)
|
||||
{
|
||||
ListCell *lc2;
|
||||
|
||||
foreach(lc2, neighbors[lc])
|
||||
HnswUpdateConnection(element, lfirst(lc2), lm, lc, NULL, index, procinfo, collation);
|
||||
}
|
||||
AddConnections(element, neighbors, lm, lc);
|
||||
|
||||
ep = w;
|
||||
}
|
||||
|
||||
if (updateNeighbors != NULL)
|
||||
*updateNeighbors = neighbors;
|
||||
|
||||
/* Look for duplicates */
|
||||
/* This must come last, since duplicate updates can fail */
|
||||
if (level >= 0 && !vacuuming)
|
||||
{
|
||||
dup = HnswFindDuplicate(element, neighbors[0]);
|
||||
dup = HnswFindDuplicate(element);
|
||||
if (dup != NULL)
|
||||
return dup;
|
||||
}
|
||||
|
||||
@@ -223,7 +223,7 @@ RepairGraphElement(HnswVacuumState * vacuumstate, HnswElement element)
|
||||
element->heaptids = NIL;
|
||||
|
||||
/* Add element to graph, skipping itself */
|
||||
HnswInsertElement(element, entryPoint, index, procinfo, collation, m, efConstruction, NULL, true);
|
||||
HnswInsertElement(element, entryPoint, index, procinfo, collation, m, efConstruction, true);
|
||||
|
||||
/* Update neighbor tuple */
|
||||
/* Do this before getting page to minimize locking */
|
||||
|
||||
Reference in New Issue
Block a user