mirror of
https://github.com/pgvector/pgvector.git
synced 2026-07-01 10:11:20 +08:00
Improved code
This commit is contained in:
@@ -166,7 +166,7 @@ Supported index types are:
|
||||
|
||||
## IVFFlat
|
||||
|
||||
TODO Add description
|
||||
An IVFFlat index clusters vectors into lists, and then searches a subset of those lists. It has faster build times and uses less memory than HNSW, but has lower query performance.
|
||||
|
||||
Three keys to achieving good recall are:
|
||||
|
||||
@@ -217,7 +217,12 @@ COMMIT;
|
||||
|
||||
## HNSW
|
||||
|
||||
TODO Add description and options
|
||||
An HNSW index creates a multilayer graph between vectors. It has slower build times and uses more memory than IVFFlat, but has better query performance. Unlike IVFFlat, there’s no training step, so the index can be created without any data in the table.
|
||||
|
||||
The options for HNSW are:
|
||||
|
||||
- `m` - the max number of connections per layer (the bottom layer uses `2 * m`)
|
||||
- `ef_construction` - the size of the dynamic candidate list for constructing the graph
|
||||
|
||||
Add an index for each distance function you want to use.
|
||||
|
||||
|
||||
13
src/hnsw.h
13
src/hnsw.h
@@ -41,6 +41,7 @@
|
||||
#define HNSW_ELEMENT_TUPLE_TYPE 1
|
||||
#define HNSW_NEIGHBOR_TUPLE_TYPE 2
|
||||
|
||||
/* Make graph robust against non-HOT updates */
|
||||
#define HNSW_HEAPTIDS 10
|
||||
|
||||
/* Build phases */
|
||||
@@ -49,7 +50,6 @@
|
||||
|
||||
#define HNSW_ELEMENT_TUPLE_SIZE(_dim) MAXALIGN(offsetof(HnswElementTupleData, vec) + VECTOR_SIZE(_dim))
|
||||
#define HNSW_NEIGHBOR_TUPLE_SIZE(level, m) MAXALIGN(offsetof(HnswNeighborTupleData, neighbors) + ((level) + 2) * (m) * sizeof(HnswNeighborTupleItem))
|
||||
#define HNSW_NEIGHBOR_COUNT(itemid) ((ItemIdGetLength(itemid) - offsetof(HnswNeighborTupleData, neighbors)) / sizeof(HnswNeighborTupleItem))
|
||||
|
||||
#define HnswPageGetOpaque(page) ((HnswPageOpaque) PageGetSpecialPointer(page))
|
||||
#define HnswPageGetMeta(page) ((HnswMetaPageData *) PageGetContents(page))
|
||||
@@ -164,8 +164,8 @@ typedef struct HnswMetaPageData
|
||||
uint32 magicNumber;
|
||||
uint32 version;
|
||||
uint32 dimensions;
|
||||
uint32 m;
|
||||
uint32 efConstruction;
|
||||
uint16 m;
|
||||
uint16 efConstruction;
|
||||
BlockNumber entryBlkno;
|
||||
OffsetNumber entryOffno;
|
||||
int16 entryLevel;
|
||||
@@ -201,15 +201,14 @@ typedef struct HnswNeighborTupleItem
|
||||
{
|
||||
ItemPointerData indextid;
|
||||
uint16 unused;
|
||||
float distance;
|
||||
float distance; /* improves performance of inserts */
|
||||
} HnswNeighborTupleItem;
|
||||
|
||||
typedef struct HnswNeighborTupleData
|
||||
{
|
||||
uint8 type;
|
||||
uint8 unused;
|
||||
uint16 unused2;
|
||||
uint32 unused3;
|
||||
uint16 count;
|
||||
HnswNeighborTupleItem neighbors[FLEXIBLE_ARRAY_MEMBER];
|
||||
} HnswNeighborTupleData;
|
||||
|
||||
@@ -277,7 +276,7 @@ void HnswSetNeighborTuple(HnswNeighborTuple ntup, HnswElement e, int m);
|
||||
void HnswAddHeapTid(HnswElement element, ItemPointer heaptid);
|
||||
void HnswInitNeighbors(HnswElement element, int m);
|
||||
bool HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid, Relation heapRel);
|
||||
void HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadvec);
|
||||
void HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec);
|
||||
void HnswSetElementTuple(HnswElementTuple etup, HnswElement element);
|
||||
|
||||
/* Index access methods */
|
||||
|
||||
@@ -295,6 +295,7 @@ UpdateNeighborPages(Relation index, HnswElement e, int m, List *updates)
|
||||
GenericXLogState *state;
|
||||
HnswUpdate *update = lfirst(lc);
|
||||
ItemId itemid;
|
||||
HnswNeighborTuple ntup;
|
||||
Size ntupSize;
|
||||
int idx;
|
||||
OffsetNumber offno = update->hc.element->neighborOffno;
|
||||
@@ -305,23 +306,24 @@ UpdateNeighborPages(Relation index, HnswElement e, int m, List *updates)
|
||||
state = GenericXLogStart(index);
|
||||
page = GenericXLogRegisterBuffer(state, buf, 0);
|
||||
|
||||
/* Get tuple */
|
||||
itemid = PageGetItemId(page, offno);
|
||||
ntup = (HnswNeighborTuple) PageGetItem(page, itemid);
|
||||
ntupSize = ItemIdGetLength(itemid);
|
||||
|
||||
/* Calculate index */
|
||||
idx = HnswGetIndex(update, m);
|
||||
|
||||
/* Make robust against issues */
|
||||
if (idx < (int) HNSW_NEIGHBOR_COUNT(itemid))
|
||||
/* Make robust to issues */
|
||||
if (idx < ntup->count)
|
||||
{
|
||||
HnswNeighborTuple ntup = (HnswNeighborTuple) PageGetItem(page, itemid);
|
||||
|
||||
HnswNeighborTupleItem *neighbor = &ntup->neighbors[idx];
|
||||
|
||||
/* Set item data */
|
||||
/* Update neighbor */
|
||||
ItemPointerSet(&neighbor->indextid, e->blkno, e->offno);
|
||||
neighbor->distance = update->hc.distance;
|
||||
|
||||
/* Update connections */
|
||||
/* Overwrite tuple */
|
||||
if (!PageIndexTupleOverwrite(page, offno, (Item) ntup, ntupSize))
|
||||
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
|
||||
|
||||
@@ -337,7 +339,7 @@ UpdateNeighborPages(Relation index, HnswElement e, int m, List *updates)
|
||||
}
|
||||
|
||||
/*
|
||||
* Add a heap tid to an existing element
|
||||
* Add a heap TID to an existing element
|
||||
*/
|
||||
static bool
|
||||
HnswAddDuplicate(Relation index, HnswElement element, HnswElement dup)
|
||||
@@ -371,10 +373,10 @@ HnswAddDuplicate(Relation index, HnswElement element, HnswElement dup)
|
||||
return false;
|
||||
}
|
||||
|
||||
/* Add heap tid */
|
||||
/* Add heap TID */
|
||||
etup->heaptids[i] = *((ItemPointer) linitial(element->heaptids));
|
||||
|
||||
/* Update index tuple */
|
||||
/* Overwrite tuple */
|
||||
if (!PageIndexTupleOverwrite(page, dup->offno, (Item) etup, etupSize))
|
||||
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
|
||||
|
||||
|
||||
1054
src/hnswutils.c
1054
src/hnswutils.c
File diff suppressed because it is too large
Load Diff
@@ -20,7 +20,7 @@ DeletedContains(HTAB *deleted, ItemPointer indextid)
|
||||
}
|
||||
|
||||
/*
|
||||
* Remove deleted heap tids
|
||||
* Remove deleted heap TIDs
|
||||
*
|
||||
* OK to remove for entry point, since always considered for searches and inserts
|
||||
*/
|
||||
@@ -114,6 +114,7 @@ RemoveHeapTids(HnswVacuumState * vacuumstate)
|
||||
/* Keep track of highest non-entry point */
|
||||
highestPoint->blkno = blkno;
|
||||
highestPoint->offno = offno;
|
||||
highestPoint->level = etup->level;
|
||||
highestLevel = etup->level;
|
||||
}
|
||||
}
|
||||
@@ -142,22 +143,18 @@ NeedsUpdated(HnswVacuumState * vacuumstate, HnswElement element)
|
||||
BufferAccessStrategy bas = vacuumstate->bas;
|
||||
Buffer buf;
|
||||
Page page;
|
||||
ItemId itemid;
|
||||
int neighborCount;
|
||||
HnswNeighborTuple ntup;
|
||||
bool needsUpdated = false;
|
||||
|
||||
buf = ReadBufferExtended(index, MAIN_FORKNUM, element->neighborPage, RBM_NORMAL, bas);
|
||||
LockBuffer(buf, BUFFER_LOCK_SHARE);
|
||||
page = BufferGetPage(buf);
|
||||
itemid = PageGetItemId(page, element->neighborOffno);
|
||||
ntup = (HnswNeighborTuple) PageGetItem(page, itemid);
|
||||
neighborCount = HNSW_NEIGHBOR_COUNT(itemid);
|
||||
ntup = (HnswNeighborTuple) PageGetItem(page, PageGetItemId(page, element->neighborOffno));
|
||||
|
||||
Assert(HnswIsNeighborTuple(ntup));
|
||||
|
||||
/* Check neighbors */
|
||||
for (int i = 0; i < neighborCount; i++)
|
||||
for (int i = 0; i < ntup->count; i++)
|
||||
{
|
||||
HnswNeighborTupleItem *neighbor = &ntup->neighbors[i];
|
||||
|
||||
@@ -213,26 +210,32 @@ RepairGraphElement(HnswVacuumState * vacuumstate, HnswElement element)
|
||||
return;
|
||||
|
||||
entryPoint = &vacuumstate->highestPoint;
|
||||
|
||||
/* Reset neighbors from previous update */
|
||||
entryPoint->neighbors = NULL;
|
||||
}
|
||||
else
|
||||
entryPoint = NULL;
|
||||
}
|
||||
|
||||
/* Init fields */
|
||||
HnswInitNeighbors(element, m);
|
||||
element->heaptids = NIL;
|
||||
|
||||
/* Add element to graph, skipping itself */
|
||||
HnswInsertElement(element, entryPoint, index, procinfo, collation, m, efConstruction, NULL, true);
|
||||
|
||||
/* Write out new neighbors on page */
|
||||
/* Update neighbor tuple */
|
||||
/* Do this before getting page to minimize locking */
|
||||
HnswSetNeighborTuple(ntup, element, m);
|
||||
|
||||
/* Get neighbor page */
|
||||
buf = ReadBufferExtended(index, MAIN_FORKNUM, element->neighborPage, RBM_NORMAL, bas);
|
||||
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
|
||||
state = GenericXLogStart(index);
|
||||
page = GenericXLogRegisterBuffer(state, buf, 0);
|
||||
|
||||
/* Update neighbors */
|
||||
HnswSetNeighborTuple(ntup, element, m);
|
||||
|
||||
/* Overwrite tuple */
|
||||
if (!PageIndexTupleOverwrite(page, element->neighborOffno, (Item) ntup, ntupSize))
|
||||
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
|
||||
|
||||
@@ -261,6 +264,7 @@ RepairGraphEntryPoint(HnswVacuumState * vacuumstate)
|
||||
RepairGraphElement(vacuumstate, highestPoint);
|
||||
}
|
||||
|
||||
/* See if entry point needs updated */
|
||||
entryPoint = HnswGetEntryPoint(index);
|
||||
if (entryPoint != NULL)
|
||||
{
|
||||
@@ -402,7 +406,6 @@ MarkDeleted(HnswVacuumState * vacuumstate)
|
||||
Page npage;
|
||||
BlockNumber neighborPage;
|
||||
OffsetNumber neighborOffno;
|
||||
int neighborCount;
|
||||
|
||||
/* Skip neighbor tuples */
|
||||
if (!HnswIsElementTuple(etup))
|
||||
@@ -412,20 +415,20 @@ MarkDeleted(HnswVacuumState * vacuumstate)
|
||||
if (etup->deleted)
|
||||
continue;
|
||||
|
||||
/* Skip live tuples */
|
||||
if (ItemPointerIsValid(&etup->heaptids[0]))
|
||||
{
|
||||
stats->num_index_tuples++;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* Update stats */
|
||||
stats->tuples_removed++;
|
||||
|
||||
/* Calculate sizes */
|
||||
etupSize = HNSW_ELEMENT_TUPLE_SIZE(etup->vec.dim);
|
||||
ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(etup->level, vacuumstate->m);
|
||||
|
||||
neighborCount = (etup->level + 2) * vacuumstate->m;
|
||||
|
||||
/* Get neighbor page */
|
||||
neighborPage = ItemPointerGetBlockNumber(&etup->neighbortid);
|
||||
neighborOffno = ItemPointerGetOffsetNumber(&etup->neighbortid);
|
||||
@@ -449,15 +452,17 @@ MarkDeleted(HnswVacuumState * vacuumstate)
|
||||
MemSet(&etup->vec.x, 0, etup->vec.dim * sizeof(float));
|
||||
|
||||
/* Overwrite neighbors */
|
||||
for (int i = 0; i < neighborCount; i++)
|
||||
for (int i = 0; i < ntup->count; i++)
|
||||
{
|
||||
ItemPointerSetInvalid(&ntup->neighbors[i].indextid);
|
||||
ntup->neighbors[i].distance = NAN;
|
||||
}
|
||||
|
||||
/* Overwrite element tuple */
|
||||
if (!PageIndexTupleOverwrite(page, offno, (Item) etup, etupSize))
|
||||
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
|
||||
|
||||
/* Overwrite neighbor tuple */
|
||||
if (!PageIndexTupleOverwrite(npage, neighborOffno, (Item) ntup, ntupSize))
|
||||
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
|
||||
|
||||
@@ -543,7 +548,7 @@ hnswbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
|
||||
|
||||
InitVacuumState(&vacuumstate, info, stats, callback, callback_state);
|
||||
|
||||
/* Pass 1: Remove heap tids */
|
||||
/* Pass 1: Remove heap TIDs */
|
||||
RemoveHeapTids(&vacuumstate);
|
||||
|
||||
/* Pass 2: Repair graph */
|
||||
|
||||
Reference in New Issue
Block a user