Improved code

This commit is contained in:
Andrew Kane
2023-08-08 16:15:36 -07:00
parent 19c7b4e85b
commit 0c0cb3e35e
5 changed files with 575 additions and 558 deletions

View File

@@ -166,7 +166,7 @@ Supported index types are:
## IVFFlat
TODO Add description
An IVFFlat index clusters vectors into lists, and then searches a subset of those lists. It has faster build times and uses less memory than HNSW, but has lower query performance.
Three keys to achieving good recall are:
@@ -217,7 +217,12 @@ COMMIT;
## HNSW
TODO Add description and options
An HNSW index creates a multilayer graph between vectors. It has slower build times and uses more memory than IVFFlat, but has better query performance. Unlike IVFFlat, there's no training step, so the index can be created without any data in the table.
The options for HNSW are:
- `m` - the max number of connections per layer (the bottom layer uses `2 * m`)
- `ef_construction` - the size of the dynamic candidate list for constructing the graph
Add an index for each distance function you want to use.

View File

@@ -41,6 +41,7 @@
#define HNSW_ELEMENT_TUPLE_TYPE 1
#define HNSW_NEIGHBOR_TUPLE_TYPE 2
/* Make graph robust against non-HOT updates */
#define HNSW_HEAPTIDS 10
/* Build phases */
@@ -49,7 +50,6 @@
#define HNSW_ELEMENT_TUPLE_SIZE(_dim) MAXALIGN(offsetof(HnswElementTupleData, vec) + VECTOR_SIZE(_dim))
#define HNSW_NEIGHBOR_TUPLE_SIZE(level, m) MAXALIGN(offsetof(HnswNeighborTupleData, neighbors) + ((level) + 2) * (m) * sizeof(HnswNeighborTupleItem))
#define HNSW_NEIGHBOR_COUNT(itemid) ((ItemIdGetLength(itemid) - offsetof(HnswNeighborTupleData, neighbors)) / sizeof(HnswNeighborTupleItem))
#define HnswPageGetOpaque(page) ((HnswPageOpaque) PageGetSpecialPointer(page))
#define HnswPageGetMeta(page) ((HnswMetaPageData *) PageGetContents(page))
@@ -164,8 +164,8 @@ typedef struct HnswMetaPageData
uint32 magicNumber;
uint32 version;
uint32 dimensions;
uint32 m;
uint32 efConstruction;
uint16 m;
uint16 efConstruction;
BlockNumber entryBlkno;
OffsetNumber entryOffno;
int16 entryLevel;
@@ -201,15 +201,14 @@ typedef struct HnswNeighborTupleItem
{
ItemPointerData indextid;
uint16 unused;
float distance;
float distance; /* improves performance of inserts */
} HnswNeighborTupleItem;
typedef struct HnswNeighborTupleData
{
uint8 type;
uint8 unused;
uint16 unused2;
uint32 unused3;
uint16 count;
HnswNeighborTupleItem neighbors[FLEXIBLE_ARRAY_MEMBER];
} HnswNeighborTupleData;
@@ -277,7 +276,7 @@ void HnswSetNeighborTuple(HnswNeighborTuple ntup, HnswElement e, int m);
void HnswAddHeapTid(HnswElement element, ItemPointer heaptid);
void HnswInitNeighbors(HnswElement element, int m);
bool HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid, Relation heapRel);
void HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadvec);
void HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec);
void HnswSetElementTuple(HnswElementTuple etup, HnswElement element);
/* Index access methods */

View File

@@ -295,6 +295,7 @@ UpdateNeighborPages(Relation index, HnswElement e, int m, List *updates)
GenericXLogState *state;
HnswUpdate *update = lfirst(lc);
ItemId itemid;
HnswNeighborTuple ntup;
Size ntupSize;
int idx;
OffsetNumber offno = update->hc.element->neighborOffno;
@@ -305,23 +306,24 @@ UpdateNeighborPages(Relation index, HnswElement e, int m, List *updates)
state = GenericXLogStart(index);
page = GenericXLogRegisterBuffer(state, buf, 0);
/* Get tuple */
itemid = PageGetItemId(page, offno);
ntup = (HnswNeighborTuple) PageGetItem(page, itemid);
ntupSize = ItemIdGetLength(itemid);
/* Calculate index */
idx = HnswGetIndex(update, m);
/* Make robust against issues */
if (idx < (int) HNSW_NEIGHBOR_COUNT(itemid))
/* Make robust to issues */
if (idx < ntup->count)
{
HnswNeighborTuple ntup = (HnswNeighborTuple) PageGetItem(page, itemid);
HnswNeighborTupleItem *neighbor = &ntup->neighbors[idx];
/* Set item data */
/* Update neighbor */
ItemPointerSet(&neighbor->indextid, e->blkno, e->offno);
neighbor->distance = update->hc.distance;
/* Update connections */
/* Overwrite tuple */
if (!PageIndexTupleOverwrite(page, offno, (Item) ntup, ntupSize))
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
@@ -337,7 +339,7 @@ UpdateNeighborPages(Relation index, HnswElement e, int m, List *updates)
}
/*
* Add a heap tid to an existing element
* Add a heap TID to an existing element
*/
static bool
HnswAddDuplicate(Relation index, HnswElement element, HnswElement dup)
@@ -371,10 +373,10 @@ HnswAddDuplicate(Relation index, HnswElement element, HnswElement dup)
return false;
}
/* Add heap tid */
/* Add heap TID */
etup->heaptids[i] = *((ItemPointer) linitial(element->heaptids));
/* Update index tuple */
/* Overwrite tuple */
if (!PageIndexTupleOverwrite(page, dup->offno, (Item) etup, etupSize))
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));

File diff suppressed because it is too large Load Diff

View File

@@ -20,7 +20,7 @@ DeletedContains(HTAB *deleted, ItemPointer indextid)
}
/*
* Remove deleted heap tids
* Remove deleted heap TIDs
*
* OK to remove for entry point, since always considered for searches and inserts
*/
@@ -114,6 +114,7 @@ RemoveHeapTids(HnswVacuumState * vacuumstate)
/* Keep track of highest non-entry point */
highestPoint->blkno = blkno;
highestPoint->offno = offno;
highestPoint->level = etup->level;
highestLevel = etup->level;
}
}
@@ -142,22 +143,18 @@ NeedsUpdated(HnswVacuumState * vacuumstate, HnswElement element)
BufferAccessStrategy bas = vacuumstate->bas;
Buffer buf;
Page page;
ItemId itemid;
int neighborCount;
HnswNeighborTuple ntup;
bool needsUpdated = false;
buf = ReadBufferExtended(index, MAIN_FORKNUM, element->neighborPage, RBM_NORMAL, bas);
LockBuffer(buf, BUFFER_LOCK_SHARE);
page = BufferGetPage(buf);
itemid = PageGetItemId(page, element->neighborOffno);
ntup = (HnswNeighborTuple) PageGetItem(page, itemid);
neighborCount = HNSW_NEIGHBOR_COUNT(itemid);
ntup = (HnswNeighborTuple) PageGetItem(page, PageGetItemId(page, element->neighborOffno));
Assert(HnswIsNeighborTuple(ntup));
/* Check neighbors */
for (int i = 0; i < neighborCount; i++)
for (int i = 0; i < ntup->count; i++)
{
HnswNeighborTupleItem *neighbor = &ntup->neighbors[i];
@@ -213,26 +210,32 @@ RepairGraphElement(HnswVacuumState * vacuumstate, HnswElement element)
return;
entryPoint = &vacuumstate->highestPoint;
/* Reset neighbors from previous update */
entryPoint->neighbors = NULL;
}
else
entryPoint = NULL;
}
/* Init fields */
HnswInitNeighbors(element, m);
element->heaptids = NIL;
/* Add element to graph, skipping itself */
HnswInsertElement(element, entryPoint, index, procinfo, collation, m, efConstruction, NULL, true);
/* Write out new neighbors on page */
/* Update neighbor tuple */
/* Do this before getting page to minimize locking */
HnswSetNeighborTuple(ntup, element, m);
/* Get neighbor page */
buf = ReadBufferExtended(index, MAIN_FORKNUM, element->neighborPage, RBM_NORMAL, bas);
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
state = GenericXLogStart(index);
page = GenericXLogRegisterBuffer(state, buf, 0);
/* Update neighbors */
HnswSetNeighborTuple(ntup, element, m);
/* Overwrite tuple */
if (!PageIndexTupleOverwrite(page, element->neighborOffno, (Item) ntup, ntupSize))
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
@@ -261,6 +264,7 @@ RepairGraphEntryPoint(HnswVacuumState * vacuumstate)
RepairGraphElement(vacuumstate, highestPoint);
}
/* See if entry point needs updated */
entryPoint = HnswGetEntryPoint(index);
if (entryPoint != NULL)
{
@@ -402,7 +406,6 @@ MarkDeleted(HnswVacuumState * vacuumstate)
Page npage;
BlockNumber neighborPage;
OffsetNumber neighborOffno;
int neighborCount;
/* Skip neighbor tuples */
if (!HnswIsElementTuple(etup))
@@ -412,20 +415,20 @@ MarkDeleted(HnswVacuumState * vacuumstate)
if (etup->deleted)
continue;
/* Skip live tuples */
if (ItemPointerIsValid(&etup->heaptids[0]))
{
stats->num_index_tuples++;
continue;
}
/* Update stats */
stats->tuples_removed++;
/* Calculate sizes */
etupSize = HNSW_ELEMENT_TUPLE_SIZE(etup->vec.dim);
ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(etup->level, vacuumstate->m);
neighborCount = (etup->level + 2) * vacuumstate->m;
/* Get neighbor page */
neighborPage = ItemPointerGetBlockNumber(&etup->neighbortid);
neighborOffno = ItemPointerGetOffsetNumber(&etup->neighbortid);
@@ -449,15 +452,17 @@ MarkDeleted(HnswVacuumState * vacuumstate)
MemSet(&etup->vec.x, 0, etup->vec.dim * sizeof(float));
/* Overwrite neighbors */
for (int i = 0; i < neighborCount; i++)
for (int i = 0; i < ntup->count; i++)
{
ItemPointerSetInvalid(&ntup->neighbors[i].indextid);
ntup->neighbors[i].distance = NAN;
}
/* Overwrite element tuple */
if (!PageIndexTupleOverwrite(page, offno, (Item) etup, etupSize))
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
/* Overwrite neighbor tuple */
if (!PageIndexTupleOverwrite(npage, neighborOffno, (Item) ntup, ntupSize))
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
@@ -543,7 +548,7 @@ hnswbulkdelete(IndexVacuumInfo *info, IndexBulkDeleteResult *stats,
InitVacuumState(&vacuumstate, info, stats, callback, callback_state);
/* Pass 1: Remove heap tids */
/* Pass 1: Remove heap TIDs */
RemoveHeapTids(&vacuumstate);
/* Pass 2: Repair graph */