Use datum for HNSW

This commit is contained in:
Andrew Kane
2023-09-12 22:18:19 -07:00
parent 9ac825d14e
commit d57a34b25c
5 changed files with 41 additions and 34 deletions

View File

@@ -57,7 +57,7 @@
/* PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE is 1 */
#define PROGRESS_HNSW_PHASE_LOAD 2
/*
 * Size of an element tuple, rounded up with MAXALIGN: the bytes of
 * HnswElementTupleData that precede its trailing member, plus the size of
 * the stored value.
 *
 * NOTE(review): this is a diff view, so the macro appears twice. The `_dim`
 * form (offsetof ... vec, sized by VECTOR_SIZE of a dimension count) looks
 * like the line being removed, and the `_datum` form (offsetof ... value,
 * sized by VARSIZE_ANY of the datum) looks like its replacement -- confirm
 * against the real header before relying on either.
 */
#define HNSW_ELEMENT_TUPLE_SIZE(_dim) MAXALIGN(offsetof(HnswElementTupleData, vec) + VECTOR_SIZE(_dim))
#define HNSW_ELEMENT_TUPLE_SIZE(_datum) MAXALIGN(offsetof(HnswElementTupleData, value) + VARSIZE_ANY(_datum))
/*
 * Size of a neighbor tuple, rounded up with MAXALIGN: the bytes of
 * HnswNeighborTupleData that precede indextids, plus room for
 * ((level) + 2) * (m) ItemPointerData entries.
 */
#define HNSW_NEIGHBOR_TUPLE_SIZE(level, m) MAXALIGN(offsetof(HnswNeighborTupleData, indextids) + ((level) + 2) * (m) * sizeof(ItemPointerData))
/* Cast the result of PageGetSpecialPointer(page) to HnswPageOpaque */
#define HnswPageGetOpaque(page) ((HnswPageOpaque) PageGetSpecialPointer(page))
@@ -96,12 +96,13 @@ typedef struct HnswElementData
List *heaptids;
uint8 level;
uint8 deleted;
bool loaded;
HnswNeighborArray *neighbors;
BlockNumber blkno;
OffsetNumber offno;
OffsetNumber neighborOffno;
BlockNumber neighborPage;
Vector *vec;
Datum value;
} HnswElementData;
typedef HnswElementData * HnswElement;
@@ -200,7 +201,7 @@ typedef struct HnswElementTupleData
ItemPointerData heaptids[HNSW_HEAPTIDS];
ItemPointerData neighbortid;
uint16 unused2;
Vector vec;
char value[FLEXIBLE_ARRAY_MEMBER];
} HnswElementTupleData;
typedef HnswElementTupleData * HnswElementTuple;

View File

@@ -8,6 +8,7 @@
#include "lib/pairingheap.h"
#include "nodes/pg_list.h"
#include "storage/bufmgr.h"
#include "utils/datum.h"
#include "utils/memutils.h"
#if PG_VERSION_NUM >= 140000
@@ -106,8 +107,6 @@ CreateElementPages(HnswBuildState * buildstate)
{
Relation index = buildstate->index;
ForkNumber forkNum = buildstate->forkNum;
int dimensions = buildstate->dimensions;
Size etupSize;
Size maxSize;
HnswElementTuple etup;
HnswNeighborTuple ntup;
@@ -119,10 +118,9 @@ CreateElementPages(HnswBuildState * buildstate)
/* Calculate sizes */
maxSize = BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(HnswPageOpaqueData));
etupSize = HNSW_ELEMENT_TUPLE_SIZE(dimensions);
/* Allocate once */
etup = palloc0(etupSize);
etup = palloc0(maxSize);
ntup = palloc0(maxSize);
/* Prepare first page */
@@ -134,12 +132,14 @@ CreateElementPages(HnswBuildState * buildstate)
foreach(lc, buildstate->elements)
{
HnswElement element = lfirst(lc);
Size etupSize;
Size ntupSize;
Size combinedSize;
HnswSetElementTuple(etup, element);
/* Calculate sizes */
etupSize = HNSW_ELEMENT_TUPLE_SIZE(element->value);
ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(element->level, buildstate->m);
combinedSize = etupSize + ntupSize + sizeof(ItemIdData);
@@ -276,18 +276,15 @@ InsertTuple(Relation index, Datum *values, HnswElement element, HnswBuildState *
int m = buildstate->m;
/* Detoast once for all calls */
Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0]));
element->value = PointerGetDatum(PG_DETOAST_DATUM(values[0]));
/* Normalize if needed */
if (buildstate->normprocinfo != NULL)
{
if (!HnswNormValue(buildstate->normprocinfo, collation, &value, buildstate->normvec))
if (!HnswNormValue(buildstate->normprocinfo, collation, &element->value, buildstate->normvec))
return false;
}
/* Copy value to element so accessible outside of memory context */
memcpy(element->vec, DatumGetVector(value), VECTOR_SIZE(buildstate->dimensions));
/* Insert element in graph */
HnswInsertElement(element, entryPoint, NULL, procinfo, collation, m, efConstruction, false);
@@ -363,7 +360,6 @@ BuildCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values,
/* Allocate necessary memory outside of memory context */
element = HnswInitElement(tid, buildstate->m, buildstate->ml, buildstate->maxLevel);
element->vec = palloc(VECTOR_SIZE(buildstate->dimensions));
/* Use memory context since detoast can allocate */
oldCtx = MemoryContextSwitchTo(buildstate->tmpCtx);
@@ -371,9 +367,8 @@ BuildCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values,
/* Insert tuple */
inserted = InsertTuple(index, values, element, buildstate, &dup);
/* Reset memory context */
/* Switch memory context */
MemoryContextSwitchTo(oldCtx);
MemoryContextReset(buildstate->tmpCtx);
/* Add outside memory context */
if (dup != NULL)
@@ -381,9 +376,15 @@ BuildCallback(Relation index, CALLBACK_ITEM_POINTER, Datum *values,
/* Add to buildstate or free */
if (inserted)
{
element->value = datumTransfer(element->value, false, -1);
buildstate->elements = lappend(buildstate->elements, element);
}
else
HnswFreeElement(element);
/* Reset memory context */
MemoryContextReset(buildstate->tmpCtx);
}
/*

View File

@@ -123,7 +123,6 @@ WriteNewElementPages(Relation index, HnswElement e, int m, BlockNumber insertPag
Size minCombinedSize;
HnswElementTuple etup;
BlockNumber currentPage = insertPage;
int dimensions = e->vec->dim;
HnswNeighborTuple ntup;
Buffer nbuf;
Page npage;
@@ -132,7 +131,7 @@ WriteNewElementPages(Relation index, HnswElement e, int m, BlockNumber insertPag
BlockNumber newInsertPage = InvalidBlockNumber;
/* Calculate sizes */
etupSize = HNSW_ELEMENT_TUPLE_SIZE(dimensions);
etupSize = HNSW_ELEMENT_TUPLE_SIZE(e->value);
ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(e->level, m);
combinedSize = etupSize + ntupSize + sizeof(ItemIdData);
maxSize = BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(HnswPageOpaqueData));
@@ -411,7 +410,7 @@ HnswAddDuplicate(Relation index, HnswElement element, HnswElement dup)
Buffer buf;
Page page;
GenericXLogState *state;
Size etupSize = HNSW_ELEMENT_TUPLE_SIZE(dup->vec->dim);
Size etupSize = HNSW_ELEMENT_TUPLE_SIZE(dup->value);
HnswElementTuple etup;
int i;
@@ -522,7 +521,7 @@ HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_ti
/* Create an element */
element = HnswInitElement(heap_tid, m, HnswGetMl(m), HnswGetMaxLevel(m));
element->vec = DatumGetVector(value);
element->value = value;
/* Prevent concurrent inserts when likely updating entry point */
if (entryPoint == NULL || element->level > entryPoint->level)

View File

@@ -4,6 +4,7 @@
#include "hnsw.h"
#include "storage/bufmgr.h"
#include "utils/datum.h"
#include "vector.h"
/*
@@ -187,7 +188,6 @@ HnswFreeElement(HnswElement element)
{
HnswFreeNeighbors(element);
list_free_deep(element->heaptids);
pfree(element->vec);
pfree(element);
}
@@ -214,7 +214,7 @@ HnswInitElementFromBlock(BlockNumber blkno, OffsetNumber offno)
element->blkno = blkno;
element->offno = offno;
element->neighbors = NULL;
element->vec = NULL;
element->loaded = false;
return element;
}
@@ -324,7 +324,7 @@ HnswSetElementTuple(HnswElementTuple etup, HnswElement element)
else
ItemPointerSetInvalid(&etup->heaptids[i]);
}
memcpy(&etup->vec, element->vec, VECTOR_SIZE(element->vec->dim));
memcpy(&etup->value, DatumGetPointer(element->value), VARSIZE_ANY(element->value));
}
/*
@@ -447,8 +447,10 @@ HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHe
if (loadVec)
{
element->vec = palloc(VECTOR_SIZE(etup->vec.dim));
memcpy(element->vec, &etup->vec, VECTOR_SIZE(etup->vec.dim));
Datum value = PointerGetDatum(&etup->value);
element->value = datumCopy(value, false, -1);
element->loaded = true;
}
}
@@ -476,7 +478,7 @@ HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index,
/* Calculate distance */
if (distance != NULL)
*distance = (float) DatumGetFloat8(FunctionCall2Coll(procinfo, collation, *q, PointerGetDatum(&etup->vec)));
*distance = (float) DatumGetFloat8(FunctionCall2Coll(procinfo, collation, *q, PointerGetDatum(&etup->value)));
UnlockReleaseBuffer(buf);
}
@@ -487,7 +489,7 @@ HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index,
/*
 * Return the distance between the query datum q and the candidate's element.
 *
 * The distance is obtained by calling the function in procinfo (with the
 * given collation) on q and the element's stored value, and reading the
 * result as a float8.
 *
 * NOTE(review): two return statements appear because this is a diff view.
 * The first (wrapping element->vec with PointerGetDatum) looks like the
 * removed line and the second (passing element->value directly) looks like
 * its replacement -- confirm against the real source file.
 */
static float
GetCandidateDistance(HnswCandidate * hc, Datum q, FmgrInfo *procinfo, Oid collation)
{
return DatumGetFloat8(FunctionCall2Coll(procinfo, collation, q, PointerGetDatum(hc->element->vec)));
return DatumGetFloat8(FunctionCall2Coll(procinfo, collation, q, hc->element->value));
}
/*
@@ -722,7 +724,7 @@ HnswGetDistance(HnswElement a, HnswElement b, int lc, FmgrInfo *procinfo, Oid co
}
}
return DatumGetFloat8(FunctionCall2Coll(procinfo, collation, PointerGetDatum(a->vec), PointerGetDatum(b->vec)));
return DatumGetFloat8(FunctionCall2Coll(procinfo, collation, a->value, b->value));
}
/*
@@ -805,7 +807,7 @@ HnswFindDuplicate(HnswElement e)
HnswCandidate *neighbor = &neighbors->items[i];
/* Exit early since ordered by distance */
if (vector_cmp_internal(e->vec, neighbor->element->vec) != 0)
if (!datumIsEqual(e->value, neighbor->element->value, false, -1))
break;
/* Check for space */
@@ -880,13 +882,13 @@ HnswUpdateConnection(HnswElement element, HnswCandidate * hc, int m, int lc, int
/* Load elements on insert */
if (index != NULL)
{
Datum q = PointerGetDatum(hc->element->vec);
Datum q = hc->element->value;
for (int i = 0; i < currentNeighbors->length; i++)
{
HnswCandidate *hc3 = &currentNeighbors->items[i];
if (hc3->element->vec == NULL)
if (!hc3->element->loaded)
HnswLoadElement(hc3->element, &hc3->distance, &q, index, procinfo, collation, true);
else
hc3->distance = GetCandidateDistance(hc3, q, procinfo, collation);
@@ -968,7 +970,7 @@ HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, F
List *w;
int level = element->level;
int entryLevel;
Datum q = PointerGetDatum(element->vec);
Datum q = element->value;
HnswElement skipElement = existing ? element : NULL;
/* No neighbors if no entry point */

View File

@@ -93,7 +93,7 @@ RemoveHeapTids(HnswVacuumState * vacuumstate)
if (itemUpdated)
{
Size etupSize = HNSW_ELEMENT_TUPLE_SIZE(etup->vec.dim);
Size etupSize = HNSW_ELEMENT_TUPLE_SIZE(PointerGetDatum(&etup->value));
/* Mark rest as invalid */
for (int i = idx; i < HNSW_HEAPTIDS; i++)
@@ -485,6 +485,7 @@ MarkDeleted(HnswVacuumState * vacuumstate)
HnswNeighborTuple ntup;
Size etupSize;
Size ntupSize;
Datum value;
Buffer nbuf;
Page npage;
BlockNumber neighborPage;
@@ -508,8 +509,11 @@ MarkDeleted(HnswVacuumState * vacuumstate)
if (ItemPointerIsValid(&etup->heaptids[0]))
continue;
/* Get datum */
value = PointerGetDatum(&etup->value);
/* Calculate sizes */
etupSize = HNSW_ELEMENT_TUPLE_SIZE(etup->vec.dim);
etupSize = HNSW_ELEMENT_TUPLE_SIZE(value);
ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(etup->level, vacuumstate->m);
/* Get neighbor page */
@@ -532,7 +536,7 @@ MarkDeleted(HnswVacuumState * vacuumstate)
/* Overwrite element */
etup->deleted = 1;
MemSet(&etup->vec.x, 0, etup->vec.dim * sizeof(float));
MemSet(&etup->value, 0, VARSIZE_ANY(value));
/* Overwrite neighbors */
for (int i = 0; i < ntup->count; i++)