From 57c05c59a2c5ba0f786d55a650da7517170fdf7e Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 9 Oct 2024 20:50:17 -0700 Subject: [PATCH 1/2] DRY code for forming index value --- src/hnsw.h | 3 ++- src/hnswbuild.c | 24 ++++++------------------ src/hnswinsert.c | 29 ++++++++--------------------- src/hnswutils.c | 27 +++++++++++++++++++++++++++ 4 files changed, 43 insertions(+), 40 deletions(-) diff --git a/src/hnsw.h b/src/hnsw.h index 116d9bc..f9e5621 100644 --- a/src/hnsw.h +++ b/src/hnsw.h @@ -388,10 +388,11 @@ void HnswSetNeighborTuple(char *base, HnswNeighborTuple ntup, HnswElement e, in void HnswAddHeapTid(HnswElement element, ItemPointer heaptid); HnswNeighborArray *HnswInitNeighborArray(int lm, HnswAllocator * allocator); void HnswInitNeighbors(char *base, HnswElement element, int m, HnswAllocator * alloc); -bool HnswInsertTupleOnDisk(Relation index, Datum value, Datum *values, bool *isnull, ItemPointer heap_tid, bool building); +bool HnswInsertTupleOnDisk(Relation index, Datum value, ItemPointer heaptid, bool building); void HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m, bool checkExisting, bool building); void HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHeaptids, bool loadVec); void HnswLoadElement(HnswElement element, double *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, double *maxDistance); +bool HnswFormIndexValue(Datum *out, Datum *values, bool *isnull, const HnswTypeInfo * typeInfo, FmgrInfo *normprocinfo, Oid collation); void HnswSetElementTuple(char *base, HnswElementTuple etup, HnswElement element); void HnswUpdateConnection(char *base, HnswNeighborArray * neighbors, HnswElement newElement, float distance, int lm, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation); bool HnswLoadNeighborTids(HnswElement element, ItemPointerData *indextids, Relation index, int m, int lm, int lc); diff --git a/src/hnswbuild.c b/src/hnswbuild.c index 87d4823..02e1749 100644 --- a/src/hnswbuild.c +++ b/src/hnswbuild.c @@ -473,7 +473,6 @@ InsertTupleInMemory(HnswBuildState * buildstate, HnswElement element) static bool InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heaptid, HnswBuildState * buildstate) { - const HnswTypeInfo *typeInfo = buildstate->typeInfo; HnswGraph *graph = buildstate->graph; HnswElement element; HnswAllocator *allocator = &buildstate->allocator; @@ -481,22 +480,11 @@ InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heaptid, Hn Pointer valuePtr; LWLock *flushLock = &graph->flushLock; char *base = buildstate->hnswarea; + Datum value; - /* Detoast once for all calls */ - Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0])); - - /* Check value */ - if (typeInfo->checkValue != NULL) - typeInfo->checkValue(DatumGetPointer(value)); - - /* Normalize if needed */ - if (buildstate->normprocinfo != NULL) - { - if (!HnswCheckNorm(buildstate->normprocinfo, buildstate->collation, value)) - return false; - - value = HnswNormValue(typeInfo, buildstate->collation, value); - } + /* Form index value */ + if (!HnswFormIndexValue(&value, values, isnull, buildstate->typeInfo, buildstate->normprocinfo, buildstate->collation)) + return false; /* Get datum size */ valueSize = VARSIZE_ANY(DatumGetPointer(value)); @@ -509,7 +497,7 @@ InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heaptid, Hn { LWLockRelease(flushLock); - return HnswInsertTupleOnDisk(index, value, values, isnull, heaptid, true); + return HnswInsertTupleOnDisk(index, value, heaptid, true); } /* @@ -541,7 +529,7 @@ InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heaptid, Hn LWLockRelease(flushLock); - return HnswInsertTupleOnDisk(index, value, values, isnull, heaptid, true); + return HnswInsertTupleOnDisk(index, value, heaptid, true); } /* Ok, we can proceed to allocate the element */ diff --git a/src/hnswinsert.c b/src/hnswinsert.c index 2dfd8d3..9c5b190 100644 --- a/src/hnswinsert.c +++ b/src/hnswinsert.c @@ -679,7 +679,7 @@ UpdateGraphOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement * Insert a tuple into the index */ bool -HnswInsertTupleOnDisk(Relation index, Datum value, Datum *values, bool *isnull, ItemPointer heap_tid, bool building) +HnswInsertTupleOnDisk(Relation index, Datum value, ItemPointer heaptid, bool building) { HnswElement entryPoint; HnswElement element; @@ -701,7 +701,7 @@ HnswInsertTupleOnDisk(Relation index, Datum value, Datum *values, bool *isnull, HnswGetMetaPageInfo(index, &m, &entryPoint); /* Create an element */ - element = HnswInitElement(base, heap_tid, m, HnswGetMl(m), HnswGetMaxLevel(m), NULL); + element = HnswInitElement(base, heaptid, m, HnswGetMl(m), HnswGetMaxLevel(m), NULL); HnswPtrStore(base, element->value, DatumGetPointer(value)); /* Prevent concurrent inserts when likely updating entry point */ @@ -734,31 +734,18 @@ HnswInsertTupleOnDisk(Relation index, Datum value, Datum *values, bool *isnull, * Insert a tuple into the index */ static void -HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid) +HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heaptid) { Datum value; const HnswTypeInfo *typeInfo = HnswGetTypeInfo(index); - FmgrInfo *normprocinfo; + FmgrInfo *normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC); Oid collation = index->rd_indcollation[0]; - /* Detoast once for all calls */ - value = PointerGetDatum(PG_DETOAST_DATUM(values[0])); + /* Form index value */ + if (!HnswFormIndexValue(&value, values, isnull, typeInfo, normprocinfo, collation)) + return; - /* Check value */ - if (typeInfo->checkValue != NULL) - typeInfo->checkValue(DatumGetPointer(value)); - - /* Normalize if needed */ - normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC); - if (normprocinfo != NULL) - { - if (!HnswCheckNorm(normprocinfo, collation, value)) - return; - - value = HnswNormValue(typeInfo, collation, value); - } - - HnswInsertTupleOnDisk(index, value, values, isnull, heap_tid, false); + HnswInsertTupleOnDisk(index, value, heaptid, false); } /* diff --git a/src/hnswutils.c b/src/hnswutils.c index 856c309..743fa87 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -394,6 +394,33 @@ HnswUpdateMetaPage(Relation index, int updateEntry, HnswElement entryPoint, Bloc UnlockReleaseBuffer(buf); } +/* + * Form index value + */ +bool +HnswFormIndexValue(Datum *out, Datum *values, bool *isnull, const HnswTypeInfo * typeInfo, FmgrInfo *normprocinfo, Oid collation) +{ + /* Detoast once for all calls */ + Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0])); + + /* Check value */ + if (typeInfo->checkValue != NULL) + typeInfo->checkValue(DatumGetPointer(value)); + + /* Normalize if needed */ + if (normprocinfo != NULL) + { + if (!HnswCheckNorm(normprocinfo, collation, value)) + return false; + + value = HnswNormValue(typeInfo, collation, value); + } + + *out = value; + + return true; +} + /* * Set element tuple, except for neighbor info */ From a98534e5ab4735c4a33adb8ea63aef5b832ae5c0 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 9 Oct 2024 21:03:18 -0700 Subject: [PATCH 2/2] DRY HNSW procinfo --- src/hnsw.h | 1 + src/hnswbuild.c | 4 +--- src/hnswinsert.c | 6 ++++-- src/hnswscan.c | 4 +--- src/hnswutils.c | 13 +++++++++++++ src/hnswvacuum.c | 4 ++-- 6 files changed, 22 insertions(+), 10 deletions(-) diff --git a/src/hnsw.h b/src/hnsw.h index f9e5621..10cc04a 100644 --- a/src/hnsw.h +++ b/src/hnsw.h @@ -370,6 +370,7 @@ typedef struct HnswVacuumState int HnswGetM(Relation index); int HnswGetEfConstruction(Relation index); FmgrInfo *HnswOptionalProcInfo(Relation index, uint16 procnum); +void HnswSetProcinfo(Relation index, FmgrInfo **procinfo, FmgrInfo **normprocinfo, Oid *collation); Datum HnswNormValue(const HnswTypeInfo * typeInfo, Oid collation, Datum value); bool HnswCheckNorm(FmgrInfo *procinfo, Oid collation, Datum value); Buffer HnswNewBuffer(Relation index, ForkNumber forkNum); diff --git a/src/hnswbuild.c b/src/hnswbuild.c index 02e1749..12a2169 100644 --- a/src/hnswbuild.c +++ b/src/hnswbuild.c @@ -692,9 +692,7 @@ InitBuildState(HnswBuildState * buildstate, Relation heap, Relation index, Index buildstate->indtuples = 0; /* Get support functions */ - buildstate->procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC); - buildstate->normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC); - buildstate->collation = index->rd_indcollation[0]; + HnswSetProcinfo(index, &buildstate->procinfo, &buildstate->normprocinfo, &buildstate->collation); InitGraph(&buildstate->graphData, NULL, (Size) maintenance_work_mem * 1024L); buildstate->graph = &buildstate->graphData; diff --git a/src/hnswinsert.c b/src/hnswinsert.c index 9c5b190..b916521 100644 --- a/src/hnswinsert.c +++ b/src/hnswinsert.c @@ -685,11 +685,13 @@ HnswInsertTupleOnDisk(Relation index, Datum value, ItemPointer heaptid, bool bui HnswElement element; int m; int efConstruction = HnswGetEfConstruction(index); - FmgrInfo *procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC); - Oid collation = index->rd_indcollation[0]; + FmgrInfo *procinfo; + Oid collation; LOCKMODE lockmode = ShareLock; char *base = NULL; + HnswSetProcinfo(index, &procinfo, NULL, &collation); + /* * Get a shared lock. This allows vacuum to ensure no in-flight inserts * before repairing graph. Use a page lock so it does not interfere with diff --git a/src/hnswscan.c b/src/hnswscan.c index 30815af..88ecf68 100644 --- a/src/hnswscan.c +++ b/src/hnswscan.c @@ -86,9 +86,7 @@ hnswbeginscan(Relation index, int nkeys, int norderbys) ALLOCSET_DEFAULT_SIZES); /* Set support functions */ - so->procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC); - so->normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC); - so->collation = index->rd_indcollation[0]; + HnswSetProcinfo(index, &so->procinfo, &so->normprocinfo, &so->collation); scan->opaque = so; diff --git a/src/hnswutils.c b/src/hnswutils.c index 743fa87..198e438 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -153,6 +153,19 @@ HnswOptionalProcInfo(Relation index, uint16 procnum) return index_getprocinfo(index, 1, procnum); } +/* + * Set procinfo + */ +void +HnswSetProcinfo(Relation index, FmgrInfo **procinfo, FmgrInfo **normprocinfo, Oid *collation) +{ + *procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC); + *collation = index->rd_indcollation[0]; + + if (normprocinfo != NULL) + *normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC); +} + /* * Normalize value */ diff --git a/src/hnswvacuum.c b/src/hnswvacuum.c index 67cc645..7931f85 100644 --- a/src/hnswvacuum.c +++ b/src/hnswvacuum.c @@ -573,13 +573,13 @@ InitVacuumState(HnswVacuumState * vacuumstate, IndexVacuumInfo *info, IndexBulkD vacuumstate->callback_state = callback_state; vacuumstate->efConstruction = HnswGetEfConstruction(index); vacuumstate->bas = GetAccessStrategy(BAS_BULKREAD); - vacuumstate->procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC); - vacuumstate->collation = index->rd_indcollation[0]; vacuumstate->ntup = palloc0(HNSW_TUPLE_ALLOC_SIZE); vacuumstate->tmpCtx = AllocSetContextCreate(CurrentMemoryContext, "Hnsw vacuum temporary context", ALLOCSET_DEFAULT_SIZES); + HnswSetProcinfo(index, &vacuumstate->procinfo, NULL, &vacuumstate->collation); + /* Get m from metapage */ HnswGetMetaPageInfo(index, &vacuumstate->m, NULL);