Improved performance of on-disk HNSW index builds

This commit is contained in:
Andrew Kane
2024-05-26 09:19:52 -04:00
parent ff9b22977e
commit 385f4a707e
4 changed files with 14 additions and 10 deletions

View File

@@ -1,5 +1,6 @@
## 0.7.1 (unreleased)
- Improved performance of on-disk HNSW index builds
- Fixed `undefined symbol` error with GCC 8
- Fixed compilation error with universal binaries on Mac
- Fixed compilation warning with Clang < 14

View File

@@ -393,7 +393,7 @@ void HnswInitNeighbors(char *base, HnswElement element, int m, HnswAllocator *
bool HnswInsertTupleOnDisk(Relation index, Datum value, Datum *values, bool *isnull, ItemPointer heap_tid, bool building);
void HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m, bool checkExisting, bool building);
void HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHeaptids, bool loadVec);
void HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec);
void HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, float *loadVecDistance);
void HnswSetElementTuple(char *base, HnswElementTuple etup, HnswElement element);
void HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, int lm, int lc, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation);
void HnswLoadNeighbors(HnswElement element, Relation index, int m);

View File

@@ -545,7 +545,7 @@ HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHe
* Load an element and optionally get its distance from q
*/
void
HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec)
HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, float *loadVecDistance)
{
Buffer buf;
Page page;
@@ -560,9 +560,6 @@ HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index,
Assert(HnswIsElementTuple(etup));
/* Load element */
HnswLoadElementFromTuple(element, etup, true, loadVec);
/* Calculate distance */
if (distance != NULL)
{
@@ -570,8 +567,14 @@ HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index,
*distance = 0;
else
*distance = (float) DatumGetFloat8(FunctionCall2Coll(procinfo, collation, *q, PointerGetDatum(&etup->data)));
if (loadVecDistance != NULL && *distance < *loadVecDistance)
loadVec = true;
}
/* Load element */
HnswLoadElementFromTuple(element, etup, true, loadVec);
UnlockReleaseBuffer(buf);
}
@@ -599,7 +602,7 @@ HnswEntryCandidate(char *base, HnswElement entryPoint, Datum q, Relation index,
if (index == NULL)
hc->distance = GetCandidateDistance(base, hc, q, procinfo, collation);
else
HnswLoadElement(entryPoint, &hc->distance, &q, index, procinfo, collation, loadVec);
HnswLoadElement(entryPoint, &hc->distance, &q, index, procinfo, collation, loadVec, NULL);
return hc;
}
@@ -801,7 +804,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F
if (index == NULL)
eDistance = GetCandidateDistance(base, e, q, procinfo, collation);
else
HnswLoadElement(eElement, &eDistance, &q, index, procinfo, collation, inserting);
HnswLoadElement(eElement, &eDistance, &q, index, procinfo, collation, inserting && wlen < ef, inserting ? &f->distance : NULL);
Assert(!eElement->deleted);
@@ -1102,7 +1105,7 @@ HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, int lm
HnswElement hc3Element = HnswPtrAccess(base, hc3->element);
if (HnswPtrIsNull(base, hc3Element->value))
HnswLoadElement(hc3Element, &hc3->distance, &q, index, procinfo, collation, true);
HnswLoadElement(hc3Element, &hc3->distance, &q, index, procinfo, collation, true, NULL);
else
hc3->distance = GetCandidateDistance(base, hc3, q, procinfo, collation);

View File

@@ -256,7 +256,7 @@ RepairGraphEntryPoint(HnswVacuumState * vacuumstate)
LockPage(index, HNSW_UPDATE_LOCK, ShareLock);
/* Load element */
HnswLoadElement(highestPoint, NULL, NULL, index, vacuumstate->procinfo, vacuumstate->collation, true);
HnswLoadElement(highestPoint, NULL, NULL, index, vacuumstate->procinfo, vacuumstate->collation, true, NULL);
/* Repair if needed */
if (NeedsUpdated(vacuumstate, highestPoint))
@@ -294,7 +294,7 @@ RepairGraphEntryPoint(HnswVacuumState * vacuumstate)
* is outdated, this can remove connections at higher levels in
* the graph until they are repaired, but this should be fine.
*/
HnswLoadElement(entryPoint, NULL, NULL, index, vacuumstate->procinfo, vacuumstate->collation, true);
HnswLoadElement(entryPoint, NULL, NULL, index, vacuumstate->procinfo, vacuumstate->collation, true, NULL);
if (NeedsUpdated(vacuumstate, entryPoint))
{