Improved performance of on-disk HNSW index builds - #570

This commit is contained in:
Andrew Kane
2024-05-29 12:03:58 -07:00
parent ff9b22977e
commit 49c1f13095
4 changed files with 21 additions and 18 deletions

View File

@@ -1,5 +1,6 @@
## 0.7.1 (unreleased)
- Improved performance of on-disk HNSW index builds
- Fixed `undefined symbol` error with GCC 8
- Fixed compilation error with universal binaries on Mac
- Fixed compilation warning with Clang < 14

View File

@@ -393,7 +393,7 @@ void HnswInitNeighbors(char *base, HnswElement element, int m, HnswAllocator *
bool HnswInsertTupleOnDisk(Relation index, Datum value, Datum *values, bool *isnull, ItemPointer heap_tid, bool building);
void HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m, bool checkExisting, bool building);
void HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHeaptids, bool loadVec);
void HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec);
void HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, float *maxDistance);
void HnswSetElementTuple(char *base, HnswElementTuple etup, HnswElement element);
void HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, int lm, int lc, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation);
void HnswLoadNeighbors(HnswElement element, Relation index, int m);

View File

@@ -545,7 +545,7 @@ HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHe
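* Load an element and optionally get its distance from q.
* If both distance and maxDistance are provided, the element is only
* loaded when the computed distance is less than *maxDistance.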
*/
void
HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec)
HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, float *maxDistance)
{
Buffer buf;
Page page;
@@ -560,9 +560,6 @@ HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index,
Assert(HnswIsElementTuple(etup));
/* Load element */
HnswLoadElementFromTuple(element, etup, true, loadVec);
/* Calculate distance */
if (distance != NULL)
{
@@ -572,6 +569,10 @@ HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index,
*distance = (float) DatumGetFloat8(FunctionCall2Coll(procinfo, collation, *q, PointerGetDatum(&etup->data)));
}
/* Load element */
if (distance == NULL || maxDistance == NULL || *distance < *maxDistance)
HnswLoadElementFromTuple(element, etup, true, loadVec);
UnlockReleaseBuffer(buf);
}
@@ -599,7 +600,7 @@ HnswEntryCandidate(char *base, HnswElement entryPoint, Datum q, Relation index,
if (index == NULL)
hc->distance = GetCandidateDistance(base, hc, q, procinfo, collation);
else
HnswLoadElement(entryPoint, &hc->distance, &q, index, procinfo, collation, loadVec);
HnswLoadElement(entryPoint, &hc->distance, &q, index, procinfo, collation, loadVec, NULL);
return hc;
}
@@ -801,19 +802,20 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F
if (index == NULL)
eDistance = GetCandidateDistance(base, e, q, procinfo, collation);
else
HnswLoadElement(eElement, &eDistance, &q, index, procinfo, collation, inserting);
Assert(!eElement->deleted);
/* Make robust to issues */
if (eElement->level < lc)
continue;
HnswLoadElement(eElement, &eDistance, &q, index, procinfo, collation, inserting, wlen >= ef ? &f->distance : NULL);
if (eDistance < f->distance || wlen < ef)
{
/* Copy e */
HnswCandidate *ec = palloc(sizeof(HnswCandidate));
HnswCandidate *ec;
Assert(!eElement->deleted);
/* Make robust to issues: skip elements whose level is lower than lc */
if (eElement->level < lc)
continue;
/* Copy e */
ec = palloc(sizeof(HnswCandidate));
HnswPtrStore(base, ec->element, eElement);
ec->distance = eDistance;
@@ -1102,7 +1104,7 @@ HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, int lm
HnswElement hc3Element = HnswPtrAccess(base, hc3->element);
if (HnswPtrIsNull(base, hc3Element->value))
HnswLoadElement(hc3Element, &hc3->distance, &q, index, procinfo, collation, true);
HnswLoadElement(hc3Element, &hc3->distance, &q, index, procinfo, collation, true, NULL);
else
hc3->distance = GetCandidateDistance(base, hc3, q, procinfo, collation);

View File

@@ -256,7 +256,7 @@ RepairGraphEntryPoint(HnswVacuumState * vacuumstate)
LockPage(index, HNSW_UPDATE_LOCK, ShareLock);
/* Load element */
HnswLoadElement(highestPoint, NULL, NULL, index, vacuumstate->procinfo, vacuumstate->collation, true);
HnswLoadElement(highestPoint, NULL, NULL, index, vacuumstate->procinfo, vacuumstate->collation, true, NULL);
/* Repair if needed */
if (NeedsUpdated(vacuumstate, highestPoint))
@@ -294,7 +294,7 @@ RepairGraphEntryPoint(HnswVacuumState * vacuumstate)
* is outdated, this can remove connections at higher levels in
* the graph until they are repaired, but this should be fine.
*/
HnswLoadElement(entryPoint, NULL, NULL, index, vacuumstate->procinfo, vacuumstate->collation, true);
HnswLoadElement(entryPoint, NULL, NULL, index, vacuumstate->procinfo, vacuumstate->collation, true, NULL);
if (NeedsUpdated(vacuumstate, entryPoint))
{