diff --git a/CHANGELOG.md b/CHANGELOG.md index 3eae03e..d4fba85 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ ## 0.7.1 (unreleased) +- Improved performance of on-disk HNSW index builds - Fixed `undefined symbol` error with GCC 8 - Fixed compilation error with universal binaries on Mac - Fixed compilation warning with Clang < 14 diff --git a/src/hnsw.h b/src/hnsw.h index 2f11137..8f024fa 100644 --- a/src/hnsw.h +++ b/src/hnsw.h @@ -393,7 +393,7 @@ void HnswInitNeighbors(char *base, HnswElement element, int m, HnswAllocator * bool HnswInsertTupleOnDisk(Relation index, Datum value, Datum *values, bool *isnull, ItemPointer heap_tid, bool building); void HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m, bool checkExisting, bool building); void HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHeaptids, bool loadVec); -void HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec); +void HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, float *loadVecDistance); void HnswSetElementTuple(char *base, HnswElementTuple etup, HnswElement element); void HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, int lm, int lc, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation); void HnswLoadNeighbors(HnswElement element, Relation index, int m); diff --git a/src/hnswutils.c b/src/hnswutils.c index 48acef0..b19467a 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -545,7 +545,7 @@ HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHe * Load an element and optionally get its distance from q */ void -HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec) +HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, float *loadVecDistance) { Buffer buf; Page page; @@ -560,9 +560,6 @@ HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, Assert(HnswIsElementTuple(etup)); - /* Load element */ - HnswLoadElementFromTuple(element, etup, true, loadVec); - /* Calculate distance */ if (distance != NULL) { @@ -570,8 +567,14 @@ HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, *distance = 0; else *distance = (float) DatumGetFloat8(FunctionCall2Coll(procinfo, collation, *q, PointerGetDatum(&etup->data))); + + if (loadVecDistance != NULL && *distance < *loadVecDistance) + loadVec = true; } + /* Load element */ + HnswLoadElementFromTuple(element, etup, true, loadVec); + UnlockReleaseBuffer(buf); } @@ -599,7 +602,7 @@ HnswEntryCandidate(char *base, HnswElement entryPoint, Datum q, Relation index, if (index == NULL) hc->distance = GetCandidateDistance(base, hc, q, procinfo, collation); else - HnswLoadElement(entryPoint, &hc->distance, &q, index, procinfo, collation, loadVec); + HnswLoadElement(entryPoint, &hc->distance, &q, index, procinfo, collation, loadVec, NULL); return hc; } @@ -801,7 +804,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F if (index == NULL) eDistance = GetCandidateDistance(base, e, q, procinfo, collation); else - HnswLoadElement(eElement, &eDistance, &q, index, procinfo, collation, inserting); + HnswLoadElement(eElement, &eDistance, &q, index, procinfo, collation, inserting && wlen < ef, inserting ? &f->distance : NULL); Assert(!eElement->deleted); @@ -1102,7 +1105,7 @@ HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, int lm HnswElement hc3Element = HnswPtrAccess(base, hc3->element); if (HnswPtrIsNull(base, hc3Element->value)) - HnswLoadElement(hc3Element, &hc3->distance, &q, index, procinfo, collation, true); + HnswLoadElement(hc3Element, &hc3->distance, &q, index, procinfo, collation, true, NULL); else hc3->distance = GetCandidateDistance(base, hc3, q, procinfo, collation); diff --git a/src/hnswvacuum.c b/src/hnswvacuum.c index 7c14e54..67cc645 100644 --- a/src/hnswvacuum.c +++ b/src/hnswvacuum.c @@ -256,7 +256,7 @@ RepairGraphEntryPoint(HnswVacuumState * vacuumstate) LockPage(index, HNSW_UPDATE_LOCK, ShareLock); /* Load element */ - HnswLoadElement(highestPoint, NULL, NULL, index, vacuumstate->procinfo, vacuumstate->collation, true); + HnswLoadElement(highestPoint, NULL, NULL, index, vacuumstate->procinfo, vacuumstate->collation, true, NULL); /* Repair if needed */ if (NeedsUpdated(vacuumstate, highestPoint)) @@ -294,7 +294,7 @@ RepairGraphEntryPoint(HnswVacuumState * vacuumstate) * is outdated, this can remove connections at higher levels in * the graph until they are repaired, but this should be fine. */ - HnswLoadElement(entryPoint, NULL, NULL, index, vacuumstate->procinfo, vacuumstate->collation, true); + HnswLoadElement(entryPoint, NULL, NULL, index, vacuumstate->procinfo, vacuumstate->collation, true, NULL); if (NeedsUpdated(vacuumstate, entryPoint)) {