mirror of
https://github.com/pgvector/pgvector.git
synced 2026-06-06 05:51:21 +08:00
Improved performance of on-disk HNSW index builds
This commit is contained in:
@@ -1,5 +1,6 @@
|
||||
## 0.7.1 (unreleased)
|
||||
|
||||
- Improved performance of on-disk HNSW index builds
|
||||
- Fixed `undefined symbol` error with GCC 8
|
||||
- Fixed compilation error with universal binaries on Mac
|
||||
- Fixed compilation warning with Clang < 14
|
||||
|
||||
@@ -393,7 +393,7 @@ void HnswInitNeighbors(char *base, HnswElement element, int m, HnswAllocator *
|
||||
bool HnswInsertTupleOnDisk(Relation index, Datum value, Datum *values, bool *isnull, ItemPointer heap_tid, bool building);
|
||||
void HnswUpdateNeighborsOnDisk(Relation index, FmgrInfo *procinfo, Oid collation, HnswElement e, int m, bool checkExisting, bool building);
|
||||
void HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHeaptids, bool loadVec);
|
||||
void HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec);
|
||||
void HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, float *loadVecDistance);
|
||||
void HnswSetElementTuple(char *base, HnswElementTuple etup, HnswElement element);
|
||||
void HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, int lm, int lc, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation);
|
||||
void HnswLoadNeighbors(HnswElement element, Relation index, int m);
|
||||
|
||||
@@ -545,7 +545,7 @@ HnswLoadElementFromTuple(HnswElement element, HnswElementTuple etup, bool loadHe
|
||||
* Load an element and optionally get its distance from q
|
||||
*/
|
||||
void
|
||||
HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec)
|
||||
HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index, FmgrInfo *procinfo, Oid collation, bool loadVec, float *loadVecDistance)
|
||||
{
|
||||
Buffer buf;
|
||||
Page page;
|
||||
@@ -560,9 +560,6 @@ HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index,
|
||||
|
||||
Assert(HnswIsElementTuple(etup));
|
||||
|
||||
/* Load element */
|
||||
HnswLoadElementFromTuple(element, etup, true, loadVec);
|
||||
|
||||
/* Calculate distance */
|
||||
if (distance != NULL)
|
||||
{
|
||||
@@ -570,8 +567,14 @@ HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index,
|
||||
*distance = 0;
|
||||
else
|
||||
*distance = (float) DatumGetFloat8(FunctionCall2Coll(procinfo, collation, *q, PointerGetDatum(&etup->data)));
|
||||
|
||||
if (loadVecDistance != NULL && *distance < *loadVecDistance)
|
||||
loadVec = true;
|
||||
}
|
||||
|
||||
/* Load element */
|
||||
HnswLoadElementFromTuple(element, etup, true, loadVec);
|
||||
|
||||
UnlockReleaseBuffer(buf);
|
||||
}
|
||||
|
||||
@@ -599,7 +602,7 @@ HnswEntryCandidate(char *base, HnswElement entryPoint, Datum q, Relation index,
|
||||
if (index == NULL)
|
||||
hc->distance = GetCandidateDistance(base, hc, q, procinfo, collation);
|
||||
else
|
||||
HnswLoadElement(entryPoint, &hc->distance, &q, index, procinfo, collation, loadVec);
|
||||
HnswLoadElement(entryPoint, &hc->distance, &q, index, procinfo, collation, loadVec, NULL);
|
||||
return hc;
|
||||
}
|
||||
|
||||
@@ -801,7 +804,7 @@ HnswSearchLayer(char *base, Datum q, List *ep, int ef, int lc, Relation index, F
|
||||
if (index == NULL)
|
||||
eDistance = GetCandidateDistance(base, e, q, procinfo, collation);
|
||||
else
|
||||
HnswLoadElement(eElement, &eDistance, &q, index, procinfo, collation, inserting);
|
||||
HnswLoadElement(eElement, &eDistance, &q, index, procinfo, collation, inserting && wlen < ef, inserting ? &f->distance : NULL);
|
||||
|
||||
Assert(!eElement->deleted);
|
||||
|
||||
@@ -1102,7 +1105,7 @@ HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, int lm
|
||||
HnswElement hc3Element = HnswPtrAccess(base, hc3->element);
|
||||
|
||||
if (HnswPtrIsNull(base, hc3Element->value))
|
||||
HnswLoadElement(hc3Element, &hc3->distance, &q, index, procinfo, collation, true);
|
||||
HnswLoadElement(hc3Element, &hc3->distance, &q, index, procinfo, collation, true, NULL);
|
||||
else
|
||||
hc3->distance = GetCandidateDistance(base, hc3, q, procinfo, collation);
|
||||
|
||||
|
||||
@@ -256,7 +256,7 @@ RepairGraphEntryPoint(HnswVacuumState * vacuumstate)
|
||||
LockPage(index, HNSW_UPDATE_LOCK, ShareLock);
|
||||
|
||||
/* Load element */
|
||||
HnswLoadElement(highestPoint, NULL, NULL, index, vacuumstate->procinfo, vacuumstate->collation, true);
|
||||
HnswLoadElement(highestPoint, NULL, NULL, index, vacuumstate->procinfo, vacuumstate->collation, true, NULL);
|
||||
|
||||
/* Repair if needed */
|
||||
if (NeedsUpdated(vacuumstate, highestPoint))
|
||||
@@ -294,7 +294,7 @@ RepairGraphEntryPoint(HnswVacuumState * vacuumstate)
|
||||
* is outdated, this can remove connections at higher levels in
|
||||
* the graph until they are repaired, but this should be fine.
|
||||
*/
|
||||
HnswLoadElement(entryPoint, NULL, NULL, index, vacuumstate->procinfo, vacuumstate->collation, true);
|
||||
HnswLoadElement(entryPoint, NULL, NULL, index, vacuumstate->procinfo, vacuumstate->collation, true, NULL);
|
||||
|
||||
if (NeedsUpdated(vacuumstate, entryPoint))
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user