Improved performance of HNSW index scans from disk [skip ci]

This commit is contained in:
Andrew Kane
2025-09-24 18:25:01 -07:00
parent e7899b1250
commit 52184bfa10
4 changed files with 80 additions and 48 deletions

View File

@@ -1,3 +1,7 @@
## 0.8.2 (unreleased)
- Improved performance of HNSW index scans from disk
## 0.8.1 (2025-09-04)
- Added support for Postgres 18 rc1

View File

@@ -1267,6 +1267,7 @@ Thanks to:
- [k-means++: The Advantage of Careful Seeding](https://theory.stanford.edu/~sergei/papers/kMeansPP-soda.pdf)
- [Concept Decompositions for Large Sparse Text Data using Clustering](https://www.cs.utexas.edu/users/inderjit/public_papers/concept_mlj.pdf)
- [Efficient and Robust Approximate Nearest Neighbor Search using Hierarchical Navigable Small World Graphs](https://arxiv.org/ftp/arxiv/papers/1603/1603.09320.pdf)
- [Turbocharging Vector Databases using Modern SSDs](https://www.vldb.org/pvldb/volumes/18/paper/Turbocharging%20Vector%20Databases%20using%20Modern%20SSDs)
## History

View File

@@ -131,6 +131,69 @@ HnswBuildAppendPage(Relation index, Buffer *buf, Page *page, ForkNumber forkNum)
HnswInitPage(*buf, *page);
}
static void
WriteElement(char *base, HnswElement element, HnswElementTuple etup, HnswNeighborTuple ntup, HnswBuildState * buildstate, Size maxSize, Buffer *buf, Page *page, Relation index, ForkNumber forkNum)
{
Size etupSize;
Size ntupSize;
Size combinedSize;
Pointer valuePtr;
/* Skip if already added */
if (BlockNumberIsValid(element->neighborPage))
return;
valuePtr = HnswPtrAccess(base, element->value);
/* Zero memory for each element */
MemSet(etup, 0, HNSW_TUPLE_ALLOC_SIZE);
/* Calculate sizes */
etupSize = HNSW_ELEMENT_TUPLE_SIZE(VARSIZE_ANY(valuePtr));
ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(element->level, buildstate->m);
combinedSize = etupSize + ntupSize + sizeof(ItemIdData);
/* Initial size check */
if (etupSize > HNSW_TUPLE_ALLOC_SIZE)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("index tuple too large")));
HnswSetElementTuple(base, etup, element);
/* Keep element and neighbors on the same page if possible */
if (PageGetFreeSpace(*page) < etupSize || (combinedSize <= maxSize && PageGetFreeSpace(*page) < combinedSize))
HnswBuildAppendPage(index, buf, page, forkNum);
/* Calculate offsets */
element->blkno = BufferGetBlockNumber(*buf);
element->offno = OffsetNumberNext(PageGetMaxOffsetNumber(*page));
if (combinedSize <= maxSize)
{
element->neighborPage = element->blkno;
element->neighborOffno = OffsetNumberNext(element->offno);
}
else
{
element->neighborPage = element->blkno + 1;
element->neighborOffno = FirstOffsetNumber;
}
ItemPointerSet(&etup->neighbortid, element->neighborPage, element->neighborOffno);
/* Add element */
if (PageAddItem(*page, (Item) etup, etupSize, InvalidOffsetNumber, false, false) != element->offno)
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
/* Add new page if needed */
if (PageGetFreeSpace(*page) < ntupSize)
HnswBuildAppendPage(index, buf, page, forkNum);
/* Add placeholder for neighbors */
if (PageAddItem(*page, (Item) ntup, ntupSize, InvalidOffsetNumber, false, false) != element->neighborOffno)
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
}
/*
* Create graph pages
*/
@@ -164,61 +227,24 @@ CreateGraphPages(HnswBuildState * buildstate)
while (!HnswPtrIsNull(base, iter))
{
HnswElement element = HnswPtrAccess(base, iter);
Size etupSize;
Size ntupSize;
Size combinedSize;
Pointer valuePtr = HnswPtrAccess(base, element->value);
/* Update iterator */
iter = element->next;
/* Zero memory for each element */
MemSet(etup, 0, HNSW_TUPLE_ALLOC_SIZE);
WriteElement(base, element, etup, ntup, buildstate, maxSize, &buf, &page, index, forkNum);
/* Calculate sizes */
etupSize = HNSW_ELEMENT_TUPLE_SIZE(VARSIZE_ANY(valuePtr));
ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(element->level, buildstate->m);
combinedSize = etupSize + ntupSize + sizeof(ItemIdData);
/* Initial size check */
if (etupSize > HNSW_TUPLE_ALLOC_SIZE)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("index tuple too large")));
HnswSetElementTuple(base, etup, element);
/* Keep element and neighbors on the same page if possible */
if (PageGetFreeSpace(page) < etupSize || (combinedSize <= maxSize && PageGetFreeSpace(page) < combinedSize))
HnswBuildAppendPage(index, &buf, &page, forkNum);
/* Calculate offsets */
element->blkno = BufferGetBlockNumber(buf);
element->offno = OffsetNumberNext(PageGetMaxOffsetNumber(page));
if (combinedSize <= maxSize)
for (int lc = element->level; lc >= 0; lc--)
{
element->neighborPage = element->blkno;
element->neighborOffno = OffsetNumberNext(element->offno);
}
else
HnswNeighborArray *neighbors = HnswGetNeighbors(base, element, lc);
for (int i = 0; i < neighbors->length; i++)
{
element->neighborPage = element->blkno + 1;
element->neighborOffno = FirstOffsetNumber;
HnswCandidate *hc = &neighbors->items[i];
HnswElement hce = HnswPtrAccess(base, hc->element);
WriteElement(base, hce, etup, ntup, buildstate, maxSize, &buf, &page, index, forkNum);
}
}
ItemPointerSet(&etup->neighbortid, element->neighborPage, element->neighborOffno);
/* Add element */
if (PageAddItem(page, (Item) etup, etupSize, InvalidOffsetNumber, false, false) != element->offno)
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
/* Add new page if needed */
if (PageGetFreeSpace(page) < ntupSize)
HnswBuildAppendPage(index, &buf, &page, forkNum);
/* Add placeholder for neighbors */
if (PageAddItem(page, (Item) ntup, ntupSize, InvalidOffsetNumber, false, false) != element->neighborOffno)
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
}
insertPage = BufferGetBlockNumber(buf);

View File

@@ -253,6 +253,7 @@ HnswInitElement(char *base, ItemPointer heaptid, int m, double ml, int maxLevel,
element->deleted = 0;
/* Start at one to make it easier to find issues */
element->version = 1;
element->neighborPage = InvalidBlockNumber;
HnswInitNeighbors(base, element, m, allocator);