mirror of
https://github.com/pgvector/pgvector.git
synced 2026-06-06 05:51:21 +08:00
Improved performance of HNSW index scans from disk [skip ci]
This commit is contained in:
@@ -1,3 +1,7 @@
|
||||
## 0.8.2 (unreleased)
|
||||
|
||||
- Improved performance of HNSW index scans from disk
|
||||
|
||||
## 0.8.1 (2025-09-04)
|
||||
|
||||
- Added support for Postgres 18 rc1
|
||||
|
||||
@@ -1267,6 +1267,7 @@ Thanks to:
|
||||
- [k-means++: The Advantage of Careful Seeding](https://theory.stanford.edu/~sergei/papers/kMeansPP-soda.pdf)
|
||||
- [Concept Decompositions for Large Sparse Text Data using Clustering](https://www.cs.utexas.edu/users/inderjit/public_papers/concept_mlj.pdf)
|
||||
- [Efficient and Robust Approximate Nearest Neighbor Search using Hierarchical Navigable Small World Graphs](https://arxiv.org/ftp/arxiv/papers/1603/1603.09320.pdf)
|
||||
- [Turbocharging Vector Databases using Modern SSDs](https://www.vldb.org/pvldb/volumes/18/paper/Turbocharging%20Vector%20Databases%20using%20Modern%20SSDs)
|
||||
|
||||
## History
|
||||
|
||||
|
||||
120
src/hnswbuild.c
120
src/hnswbuild.c
@@ -131,6 +131,69 @@ HnswBuildAppendPage(Relation index, Buffer *buf, Page *page, ForkNumber forkNum)
|
||||
HnswInitPage(*buf, *page);
|
||||
}
|
||||
|
||||
static void
|
||||
WriteElement(char *base, HnswElement element, HnswElementTuple etup, HnswNeighborTuple ntup, HnswBuildState * buildstate, Size maxSize, Buffer *buf, Page *page, Relation index, ForkNumber forkNum)
|
||||
{
|
||||
Size etupSize;
|
||||
Size ntupSize;
|
||||
Size combinedSize;
|
||||
Pointer valuePtr;
|
||||
|
||||
/* Skip if already added */
|
||||
if (BlockNumberIsValid(element->neighborPage))
|
||||
return;
|
||||
|
||||
valuePtr = HnswPtrAccess(base, element->value);
|
||||
|
||||
/* Zero memory for each element */
|
||||
MemSet(etup, 0, HNSW_TUPLE_ALLOC_SIZE);
|
||||
|
||||
/* Calculate sizes */
|
||||
etupSize = HNSW_ELEMENT_TUPLE_SIZE(VARSIZE_ANY(valuePtr));
|
||||
ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(element->level, buildstate->m);
|
||||
combinedSize = etupSize + ntupSize + sizeof(ItemIdData);
|
||||
|
||||
/* Initial size check */
|
||||
if (etupSize > HNSW_TUPLE_ALLOC_SIZE)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
||||
errmsg("index tuple too large")));
|
||||
|
||||
HnswSetElementTuple(base, etup, element);
|
||||
|
||||
/* Keep element and neighbors on the same page if possible */
|
||||
if (PageGetFreeSpace(*page) < etupSize || (combinedSize <= maxSize && PageGetFreeSpace(*page) < combinedSize))
|
||||
HnswBuildAppendPage(index, buf, page, forkNum);
|
||||
|
||||
/* Calculate offsets */
|
||||
element->blkno = BufferGetBlockNumber(*buf);
|
||||
element->offno = OffsetNumberNext(PageGetMaxOffsetNumber(*page));
|
||||
if (combinedSize <= maxSize)
|
||||
{
|
||||
element->neighborPage = element->blkno;
|
||||
element->neighborOffno = OffsetNumberNext(element->offno);
|
||||
}
|
||||
else
|
||||
{
|
||||
element->neighborPage = element->blkno + 1;
|
||||
element->neighborOffno = FirstOffsetNumber;
|
||||
}
|
||||
|
||||
ItemPointerSet(&etup->neighbortid, element->neighborPage, element->neighborOffno);
|
||||
|
||||
/* Add element */
|
||||
if (PageAddItem(*page, (Item) etup, etupSize, InvalidOffsetNumber, false, false) != element->offno)
|
||||
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
|
||||
|
||||
/* Add new page if needed */
|
||||
if (PageGetFreeSpace(*page) < ntupSize)
|
||||
HnswBuildAppendPage(index, buf, page, forkNum);
|
||||
|
||||
/* Add placeholder for neighbors */
|
||||
if (PageAddItem(*page, (Item) ntup, ntupSize, InvalidOffsetNumber, false, false) != element->neighborOffno)
|
||||
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
|
||||
}
|
||||
|
||||
/*
|
||||
* Create graph pages
|
||||
*/
|
||||
@@ -164,61 +227,24 @@ CreateGraphPages(HnswBuildState * buildstate)
|
||||
while (!HnswPtrIsNull(base, iter))
|
||||
{
|
||||
HnswElement element = HnswPtrAccess(base, iter);
|
||||
Size etupSize;
|
||||
Size ntupSize;
|
||||
Size combinedSize;
|
||||
Pointer valuePtr = HnswPtrAccess(base, element->value);
|
||||
|
||||
/* Update iterator */
|
||||
iter = element->next;
|
||||
|
||||
/* Zero memory for each element */
|
||||
MemSet(etup, 0, HNSW_TUPLE_ALLOC_SIZE);
|
||||
WriteElement(base, element, etup, ntup, buildstate, maxSize, &buf, &page, index, forkNum);
|
||||
|
||||
/* Calculate sizes */
|
||||
etupSize = HNSW_ELEMENT_TUPLE_SIZE(VARSIZE_ANY(valuePtr));
|
||||
ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(element->level, buildstate->m);
|
||||
combinedSize = etupSize + ntupSize + sizeof(ItemIdData);
|
||||
|
||||
/* Initial size check */
|
||||
if (etupSize > HNSW_TUPLE_ALLOC_SIZE)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
||||
errmsg("index tuple too large")));
|
||||
|
||||
HnswSetElementTuple(base, etup, element);
|
||||
|
||||
/* Keep element and neighbors on the same page if possible */
|
||||
if (PageGetFreeSpace(page) < etupSize || (combinedSize <= maxSize && PageGetFreeSpace(page) < combinedSize))
|
||||
HnswBuildAppendPage(index, &buf, &page, forkNum);
|
||||
|
||||
/* Calculate offsets */
|
||||
element->blkno = BufferGetBlockNumber(buf);
|
||||
element->offno = OffsetNumberNext(PageGetMaxOffsetNumber(page));
|
||||
if (combinedSize <= maxSize)
|
||||
for (int lc = element->level; lc >= 0; lc--)
|
||||
{
|
||||
element->neighborPage = element->blkno;
|
||||
element->neighborOffno = OffsetNumberNext(element->offno);
|
||||
}
|
||||
else
|
||||
HnswNeighborArray *neighbors = HnswGetNeighbors(base, element, lc);
|
||||
|
||||
for (int i = 0; i < neighbors->length; i++)
|
||||
{
|
||||
element->neighborPage = element->blkno + 1;
|
||||
element->neighborOffno = FirstOffsetNumber;
|
||||
HnswCandidate *hc = &neighbors->items[i];
|
||||
HnswElement hce = HnswPtrAccess(base, hc->element);
|
||||
|
||||
WriteElement(base, hce, etup, ntup, buildstate, maxSize, &buf, &page, index, forkNum);
|
||||
}
|
||||
}
|
||||
|
||||
ItemPointerSet(&etup->neighbortid, element->neighborPage, element->neighborOffno);
|
||||
|
||||
/* Add element */
|
||||
if (PageAddItem(page, (Item) etup, etupSize, InvalidOffsetNumber, false, false) != element->offno)
|
||||
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
|
||||
|
||||
/* Add new page if needed */
|
||||
if (PageGetFreeSpace(page) < ntupSize)
|
||||
HnswBuildAppendPage(index, &buf, &page, forkNum);
|
||||
|
||||
/* Add placeholder for neighbors */
|
||||
if (PageAddItem(page, (Item) ntup, ntupSize, InvalidOffsetNumber, false, false) != element->neighborOffno)
|
||||
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
|
||||
}
|
||||
|
||||
insertPage = BufferGetBlockNumber(buf);
|
||||
|
||||
@@ -253,6 +253,7 @@ HnswInitElement(char *base, ItemPointer heaptid, int m, double ml, int maxLevel,
|
||||
element->deleted = 0;
|
||||
/* Start at one to make it easier to find issues */
|
||||
element->version = 1;
|
||||
element->neighborPage = InvalidBlockNumber;
|
||||
|
||||
HnswInitNeighbors(base, element, m, allocator);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user