mirror of
https://github.com/pgvector/pgvector.git
synced 2026-06-06 05:51:21 +08:00
Improved performance of HNSW index scans from disk [skip ci]
This commit is contained in:
@@ -1,3 +1,7 @@
|
|||||||
|
## 0.8.2 (unreleased)
|
||||||
|
|
||||||
|
- Improved performance of HNSW index scans from disk
|
||||||
|
|
||||||
## 0.8.1 (2025-09-04)
|
## 0.8.1 (2025-09-04)
|
||||||
|
|
||||||
- Added support for Postgres 18 rc1
|
- Added support for Postgres 18 rc1
|
||||||
|
|||||||
@@ -1267,6 +1267,7 @@ Thanks to:
|
|||||||
- [k-means++: The Advantage of Careful Seeding](https://theory.stanford.edu/~sergei/papers/kMeansPP-soda.pdf)
|
- [k-means++: The Advantage of Careful Seeding](https://theory.stanford.edu/~sergei/papers/kMeansPP-soda.pdf)
|
||||||
- [Concept Decompositions for Large Sparse Text Data using Clustering](https://www.cs.utexas.edu/users/inderjit/public_papers/concept_mlj.pdf)
|
- [Concept Decompositions for Large Sparse Text Data using Clustering](https://www.cs.utexas.edu/users/inderjit/public_papers/concept_mlj.pdf)
|
||||||
- [Efficient and Robust Approximate Nearest Neighbor Search using Hierarchical Navigable Small World Graphs](https://arxiv.org/ftp/arxiv/papers/1603/1603.09320.pdf)
|
- [Efficient and Robust Approximate Nearest Neighbor Search using Hierarchical Navigable Small World Graphs](https://arxiv.org/ftp/arxiv/papers/1603/1603.09320.pdf)
|
||||||
|
- [Turbocharging Vector Databases using Modern SSDs](https://www.vldb.org/pvldb/volumes/18/paper/Turbocharging%20Vector%20Databases%20using%20Modern%20SSDs)
|
||||||
|
|
||||||
## History
|
## History
|
||||||
|
|
||||||
|
|||||||
122
src/hnswbuild.c
122
src/hnswbuild.c
@@ -131,6 +131,69 @@ HnswBuildAppendPage(Relation index, Buffer *buf, Page *page, ForkNumber forkNum)
|
|||||||
HnswInitPage(*buf, *page);
|
HnswInitPage(*buf, *page);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
WriteElement(char *base, HnswElement element, HnswElementTuple etup, HnswNeighborTuple ntup, HnswBuildState * buildstate, Size maxSize, Buffer *buf, Page *page, Relation index, ForkNumber forkNum)
|
||||||
|
{
|
||||||
|
Size etupSize;
|
||||||
|
Size ntupSize;
|
||||||
|
Size combinedSize;
|
||||||
|
Pointer valuePtr;
|
||||||
|
|
||||||
|
/* Skip if already added */
|
||||||
|
if (BlockNumberIsValid(element->neighborPage))
|
||||||
|
return;
|
||||||
|
|
||||||
|
valuePtr = HnswPtrAccess(base, element->value);
|
||||||
|
|
||||||
|
/* Zero memory for each element */
|
||||||
|
MemSet(etup, 0, HNSW_TUPLE_ALLOC_SIZE);
|
||||||
|
|
||||||
|
/* Calculate sizes */
|
||||||
|
etupSize = HNSW_ELEMENT_TUPLE_SIZE(VARSIZE_ANY(valuePtr));
|
||||||
|
ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(element->level, buildstate->m);
|
||||||
|
combinedSize = etupSize + ntupSize + sizeof(ItemIdData);
|
||||||
|
|
||||||
|
/* Initial size check */
|
||||||
|
if (etupSize > HNSW_TUPLE_ALLOC_SIZE)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
||||||
|
errmsg("index tuple too large")));
|
||||||
|
|
||||||
|
HnswSetElementTuple(base, etup, element);
|
||||||
|
|
||||||
|
/* Keep element and neighbors on the same page if possible */
|
||||||
|
if (PageGetFreeSpace(*page) < etupSize || (combinedSize <= maxSize && PageGetFreeSpace(*page) < combinedSize))
|
||||||
|
HnswBuildAppendPage(index, buf, page, forkNum);
|
||||||
|
|
||||||
|
/* Calculate offsets */
|
||||||
|
element->blkno = BufferGetBlockNumber(*buf);
|
||||||
|
element->offno = OffsetNumberNext(PageGetMaxOffsetNumber(*page));
|
||||||
|
if (combinedSize <= maxSize)
|
||||||
|
{
|
||||||
|
element->neighborPage = element->blkno;
|
||||||
|
element->neighborOffno = OffsetNumberNext(element->offno);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
element->neighborPage = element->blkno + 1;
|
||||||
|
element->neighborOffno = FirstOffsetNumber;
|
||||||
|
}
|
||||||
|
|
||||||
|
ItemPointerSet(&etup->neighbortid, element->neighborPage, element->neighborOffno);
|
||||||
|
|
||||||
|
/* Add element */
|
||||||
|
if (PageAddItem(*page, (Item) etup, etupSize, InvalidOffsetNumber, false, false) != element->offno)
|
||||||
|
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
|
||||||
|
|
||||||
|
/* Add new page if needed */
|
||||||
|
if (PageGetFreeSpace(*page) < ntupSize)
|
||||||
|
HnswBuildAppendPage(index, buf, page, forkNum);
|
||||||
|
|
||||||
|
/* Add placeholder for neighbors */
|
||||||
|
if (PageAddItem(*page, (Item) ntup, ntupSize, InvalidOffsetNumber, false, false) != element->neighborOffno)
|
||||||
|
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Create graph pages
|
* Create graph pages
|
||||||
*/
|
*/
|
||||||
@@ -164,61 +227,24 @@ CreateGraphPages(HnswBuildState * buildstate)
|
|||||||
while (!HnswPtrIsNull(base, iter))
|
while (!HnswPtrIsNull(base, iter))
|
||||||
{
|
{
|
||||||
HnswElement element = HnswPtrAccess(base, iter);
|
HnswElement element = HnswPtrAccess(base, iter);
|
||||||
Size etupSize;
|
|
||||||
Size ntupSize;
|
|
||||||
Size combinedSize;
|
|
||||||
Pointer valuePtr = HnswPtrAccess(base, element->value);
|
|
||||||
|
|
||||||
/* Update iterator */
|
/* Update iterator */
|
||||||
iter = element->next;
|
iter = element->next;
|
||||||
|
|
||||||
/* Zero memory for each element */
|
WriteElement(base, element, etup, ntup, buildstate, maxSize, &buf, &page, index, forkNum);
|
||||||
MemSet(etup, 0, HNSW_TUPLE_ALLOC_SIZE);
|
|
||||||
|
|
||||||
/* Calculate sizes */
|
for (int lc = element->level; lc >= 0; lc--)
|
||||||
etupSize = HNSW_ELEMENT_TUPLE_SIZE(VARSIZE_ANY(valuePtr));
|
|
||||||
ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(element->level, buildstate->m);
|
|
||||||
combinedSize = etupSize + ntupSize + sizeof(ItemIdData);
|
|
||||||
|
|
||||||
/* Initial size check */
|
|
||||||
if (etupSize > HNSW_TUPLE_ALLOC_SIZE)
|
|
||||||
ereport(ERROR,
|
|
||||||
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
|
||||||
errmsg("index tuple too large")));
|
|
||||||
|
|
||||||
HnswSetElementTuple(base, etup, element);
|
|
||||||
|
|
||||||
/* Keep element and neighbors on the same page if possible */
|
|
||||||
if (PageGetFreeSpace(page) < etupSize || (combinedSize <= maxSize && PageGetFreeSpace(page) < combinedSize))
|
|
||||||
HnswBuildAppendPage(index, &buf, &page, forkNum);
|
|
||||||
|
|
||||||
/* Calculate offsets */
|
|
||||||
element->blkno = BufferGetBlockNumber(buf);
|
|
||||||
element->offno = OffsetNumberNext(PageGetMaxOffsetNumber(page));
|
|
||||||
if (combinedSize <= maxSize)
|
|
||||||
{
|
{
|
||||||
element->neighborPage = element->blkno;
|
HnswNeighborArray *neighbors = HnswGetNeighbors(base, element, lc);
|
||||||
element->neighborOffno = OffsetNumberNext(element->offno);
|
|
||||||
|
for (int i = 0; i < neighbors->length; i++)
|
||||||
|
{
|
||||||
|
HnswCandidate *hc = &neighbors->items[i];
|
||||||
|
HnswElement hce = HnswPtrAccess(base, hc->element);
|
||||||
|
|
||||||
|
WriteElement(base, hce, etup, ntup, buildstate, maxSize, &buf, &page, index, forkNum);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
|
||||||
{
|
|
||||||
element->neighborPage = element->blkno + 1;
|
|
||||||
element->neighborOffno = FirstOffsetNumber;
|
|
||||||
}
|
|
||||||
|
|
||||||
ItemPointerSet(&etup->neighbortid, element->neighborPage, element->neighborOffno);
|
|
||||||
|
|
||||||
/* Add element */
|
|
||||||
if (PageAddItem(page, (Item) etup, etupSize, InvalidOffsetNumber, false, false) != element->offno)
|
|
||||||
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
|
|
||||||
|
|
||||||
/* Add new page if needed */
|
|
||||||
if (PageGetFreeSpace(page) < ntupSize)
|
|
||||||
HnswBuildAppendPage(index, &buf, &page, forkNum);
|
|
||||||
|
|
||||||
/* Add placeholder for neighbors */
|
|
||||||
if (PageAddItem(page, (Item) ntup, ntupSize, InvalidOffsetNumber, false, false) != element->neighborOffno)
|
|
||||||
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
insertPage = BufferGetBlockNumber(buf);
|
insertPage = BufferGetBlockNumber(buf);
|
||||||
|
|||||||
@@ -253,6 +253,7 @@ HnswInitElement(char *base, ItemPointer heaptid, int m, double ml, int maxLevel,
|
|||||||
element->deleted = 0;
|
element->deleted = 0;
|
||||||
/* Start at one to make it easier to find issues */
|
/* Start at one to make it easier to find issues */
|
||||||
element->version = 1;
|
element->version = 1;
|
||||||
|
element->neighborPage = InvalidBlockNumber;
|
||||||
|
|
||||||
HnswInitNeighbors(base, element, m, allocator);
|
HnswInitNeighbors(base, element, m, allocator);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user