Reduced index size

This commit is contained in:
Andrew Kane
2023-08-01 19:17:39 -07:00
parent 95eded091f
commit d3e08fdf99
8 changed files with 430 additions and 233 deletions

View File

@@ -38,13 +38,18 @@
#define HNSW_MIN_EF_SEARCH 10
#define HNSW_MAX_EF_SEARCH 1000
#define HNSW_ELEMENT_TUPLE_TYPE 1
#define HNSW_NEIGHBOR_TUPLE_TYPE 2
#define HNSW_HEAPTIDS 10
/* Build phases */
/* PROGRESS_CREATEIDX_SUBPHASE_INITIALIZE is 1 */
#define PROGRESS_HNSW_PHASE_LOAD 2
#define HNSW_ELEMENT_TUPLE_SIZE(_dim) (offsetof(HnswElementTupleData, vec) + VECTOR_SIZE(_dim))
#define HNSW_ELEMENT_TUPLE_SIZE(_dim) MAXALIGN(offsetof(HnswElementTupleData, vec) + VECTOR_SIZE(_dim))
#define HNSW_NEIGHBOR_TUPLE_SIZE(level, m) MAXALIGN(offsetof(HnswNeighborTupleData, neighbors) + ((level) + 2) * (m) * sizeof(HnswNeighborTupleItem))
#define HNSW_NEIGHBOR_COUNT(itemid) ((ItemIdGetLength(itemid) - offsetof(HnswNeighborTupleData, neighbors)) / sizeof(HnswNeighborTupleItem))
#define HnswPageGetOpaque(page) ((HnswPageOpaque) PageGetSpecialPointer(page))
#define HnswPageGetMeta(page) ((HnswMetaPageData *) PageGetContents(page))
@@ -60,6 +65,9 @@
#define list_sort(list, cmp) list_qsort(list, cmp)
#endif
#define HnswIsElementTuple(tup) ((tup)->type == HNSW_ELEMENT_TUPLE_TYPE)
#define HnswIsNeighborTuple(tup) ((tup)->type == HNSW_NEIGHBOR_TUPLE_TYPE)
/*
 * Number of neighbor slots for a layer: layer 0 gets 2 * m, every other
 * layer gets m. Arguments are parenthesized so that expression arguments
 * (e.g. GetLayerM(a + b, lc)) expand with the intended precedence.
 */
#define GetLayerM(m, layer) ((layer) == 0 ? (m) * 2 : (m))
#define HnswGetMl(m) (1 / log(m))
@@ -77,6 +85,7 @@ typedef struct HnswElementData
BlockNumber blkno;
OffsetNumber offno;
BlockNumber neighborPage;
OffsetNumber neighborOffno;
Vector *vec;
} HnswElementData;
@@ -175,21 +184,32 @@ typedef HnswPageOpaqueData * HnswPageOpaque;
typedef struct HnswElementTupleData
{
ItemPointerData heaptids[HNSW_HEAPTIDS];
uint8 type;
uint8 level;
uint8 deleted;
uint16 unused;
BlockNumber neighborPage;
uint8 unused;
ItemPointerData heaptids[HNSW_HEAPTIDS];
ItemPointerData neighbortid;
uint16 unused2;
Vector vec;
} HnswElementTupleData;
typedef HnswElementTupleData * HnswElementTuple;
typedef struct HnswNeighborTupleData
/*
 * One neighbor slot stored inside a neighbor tuple
 */
typedef struct HnswNeighborTupleItem
{
/* Index location (block, offset) of the neighbor's element tuple; set invalid when the slot is empty */
ItemPointerData indextid;
/* Not referenced by the code visible here; presumably explicit padding - TODO confirm */
uint16 unused;
/* Distance stored alongside the neighbor; NAN when the slot is empty */
float distance;
} HnswNeighborTupleItem;
/*
 * Neighbor tuple: a fixed header followed by the element's neighbor slots.
 * HNSW_NEIGHBOR_TUPLE_SIZE sizes it for (level + 2) * m slots, and
 * HNSW_NEIGHBOR_COUNT recovers the slot count from the item length.
 */
typedef struct HnswNeighborTupleData
{
/* Tuple type tag; HnswIsNeighborTuple compares it to HNSW_NEIGHBOR_TUPLE_TYPE */
uint8 type;
/* The three unused fields are not referenced by the code visible here; presumably reserved/padding - TODO confirm */
uint8 unused;
uint16 unused2;
uint32 unused3;
/* Neighbor slots, written from the element's highest layer down to layer 0 */
HnswNeighborTupleItem neighbors[FLEXIBLE_ARRAY_MEMBER];
} HnswNeighborTupleData;
/* Pointer alias used when reading or building a neighbor tuple */
typedef HnswNeighborTupleData * HnswNeighborTuple;
@@ -236,14 +256,14 @@ Buffer HnswNewBuffer(Relation index, ForkNumber forkNum);
void HnswInitPage(Buffer buf, Page page);
void HnswInitRegisterPage(Relation index, Buffer *buf, Page *page, GenericXLogState **state);
void HnswInit(void);
List *SearchLayer(Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation, bool inserting, BlockNumber *skipPage);
List *SearchLayer(Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation, bool inserting, BlockNumber *skipPage, OffsetNumber *skipOffno);
HnswElement GetEntryPoint(Relation index);
HnswElement HnswInitElement(ItemPointer tid, int m, double ml, int maxLevel);
void HnswFreeElement(HnswElement element);
HnswElement HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, FmgrInfo *procinfo, Oid collation, int m, int efConstruction, List **updates, bool vacuuming);
HnswCandidate *EntryCandidate(HnswElement em, Datum q, Relation rel, FmgrInfo *procinfo, Oid collation, bool loadvec);
void UpdateMetaPage(Relation index, bool updateEntry, HnswElement entryPoint, BlockNumber insertPage, ForkNumber forkNum);
void AddNeighborsToPage(Relation index, Page page, HnswElement e, HnswNeighborTuple neighbor, Size neighborsz, int m);
void HnswSetNeighborTuple(HnswNeighborTuple ntup, HnswElement e, int m);
void HnswAddHeapTid(HnswElement element, ItemPointer heaptid);
void HnswInitNeighbors(HnswElement element, int m);
bool HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid, Relation heapRel);
@@ -267,6 +287,6 @@ bool hnswgettuple(IndexScanDesc scan, ScanDirection dir);
void hnswendscan(IndexScanDesc scan);
/* Ensure fits in uint8 */
#define HnswGetMaxLevel(m) Min(((BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(HnswPageOpaqueData))) / (MAXALIGN(sizeof(HnswNeighborTupleData)) + sizeof(ItemIdData)) / m) - 2, 255)
/*
 * Highest level whose neighbor tuple still fits on one page: the usable page
 * space, less the neighbor tuple header and one line pointer, divided by the
 * slot size and by m gives the slot multiplier (level + 2), hence the "- 2".
 * Capped at 255. The m argument is parenthesized so that an expression
 * argument divides as a whole.
 */
#define HnswGetMaxLevel(m) Min(((BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(HnswPageOpaqueData)) - offsetof(HnswNeighborTupleData, neighbors) - sizeof(ItemIdData)) / (sizeof(HnswNeighborTupleItem)) / (m)) - 2, 255)
#endif

View File

@@ -67,6 +67,30 @@ CreateMetaPage(HnswBuildState * buildstate)
HnswCommitBuffer(buf, state);
}
/*
 * Chain a freshly allocated page after the current one and make it the
 * caller's active buffer, page, and WAL state
 */
static void
HnswBuildAppendPage(Relation index, Buffer *buf, Page *page, GenericXLogState **state, ForkNumber forkNum)
{
	Buffer		nextBuf;
	BlockNumber nextBlkno;

	/* Allocate the page that will follow the current one */
	nextBuf = HnswNewBuffer(index, forkNum);
	nextBlkno = BufferGetBlockNumber(nextBuf);

	/* Link the current page to its successor */
	HnswPageGetOpaque(*page)->nextblkno = nextBlkno;

	/* Commit the current page and let go of it */
	MarkBufferDirty(*buf);
	GenericXLogFinish(*state);
	UnlockReleaseBuffer(*buf);

	/* Hand the new buffer back and start a WAL record for it */
	*buf = nextBuf;
	*state = GenericXLogStart(index);
	*page = GenericXLogRegisterBuffer(*state, *buf, GENERIC_XLOG_FULL_IMAGE);
	HnswInitPage(*buf, *page);
}
/*
* Create element pages
*/
@@ -76,23 +100,23 @@ CreateElementPages(HnswBuildState * buildstate)
Relation index = buildstate->index;
ForkNumber forkNum = buildstate->forkNum;
int dimensions = buildstate->dimensions;
Size elementsz;
HnswElementTuple element;
int elementsPerPage;
BlockNumber neighborPage;
Size etupSize;
Size maxSize;
HnswElementTuple etup;
HnswNeighborTuple ntup;
BlockNumber insertPage;
Buffer buf;
Page page;
GenericXLogState *state;
ListCell *lc;
/* Allocate once */
elementsz = MAXALIGN(HNSW_ELEMENT_TUPLE_SIZE(dimensions));
element = palloc0(elementsz);
/* Calculate sizes */
maxSize = BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(HnswPageOpaqueData));
etupSize = HNSW_ELEMENT_TUPLE_SIZE(dimensions);
/* Calculate starting neighbor page */
elementsPerPage = (BLCKSZ - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(HnswPageOpaqueData))) / (elementsz + sizeof(ItemIdData));
neighborPage = HNSW_HEAD_BLKNO + (int) ceil(list_length(buildstate->elements) / (double) elementsPerPage);
/* Allocate once */
etup = palloc0(etupSize);
ntup = palloc0(maxSize);
/* Prepare first page */
buf = HnswNewBuffer(index, forkNum);
@@ -102,45 +126,46 @@ CreateElementPages(HnswBuildState * buildstate)
foreach(lc, buildstate->elements)
{
HnswElement e = lfirst(lc);
HnswElement element = lfirst(lc);
Size ntupSize;
Size combinedSize;
/* Calculate neighbor page */
/* Will be rechecked later */
e->neighborPage = neighborPage++;
HnswSetElementTuple(etup, element);
/* Set item data */
HnswSetElementTuple(element, e);
element->neighborPage = e->neighborPage;
/* Calculate sizes */
ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(element->level, buildstate->m);
combinedSize = etupSize + ntupSize + sizeof(ItemIdData);
/* Ensure free space */
if (PageGetFreeSpace(page) < elementsz)
/* Keep element and neighbors on the same page if possible */
if (PageGetFreeSpace(page) < etupSize || (combinedSize <= maxSize && PageGetFreeSpace(page) < combinedSize))
HnswBuildAppendPage(index, &buf, &page, &state, forkNum);
/* Calculate offsets */
element->blkno = BufferGetBlockNumber(buf);
element->offno = OffsetNumberNext(PageGetMaxOffsetNumber(page));
if (combinedSize <= maxSize)
{
/* Add a new page */
Buffer newbuf = HnswNewBuffer(index, forkNum);
/* Update previous page */
HnswPageGetOpaque(page)->nextblkno = BufferGetBlockNumber(newbuf);
/* Commit */
MarkBufferDirty(buf);
GenericXLogFinish(state);
UnlockReleaseBuffer(buf);
/* Can take a while, so ensure we can interrupt */
/* Needs to be called when no buffer locks are held */
CHECK_FOR_INTERRUPTS();
/* Prepare new page */
buf = newbuf;
state = GenericXLogStart(index);
page = GenericXLogRegisterBuffer(state, buf, GENERIC_XLOG_FULL_IMAGE);
HnswInitPage(buf, page);
element->neighborPage = element->blkno;
element->neighborOffno = OffsetNumberNext(element->offno);
}
else
{
element->neighborPage = element->blkno + 1;
element->neighborOffno = FirstOffsetNumber;
}
/* Add the item */
e->blkno = BufferGetBlockNumber(buf);
e->offno = PageAddItem(page, (Item) element, elementsz, InvalidOffsetNumber, false, false);
if (e->offno == InvalidOffsetNumber)
ItemPointerSet(&etup->neighbortid, element->neighborPage, element->neighborOffno);
/* Add element */
if (PageAddItem(page, (Item) etup, etupSize, InvalidOffsetNumber, false, false) != element->offno)
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
/* Add new page if needed */
if (PageGetFreeSpace(page) < ntupSize)
HnswBuildAppendPage(index, &buf, &page, &state, forkNum);
/* Add placeholder for neighbors */
if (PageAddItem(page, (Item) ntup, ntupSize, InvalidOffsetNumber, false, false) != element->neighborOffno)
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
}
@@ -162,13 +187,12 @@ CreateNeighborPages(HnswBuildState * buildstate)
{
Relation index = buildstate->index;
ForkNumber forkNum = buildstate->forkNum;
Size neighborsz;
HnswNeighborTuple neighbor;
int m = buildstate->m;
ListCell *lc;
HnswNeighborTuple ntup;
/* Allocate once */
neighborsz = MAXALIGN(sizeof(HnswNeighborTupleData));
neighbor = palloc0(neighborsz);
ntup = palloc0(BLCKSZ);
foreach(lc, buildstate->elements)
{
@@ -176,23 +200,17 @@ CreateNeighborPages(HnswBuildState * buildstate)
Buffer buf;
Page page;
GenericXLogState *state;
Size neighborsz = HNSW_NEIGHBOR_TUPLE_SIZE(e->level, m);
/* Can take a while, so ensure we can interrupt */
/* Needs to be called when no buffer locks are held */
CHECK_FOR_INTERRUPTS();
buf = HnswNewBuffer(index, forkNum);
/* Check block number */
if (BufferGetBlockNumber(buf) != e->neighborPage)
elog(ERROR, "expected neighbor page %d, got %d", e->neighborPage, BufferGetBlockNumber(buf));
/* Prepare page */
buf = ReadBufferExtended(index, forkNum, e->neighborPage, RBM_NORMAL, NULL);
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
state = GenericXLogStart(index);
page = GenericXLogRegisterBuffer(state, buf, GENERIC_XLOG_FULL_IMAGE);
HnswInitPage(buf, page);
page = GenericXLogRegisterBuffer(state, buf, 0);
AddNeighborsToPage(index, page, e, neighbor, neighborsz, buildstate->m);
HnswSetNeighborTuple(ntup, e, m);
if (!PageIndexTupleOverwrite(page, e->neighborOffno, (Item) ntup, neighborsz))
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
/* Commit */
MarkBufferDirty(buf);

View File

@@ -34,7 +34,7 @@ GetInsertPage(Relation index)
* Check for a free offset
*/
static bool
HnswFreeOffset(Page page, OffsetNumber *freeOffno, BlockNumber *neighborPage)
HnswFreeOffset(Relation index, Buffer buf, Page page, HnswElement element, Size neighborsz, Buffer *nbuf, Page *npage, OffsetNumber *freeOffno, OffsetNumber *freeNeighborOffno)
{
OffsetNumber offno;
OffsetNumber maxoffno = PageGetMaxOffsetNumber(page);
@@ -43,17 +43,67 @@ HnswFreeOffset(Page page, OffsetNumber *freeOffno, BlockNumber *neighborPage)
{
HnswElementTuple item = (HnswElementTuple) PageGetItem(page, PageGetItemId(page, offno));
if (item->deleted)
/* Skip neighbor tuples */
if (!HnswIsElementTuple(item))
continue;
/* TODO Remove level check */
if (item->deleted && item->level == element->level)
{
*freeOffno = offno;
*neighborPage = item->neighborPage;
return true;
BlockNumber neighborPage = ItemPointerGetBlockNumber(&item->neighbortid);
OffsetNumber neighborOffno = ItemPointerGetOffsetNumber(&item->neighbortid);
ItemId itemid;
if (neighborPage == BufferGetBlockNumber(buf))
{
*nbuf = buf;
*npage = page;
}
else
{
*nbuf = ReadBuffer(index, neighborPage);
LockBuffer(*nbuf, BUFFER_LOCK_EXCLUSIVE);
/* Skip WAL for now */
*npage = BufferGetPage(*nbuf);
}
itemid = PageGetItemId(*npage, neighborOffno);
/* Check for space on neighbor tuple page */
if (PageGetFreeSpace(*npage) + ItemIdGetLength(itemid) - sizeof(ItemIdData) >= neighborsz)
{
*freeOffno = offno;
*freeNeighborOffno = neighborOffno;
return true;
}
else if (*nbuf != buf)
UnlockReleaseBuffer(*nbuf);
}
}
return false;
}
/*
 * Extend the index by one page during insert, register it with the caller's
 * WAL state, and link it after the given page
 */
static void
HnswInsertAppendPage(Relation index, Buffer *nbuf, Page *npage, GenericXLogState *state, Page page)
{
	Buffer		added;

	/*
	 * Only one backend may extend the relation at a time, so hold the
	 * extension lock just for the allocation
	 */
	LockRelationForExtension(index, ExclusiveLock);
	added = HnswNewBuffer(index, MAIN_FORKNUM);
	UnlockRelationForExtension(index, ExclusiveLock);

	/* Return the new buffer and its initialized, WAL-registered page */
	*nbuf = added;
	*npage = GenericXLogRegisterBuffer(state, added, GENERIC_XLOG_FULL_IMAGE);
	HnswInitPage(added, *npage);

	/* Point the previous page at the new one */
	HnswPageGetOpaque(page)->nextblkno = BufferGetBlockNumber(added);
}
/*
* Add to element and neighbor pages
*/
@@ -63,25 +113,32 @@ WriteNewElementPages(Relation index, HnswElement e, int m)
Buffer buf;
Page page;
GenericXLogState *state;
Size esize;
Size etupSize;
Size ntupSize;
Size combinedSize;
HnswElementTuple etup;
BlockNumber insertPage = GetInsertPage(index);
BlockNumber originalInsertPage = insertPage;
int dimensions = e->vec->dim;
Size nsize = MAXALIGN(sizeof(HnswNeighborTupleData));
HnswNeighborTuple ntup = palloc0(nsize);
HnswNeighborTuple ntup;
Buffer nbuf;
Page npage;
OffsetNumber freeOffno = InvalidOffsetNumber;
BlockNumber neighborPage = InvalidBlockNumber;
OffsetNumber freeNeighborOffno = InvalidOffsetNumber;
/* Get tuple size */
esize = MAXALIGN(HNSW_ELEMENT_TUPLE_SIZE(dimensions));
/* Calculate sizes */
etupSize = HNSW_ELEMENT_TUPLE_SIZE(dimensions);
ntupSize = HNSW_NEIGHBOR_TUPLE_SIZE(e->level, m);
combinedSize = etupSize + ntupSize + sizeof(ItemIdData);
/* Prepare tuple */
etup = palloc0(esize);
/* Prepare element tuple */
etup = palloc0(etupSize);
HnswSetElementTuple(etup, e);
/* Prepare neighbor tuple */
ntup = palloc0(ntupSize);
HnswSetNeighborTuple(ntup, e, m);
/* Find a page to insert the item */
for (;;)
{
@@ -91,8 +148,29 @@ WriteNewElementPages(Relation index, HnswElement e, int m)
state = GenericXLogStart(index);
page = GenericXLogRegisterBuffer(state, buf, 0);
if (HnswFreeOffset(page, &freeOffno, &neighborPage) || PageGetFreeSpace(page) >= esize)
/* Space for both */
if (PageGetFreeSpace(page) >= combinedSize)
{
nbuf = buf;
npage = page;
break;
}
/* Space for element but not neighbors and last page */
if (PageGetFreeSpace(page) >= etupSize && !BlockNumberIsValid(HnswPageGetOpaque(page)->nextblkno))
{
HnswInsertAppendPage(index, &nbuf, &npage, state, page);
break;
}
/* Space from deleted item */
if (HnswFreeOffset(index, buf, page, e, ntupSize, &nbuf, &npage, &freeOffno, &freeNeighborOffno))
{
if (nbuf != buf)
npage = GenericXLogRegisterBuffer(state, nbuf, 0);
break;
}
insertPage = HnswPageGetOpaque(page)->nextblkno;
@@ -107,28 +185,7 @@ WriteNewElementPages(Relation index, HnswElement e, int m)
Buffer newbuf;
Page newpage;
/*
* From ReadBufferExtended: Caller is responsible for ensuring
* that only one backend tries to extend a relation at the same
* time!
*/
LockRelationForExtension(index, ExclusiveLock);
/* Add a new page */
newbuf = HnswNewBuffer(index, MAIN_FORKNUM);
/* Unlock extend relation lock as early as possible */
UnlockRelationForExtension(index, ExclusiveLock);
/* Init new page */
newpage = GenericXLogRegisterBuffer(state, newbuf, GENERIC_XLOG_FULL_IMAGE);
HnswInitPage(newbuf, newpage);
/* Update insert page */
insertPage = BufferGetBlockNumber(newbuf);
/* Update previous buffer */
HnswPageGetOpaque(page)->nextblkno = insertPage;
HnswInsertAppendPage(index, &newbuf, &newpage, state, page);
/* Commit */
MarkBufferDirty(newbuf);
@@ -142,58 +199,67 @@ WriteNewElementPages(Relation index, HnswElement e, int m)
state = GenericXLogStart(index);
buf = newbuf;
page = GenericXLogRegisterBuffer(state, buf, 0);
/* Create new page for neighbors if needed */
if (PageGetFreeSpace(page) < combinedSize)
HnswInsertAppendPage(index, &nbuf, &npage, state, page);
else
{
nbuf = buf;
npage = page;
}
break;
}
}
if (OffsetNumberIsValid(freeOffno))
{
/* Reuse existing page */
nbuf = ReadBuffer(index, neighborPage);
LockBuffer(nbuf, BUFFER_LOCK_EXCLUSIVE);
}
else
{
/* Add new page */
LockRelationForExtension(index, ExclusiveLock);
nbuf = HnswNewBuffer(index, MAIN_FORKNUM);
UnlockRelationForExtension(index, ExclusiveLock);
}
npage = GenericXLogRegisterBuffer(state, nbuf, GENERIC_XLOG_FULL_IMAGE);
/* Overwrites existing page via InitPage */
HnswInitPage(nbuf, npage);
/* Update neighbors */
AddNeighborsToPage(index, npage, e, ntup, nsize, m);
e->blkno = BufferGetBlockNumber(buf);
e->neighborPage = BufferGetBlockNumber(nbuf);
/* Set neighbor page for element */
etup->neighborPage = e->neighborPage;
insertPage = e->neighborPage;
/* Add to next offset */
if (OffsetNumberIsValid(freeOffno))
{
e->offno = freeOffno;
if (!PageIndexTupleOverwrite(page, freeOffno, (Item) etup, esize))
e->neighborOffno = freeNeighborOffno;
}
else
{
e->offno = OffsetNumberNext(PageGetMaxOffsetNumber(page));
if (nbuf == buf)
e->neighborOffno = OffsetNumberNext(e->offno);
else
e->neighborOffno = FirstOffsetNumber;
}
ItemPointerSet(&etup->neighbortid, e->neighborPage, e->neighborOffno);
/* Add element and neighbors */
if (OffsetNumberIsValid(freeOffno))
{
if (!PageIndexTupleOverwrite(page, e->offno, (Item) etup, etupSize))
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
if (!PageIndexTupleOverwrite(npage, e->neighborOffno, (Item) ntup, ntupSize))
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
}
else
{
e->offno = PageAddItem(page, (Item) etup, esize, InvalidOffsetNumber, false, false);
if (e->offno == InvalidOffsetNumber)
if (PageAddItem(page, (Item) etup, etupSize, InvalidOffsetNumber, false, false) != e->offno)
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
if (PageAddItem(npage, (Item) ntup, ntupSize, InvalidOffsetNumber, false, false) != e->neighborOffno)
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
}
/* Commit */
MarkBufferDirty(buf);
MarkBufferDirty(nbuf);
if (nbuf != buf)
MarkBufferDirty(nbuf);
GenericXLogFinish(state);
UnlockReleaseBuffer(buf);
UnlockReleaseBuffer(nbuf);
if (nbuf != buf)
UnlockReleaseBuffer(nbuf);
/* Update the insert page */
if (insertPage != originalInsertPage)
@@ -201,12 +267,12 @@ WriteNewElementPages(Relation index, HnswElement e, int m)
}
/*
* Calculate offset number for update
* Calculate index for update
*/
static OffsetNumber
HnswGetOffsetNumber(HnswUpdate * update, int m)
static int
HnswGetIndex(HnswUpdate * update, int m)
{
return FirstOffsetNumber + (update->hc.element->level - update->level) * m + update->index;
return (update->hc.element->level - update->level) * m + update->index;
}
/*
@@ -215,36 +281,45 @@ HnswGetOffsetNumber(HnswUpdate * update, int m)
static void
UpdateNeighborPages(Relation index, HnswElement e, int m, List *updates)
{
Buffer buf;
Page page;
GenericXLogState *state;
ListCell *lc;
OffsetNumber offno;
Size neighborsz = MAXALIGN(sizeof(HnswNeighborTupleData));
HnswNeighborTuple neighbor = palloc0(neighborsz);
/* Could update multiple at once for same element */
/* but should only happen a low percent of time, so keep simple for now */
foreach(lc, updates)
{
Buffer buf;
Page page;
GenericXLogState *state;
HnswUpdate *update = lfirst(lc);
ItemId itemid;
Size neighborsz;
int idx;
OffsetNumber offno = update->hc.element->neighborOffno;
/* Register page */
buf = ReadBuffer(index, update->hc.element->neighborPage);
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
state = GenericXLogStart(index);
page = GenericXLogRegisterBuffer(state, buf, 0);
offno = HnswGetOffsetNumber(update, m);
itemid = PageGetItemId(page, offno);
neighborsz = ItemIdGetLength(itemid);
idx = HnswGetIndex(update, m);
/* Make robust against issues */
if (offno <= PageGetMaxOffsetNumber(page))
if (idx < HNSW_NEIGHBOR_COUNT(itemid))
{
HnswNeighborTuple ntup = (HnswNeighborTuple) PageGetItem(page, itemid);
HnswNeighborTupleItem *neighbor = &ntup->neighbors[idx];
/* Set item data */
ItemPointerSet(&neighbor->indextid, e->blkno, e->offno);
neighbor->distance = update->hc.distance;
/* Update connections */
if (!PageIndexTupleOverwrite(page, offno, (Item) neighbor, neighborsz))
if (!PageIndexTupleOverwrite(page, offno, (Item) ntup, neighborsz))
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
/* Commit */
@@ -267,7 +342,7 @@ HnswAddDuplicate(Relation index, HnswElement element, HnswElement dup)
Buffer buf;
Page page;
GenericXLogState *state;
Size esize = MAXALIGN(HNSW_ELEMENT_TUPLE_SIZE(dup->vec->dim));
Size esize = HNSW_ELEMENT_TUPLE_SIZE(dup->vec->dim);
HnswElementTuple etup;
int i;

View File

@@ -27,12 +27,12 @@ GetScanItems(IndexScanDesc scan, Datum q)
for (int lc = entryPoint->level; lc >= 1; lc--)
{
w = SearchLayer(q, ep, 1, lc, index, procinfo, collation, false, NULL);
w = SearchLayer(q, ep, 1, lc, index, procinfo, collation, false, NULL, NULL);
ep = w;
}
/* TODO Return all visited elements at level 0, not just ef search */
so->w = SearchLayer(q, ep, hnsw_ef_search, 0, index, procinfo, collation, false, NULL);
so->w = SearchLayer(q, ep, hnsw_ef_search, 0, index, procinfo, collation, false, NULL, NULL);
}
/*

View File

@@ -212,6 +212,8 @@ HnswGetDistance(HnswElement a, HnswElement b, int lc, FmgrInfo *procinfo, Oid co
/* Look for cached distance */
if (a->neighbors != NULL)
{
Assert(a->level >= lc);
for (int i = 0; i < a->neighbors[lc].length; i++)
{
if (a->neighbors[lc].items[i].element == b)
@@ -221,6 +223,8 @@ HnswGetDistance(HnswElement a, HnswElement b, int lc, FmgrInfo *procinfo, Oid co
if (b->neighbors != NULL)
{
Assert(b->level >= lc);
for (int i = 0; i < b->neighbors[lc].length; i++)
{
if (b->neighbors[lc].items[i].element == a)
@@ -359,6 +363,55 @@ HnswAddHeapTid(HnswElement element, ItemPointer heaptid)
element->heaptids = lappend(element->heaptids, copy);
}
/*
 * Fill an element's in-memory neighbor arrays from its neighbor tuple on
 * the given page
 */
static void
LoadNeighborsFromPage(HnswElement element, Relation index, Page page)
{
	int			m = HnswGetM(index);
	ItemId		itemid = PageGetItemId(page, element->neighborOffno);
	int			expected = (element->level + 2) * m;
	HnswNeighborTuple ntup;

	/* Always start from empty neighbor arrays */
	HnswInitNeighbors(element, m);

	/*
	 * A different slot count means the tuple now belongs to a new item.
	 * This is only detected when the new item has a different level.
	 * TODO Use versioning to fix this?
	 */
	if (HNSW_NEIGHBOR_COUNT(itemid) != expected)
		return;

	ntup = (HnswNeighborTuple) PageGetItem(page, itemid);

	Assert(HnswIsNeighborTuple(ntup));

	for (int slot = 0; slot < expected; slot++)
	{
		HnswNeighborTupleItem *src = &ntup->neighbors[slot];
		HnswNeighborArray *dest;
		HnswCandidate *hc;
		HnswElement linked;
		int			level;

		/* Empty slot */
		if (!ItemPointerIsValid(&src->indextid))
			continue;

		linked = CreateElementFromBlock(ItemPointerGetBlockNumber(&src->indextid), ItemPointerGetOffsetNumber(&src->indextid));

		/*
		 * Slots run from the top layer downward in groups of m; layer 0
		 * owns the last 2 * m slots, so clamp the computed level at 0
		 */
		level = element->level - slot / m;
		level = (level < 0) ? 0 : level;

		/* Append to that layer's neighbor array */
		dest = &element->neighbors[level];
		hc = &dest->items[dest->length];
		dest->length++;
		hc->element = linked;
		hc->distance = src->distance;
	}
}
/*
* Load an element and optionally get its distance from q
*/
@@ -376,6 +429,8 @@ HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index,
item = (HnswElementTuple) PageGetItem(page, PageGetItemId(page, element->offno));
Assert(HnswIsElementTuple(item));
/* Load element */
element->heaptids = NIL;
for (int i = 0; i < HNSW_HEAPTIDS; i++)
@@ -387,7 +442,8 @@ HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index,
HnswAddHeapTid(element, &item->heaptids[i]);
}
element->level = item->level;
element->neighborPage = item->neighborPage;
element->neighborPage = ItemPointerGetBlockNumber(&item->neighbortid);
element->neighborOffno = ItemPointerGetOffsetNumber(&item->neighbortid);
element->deleted = item->deleted;
if (loadvec)
@@ -400,6 +456,10 @@ HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index,
if (distance != NULL)
*distance = (float) DatumGetFloat8(FunctionCall2Coll(procinfo, collation, *q, PointerGetDatum(&item->vec)));
/* Load neighbors if on same page */
if (element->neighborPage == element->blkno)
LoadNeighborsFromPage(element, index, page);
UnlockReleaseBuffer(buf);
}
@@ -512,53 +572,16 @@ HnswInitNeighbors(HnswElement element, int m)
* Load neighbors
*/
static void
LoadNeighbors(HnswCandidate * c, Relation index)
LoadNeighbors(HnswElement element, Relation index)
{
Buffer buf;
Page page;
OffsetNumber offno;
OffsetNumber maxoffno;
HnswNeighborTuple neighbor;
HnswNeighborArray *neighbors;
int m = HnswGetM(index);
buf = ReadBuffer(index, c->element->neighborPage);
buf = ReadBuffer(index, element->neighborPage);
LockBuffer(buf, BUFFER_LOCK_SHARE);
page = BufferGetPage(buf);
maxoffno = PageGetMaxOffsetNumber(page);
HnswInitNeighbors(c->element, m);
/* If not, neighbor page represents new item */
/* Only caught if item has a different level */
/* TODO Use versioning to fix this? */
if (maxoffno == (c->element->level + 2) * m)
{
for (offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno))
{
HnswElement element;
int level;
HnswCandidate *hc;
neighbor = (HnswNeighborTuple) PageGetItem(page, PageGetItemId(page, offno));
if (!ItemPointerIsValid(&neighbor->indextid))
continue;
element = CreateElementFromBlock(ItemPointerGetBlockNumber(&neighbor->indextid), ItemPointerGetOffsetNumber(&neighbor->indextid));
/* Calculate level based on offset */
level = c->element->level - (offno - FirstOffsetNumber) / m;
if (level < 0)
level = 0;
neighbors = &c->element->neighbors[level];
hc = &neighbors->items[neighbors->length];
hc->element = element;
hc->distance = neighbor->distance;
neighbors->length++;
}
}
LoadNeighborsFromPage(element, index, page);
UnlockReleaseBuffer(buf);
}
@@ -603,11 +626,14 @@ HnswFreeElement(HnswElement element)
}
/*
* Set element tuple, except for neighbor page
* Set element tuple, except for neighbor info
*/
void
HnswSetElementTuple(HnswElementTuple etup, HnswElement element)
{
etup->type = HNSW_ELEMENT_TUPLE_TYPE;
etup->level = element->level;
etup->deleted = 0;
for (int i = 0; i < HNSW_HEAPTIDS; i++)
{
if (i < list_length(element->heaptids))
@@ -615,8 +641,6 @@ HnswSetElementTuple(HnswElementTuple etup, HnswElement element)
else
ItemPointerSetInvalid(&etup->heaptids[i]);
}
etup->level = element->level;
etup->deleted = 0;
memcpy(&etup->vec, element->vec, VECTOR_SIZE(element->vec->dim));
}
@@ -650,7 +674,7 @@ AddToVisited(HTAB *v, HnswCandidate * hc, Relation index, bool *found)
* Algorithm 2 from paper
*/
List *
SearchLayer(Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation, bool inserting, BlockNumber *skipPage)
SearchLayer(Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinfo, Oid collation, bool inserting, BlockNumber *skipPage, OffsetNumber *skipOffno)
{
ListCell *lc2;
@@ -699,7 +723,7 @@ SearchLayer(Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinf
break;
if (c->element->neighbors == NULL)
LoadNeighbors(c, index);
LoadNeighbors(c->element, index);
/* Get the neighborhood at layer lc */
neighborhood = &c->element->neighbors[lc];
@@ -731,7 +755,7 @@ SearchLayer(Datum q, List *ep, int ef, int lc, Relation index, FmgrInfo *procinf
continue;
/* Skip self for vacuuming update */
if (skipPage != NULL && e->element->neighborPage == *skipPage)
if (skipPage != NULL && e->element->neighborPage == *skipPage && e->element->neighborOffno == *skipOffno)
continue;
/* Stale read */
@@ -825,6 +849,7 @@ HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, F
Datum q = PointerGetDatum(element->vec);
HnswElement dup;
BlockNumber *skipPage = vacuuming ? &element->neighborPage : NULL;
OffsetNumber *skipOffno = vacuuming ? &element->neighborOffno : NULL;
/* Get entry point and level */
if (entryPoint != NULL)
@@ -837,7 +862,7 @@ HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, F
for (int lc = entryLevel; lc >= level + 1; lc--)
{
w = SearchLayer(q, ep, 1, lc, index, procinfo, collation, true, skipPage);
w = SearchLayer(q, ep, 1, lc, index, procinfo, collation, true, skipPage, skipOffno);
ep = w;
}
@@ -848,7 +873,7 @@ HnswInsertElement(HnswElement element, HnswElement entryPoint, Relation index, F
{
int lm = GetLayerM(m, lc);
w = SearchLayer(q, ep, efConstruction, lc, index, procinfo, collation, true, skipPage);
w = SearchLayer(q, ep, efConstruction, lc, index, procinfo, collation, true, skipPage, skipOffno);
newNeighbors[lc] = SelectNeighbors(w, lm, lc, procinfo, collation, NULL);
ep = w;
}
@@ -913,11 +938,15 @@ UpdateMetaPage(Relation index, bool updateEntry, HnswElement entryPoint, BlockNu
}
/*
* Add neighbors to page
* Set neighbor tuple
*/
void
AddNeighborsToPage(Relation index, Page page, HnswElement e, HnswNeighborTuple neighbor, Size neighborsz, int m)
HnswSetNeighborTuple(HnswNeighborTuple ntup, HnswElement e, int m)
{
int idx = 0;
ntup->type = HNSW_NEIGHBOR_TUPLE_TYPE;
for (int lc = e->level; lc >= 0; lc--)
{
HnswNeighborArray *neighbors = &e->neighbors[lc];
@@ -925,6 +954,8 @@ AddNeighborsToPage(Relation index, Page page, HnswElement e, HnswNeighborTuple n
for (int i = 0; i < lm; i++)
{
HnswNeighborTupleItem *neighbor = &ntup->neighbors[idx++];
if (i < neighbors->length)
{
HnswCandidate *hc = &neighbors->items[i];
@@ -937,9 +968,6 @@ AddNeighborsToPage(Relation index, Page page, HnswElement e, HnswNeighborTuple n
ItemPointerSetInvalid(&neighbor->indextid);
neighbor->distance = NAN;
}
if (PageAddItem(page, (Item) neighbor, neighborsz, InvalidOffsetNumber, false, false) == InvalidOffsetNumber)
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
}
}
}

View File

@@ -62,6 +62,10 @@ RemoveHeapTids(HnswVacuumState * vacuumstate)
int idx = 0;
bool itemUpdated = false;
/* Skip neighbor tuples */
if (!HnswIsElementTuple(item))
continue;
if (ItemPointerIsValid(&item->heaptids[0]))
{
for (int i = 0; i < HNSW_HEAPTIDS; i++)
@@ -81,7 +85,7 @@ RemoveHeapTids(HnswVacuumState * vacuumstate)
if (itemUpdated)
{
Size itemsz = MAXALIGN(HNSW_ELEMENT_TUPLE_SIZE(item->vec.dim));
Size itemsz = HNSW_ELEMENT_TUPLE_SIZE(item->vec.dim);
/* Mark rest as invalid */
for (int i = idx; i < HNSW_HEAPTIDS; i++)
@@ -137,25 +141,30 @@ NeedsUpdated(HnswVacuumState * vacuumstate, HnswElement element)
BufferAccessStrategy bas = vacuumstate->bas;
Buffer buf;
Page page;
OffsetNumber offno;
OffsetNumber maxoffno;
ItemId itemid;
int neighborCount;
HnswNeighborTuple ntup;
bool needsUpdated = false;
buf = ReadBufferExtended(index, MAIN_FORKNUM, element->neighborPage, RBM_NORMAL, bas);
LockBuffer(buf, BUFFER_LOCK_SHARE);
page = BufferGetPage(buf);
maxoffno = PageGetMaxOffsetNumber(page);
itemid = PageGetItemId(page, element->neighborOffno);
ntup = (HnswNeighborTuple) PageGetItem(page, itemid);
neighborCount = HNSW_NEIGHBOR_COUNT(itemid);
Assert(HnswIsNeighborTuple(ntup));
/* Check neighbors */
for (offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno))
for (int i = 0; i < neighborCount; i++)
{
HnswNeighborTuple ntup = (HnswNeighborTuple) PageGetItem(page, PageGetItemId(page, offno));
HnswNeighborTupleItem *neighbor = &ntup->neighbors[i];
if (!ItemPointerIsValid(&ntup->indextid))
if (!ItemPointerIsValid(&neighbor->indextid))
continue;
/* Check if in deleted list */
if (DeletedContains(vacuumstate->deleted, &ntup->indextid))
if (DeletedContains(vacuumstate->deleted, &neighbor->indextid))
{
needsUpdated = true;
break;
@@ -184,7 +193,7 @@ RepairGraphElement(HnswVacuumState * vacuumstate, HnswElement element)
HnswElement entryPoint;
BufferAccessStrategy bas = vacuumstate->bas;
HnswNeighborTuple ntup = vacuumstate->ntup;
Size nsize = vacuumstate->nsize;
Size neighborsz = HNSW_NEIGHBOR_TUPLE_SIZE(element->level, m);
/* Check if any neighbors point to deleted values */
if (!NeedsUpdated(vacuumstate, element))
@@ -217,13 +226,13 @@ RepairGraphElement(HnswVacuumState * vacuumstate, HnswElement element)
buf = ReadBufferExtended(index, MAIN_FORKNUM, element->neighborPage, RBM_NORMAL, bas);
LockBuffer(buf, BUFFER_LOCK_EXCLUSIVE);
state = GenericXLogStart(index);
page = GenericXLogRegisterBuffer(state, buf, GENERIC_XLOG_FULL_IMAGE);
/* Overwrites existing page via InitPage */
HnswInitPage(buf, page);
page = GenericXLogRegisterBuffer(state, buf, 0);
/* Update neighbors */
AddNeighborsToPage(index, page, element, ntup, nsize, m);
HnswSetNeighborTuple(ntup, element, m);
if (!PageIndexTupleOverwrite(page, element->neighborOffno, (Item) ntup, neighborsz))
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
/* Commit */
MarkBufferDirty(buf);
@@ -309,13 +318,18 @@ RepairGraph(HnswVacuumState * vacuumstate)
HnswElementTuple item = (HnswElementTuple) PageGetItem(page, PageGetItemId(page, offno));
HnswElement element;
/* Skip neighbor tuples */
if (!HnswIsElementTuple(item))
continue;
/* Skip updating neighbors if being deleted */
if (!ItemPointerIsValid(&item->heaptids[0]))
continue;
/* Create an element */
element = palloc(sizeof(HnswElementData));
element->neighborPage = item->neighborPage;
element->neighborPage = ItemPointerGetBlockNumber(&item->neighbortid);
element->neighborOffno = ItemPointerGetOffsetNumber(&item->neighbortid);
element->level = item->level;
element->blkno = blkno;
element->offno = offno;
@@ -381,30 +395,68 @@ MarkDeleted(HnswVacuumState * vacuumstate)
Size itemsz;
Buffer nbuf;
Page npage;
BlockNumber neighborPage;
OffsetNumber neighborOffno;
Size ntupsz;
HnswNeighborTuple ntup;
int neighborCount;
/* Skip neighbor tuples */
if (!HnswIsElementTuple(item))
continue;
if (ItemPointerIsValid(&item->heaptids[0]))
continue;
/* Calculate sizes */
itemsz = HNSW_ELEMENT_TUPLE_SIZE(item->vec.dim);
ntupsz = HNSW_NEIGHBOR_TUPLE_SIZE(item->level, vacuumstate->m);
neighborCount = (item->level + 2) * vacuumstate->m;
/* Get neighbor page */
neighborPage = ItemPointerGetBlockNumber(&item->neighbortid);
neighborOffno = ItemPointerGetOffsetNumber(&item->neighbortid);
if (neighborPage == blkno)
{
nbuf = buf;
npage = page;
}
else
{
nbuf = ReadBufferExtended(index, MAIN_FORKNUM, neighborPage, RBM_NORMAL, bas);
LockBuffer(nbuf, BUFFER_LOCK_EXCLUSIVE);
npage = GenericXLogRegisterBuffer(state, nbuf, 0);
}
ntup = (HnswNeighborTuple) PageGetItem(npage, PageGetItemId(npage, neighborOffno));
/* Overwrite element */
/* TODO Increment version? */
item->deleted = 1;
MemSet(&item->vec.x, 0, item->vec.dim * sizeof(float));
itemsz = MAXALIGN(HNSW_ELEMENT_TUPLE_SIZE(item->vec.dim));
/* Overwrite neighbors */
for (int i = 0; i < neighborCount; i++)
{
ItemPointerSetInvalid(&ntup->neighbors[i].indextid);
ntup->neighbors[i].distance = NAN;
}
if (!PageIndexTupleOverwrite(page, offno, (Item) item, itemsz))
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
/* Overwrite neighbors */
nbuf = ReadBufferExtended(index, MAIN_FORKNUM, item->neighborPage, RBM_NORMAL, bas);
LockBuffer(nbuf, BUFFER_LOCK_EXCLUSIVE);
npage = GenericXLogRegisterBuffer(state, nbuf, GENERIC_XLOG_FULL_IMAGE);
HnswInitPage(nbuf, npage);
if (!PageIndexTupleOverwrite(npage, neighborOffno, (Item) ntup, ntupsz))
elog(ERROR, "failed to add index item to \"%s\"", RelationGetRelationName(index));
/* Commit */
MarkBufferDirty(buf);
MarkBufferDirty(nbuf);
if (nbuf != buf)
MarkBufferDirty(nbuf);
GenericXLogFinish(state);
UnlockReleaseBuffer(nbuf);
if (nbuf != buf)
UnlockReleaseBuffer(nbuf);
/* Set to first free page */
if (!BlockNumberIsValid(insertPage))
@@ -445,8 +497,7 @@ InitVacuumState(HnswVacuumState * vacuumstate, IndexVacuumInfo *info, IndexBulkD
vacuumstate->bas = GetAccessStrategy(BAS_BULKREAD);
vacuumstate->procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC);
vacuumstate->collation = index->rd_indcollation[0];
vacuumstate->nsize = MAXALIGN(sizeof(HnswNeighborTupleData));
vacuumstate->ntup = palloc0(vacuumstate->nsize);
vacuumstate->ntup = palloc0(BLCKSZ);
vacuumstate->tmpCtx = AllocSetContextCreate(CurrentMemoryContext,
"Hnsw vacuum temporary context",
ALLOCSET_DEFAULT_SIZES);

View File

@@ -38,6 +38,7 @@ $node->safe_psql("postgres",
# Check size
my $new_size = $node->safe_psql("postgres", "SELECT pg_total_relation_size('tst_v_idx');");
is($size, $new_size, "size does not change");
# TODO Improve
cmp_ok($new_size, "<=", $size * 1.75, "size does not increase too much");
done_testing();

View File

@@ -4,7 +4,8 @@ use PostgresNode;
use TestLib;
use Test::More;
my $dim = 768;
# Ensures elements and neighbors on both same and different pages
my $dim = 1900;
my $array_sql = join(",", ('random()') x $dim);
@@ -16,6 +17,9 @@ $node->start;
# Create the extension and table, load 100 rows, and only then build the
# index, so that CREATE INDEX runs over rows that already exist
$node->safe_psql("postgres", "CREATE EXTENSION vector;");
$node->safe_psql("postgres", "CREATE TABLE tst (v vector($dim));");
# $array_sql is $dim comma-separated random() calls, so each inserted row
# gets its own random vector of $dim dimensions
$node->safe_psql("postgres",
"INSERT INTO tst SELECT ARRAY[$array_sql] FROM generate_series(1, 100) i;"
);
$node->safe_psql("postgres", "CREATE INDEX ON tst USING hnsw (v vector_l2_ops);");
$node->pgbench(
@@ -37,7 +41,7 @@ sub idx_scan
$node->safe_psql("postgres", "SELECT idx_scan FROM pg_stat_user_indexes WHERE indexrelid = 'tst_v_idx'::regclass;");
}
my $expected = 5 * 100 * 10;
my $expected = 100 + 5 * 100 * 10;
my $count = $node->safe_psql("postgres", "SELECT COUNT(*) FROM tst;");
is($count, $expected);