From aabe549ec61de91c9aa7cfd4a5aee585da435944 Mon Sep 17 00:00:00 2001
From: Andrew Kane
Date: Tue, 15 Feb 2022 20:21:29 -0800
Subject: [PATCH] Significantly improved index query performance [skip ci]

Index scans now run in stages (tracked by so->stage):

  stage 0: pick the closest `probes` lists, then collect up to maxItems
           (1024) of the nearest items from those lists in a bounded
           pairing heap (GetScanItemsQuick)
  stage 1: return the collected items in ascending (distance, tid) order
  stage 2: only if the heap filled up, fall back to a full tuplesort of
           the probed lists, skipping the first maxItems tuples since
           they were already returned in stage 1
  stage 3: scan is exhausted

The tuplesort is now created lazily (InitSort) and sorts on
(distance, tid) so that its order matches the heap order used in stage 1.

---
 CHANGELOG.md         |   1 +
 src/ivfflat.h        |  19 ++-
 src/ivfscan.c        | 323 ++++++++++++++++++++++++++++++++++---------
 test/t/007_stages.pl |  38 +++++
 4 files changed, 318 insertions(+), 63 deletions(-)
 create mode 100644 test/t/007_stages.pl

diff --git a/CHANGELOG.md b/CHANGELOG.md
index d9646d4..c57b770 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,6 @@
 ## 0.2.6 (unreleased)
 
+- Significantly improved index query performance
 - Improved performance of index creation for Postgres < 12
 
 ## 0.2.5 (2022-02-11)
diff --git a/src/ivfflat.h b/src/ivfflat.h
index bce7d42..0c6e426 100644
--- a/src/ivfflat.h
+++ b/src/ivfflat.h
@@ -167,12 +167,28 @@ typedef struct IvfflatScanList
 	double		distance;
 }			IvfflatScanList;
 
+typedef struct IvfflatScanItem
+{
+	pairingheap_node ph_node;
+	BlockNumber searchPage;
+	double		distance;
+	ItemPointerData tid;
+}			IvfflatScanItem;
+
 typedef struct IvfflatScanOpaqueData
 {
 	int			probes;
-	bool		first;
+	int			stage;
 	Buffer		buf;
 
+	/* Items */
+	int			maxItems;
+	int			itemCount;
+	pairingheap *itemQueue;
+	IvfflatScanItem *items;
+	IvfflatScanItem **sortedItems;
+	bool		heapFull;
+
 	/* Sorting */
 	Tuplesortstate *sortstate;
 	TupleDesc	tupdesc;
@@ -186,6 +202,7 @@ typedef struct IvfflatScanOpaqueData
 
 	/* Lists */
 	pairingheap *listQueue;
+	IvfflatScanList **sortedLists;
 	IvfflatScanList lists[FLEXIBLE_ARRAY_MEMBER];	/* must come last */
 }			IvfflatScanOpaqueData;
 
diff --git a/src/ivfscan.c b/src/ivfscan.c
index e2171b0..970863a 100644
--- a/src/ivfscan.c
+++ b/src/ivfscan.c
@@ -30,6 +30,21 @@ CompareLists(const pairingheap_node *a, const pairingheap_node *b, void *arg)
 	return 0;
 }
 
+/*
+ * Compare items by distance, breaking ties by tid (farthest item sorts first)
+ */
+static int
+CompareItems(const pairingheap_node *a, const pairingheap_node *b, void *arg)
+{
+	if (((const IvfflatScanItem *) a)->distance > ((const IvfflatScanItem *) b)->distance)
+		return 1;
+
+	if (((const IvfflatScanItem *) a)->distance < ((const IvfflatScanItem *) b)->distance)
+		return -1;
+
+	return ItemPointerCompare(&((IvfflatScanItem *) a)->tid, &((IvfflatScanItem *) b)->tid);
+}
+
 /*
  * Get lists and sort by distance
  */
@@ -44,6 +59,7 @@ GetScanLists(IndexScanDesc scan, Datum value)
 	BlockNumber nextblkno = IVFFLAT_HEAD_BLKNO;
 	int			listCount = 0;
 	IvfflatScanOpaque so = (IvfflatScanOpaque) scan->opaque;
+	int			i;
 	double		distance;
 	IvfflatScanList *scanlist;
 	double		maxDistance = DBL_MAX;
@@ -97,6 +113,140 @@ GetScanLists(IndexScanDesc scan, Datum value)
 
 		UnlockReleaseBuffer(cbuf);
 	}
+
+	for (i = 0; i < so->probes; i++)
+		so->sortedLists[i] = (IvfflatScanList *) pairingheap_remove_first(so->listQueue);
+
+	Assert(pairingheap_is_empty(so->listQueue));
+}
+
+/*
+ * Get the closest items (at most maxItems) from the probed lists
+ */
+static void
+GetScanItemsQuick(IndexScanDesc scan, Datum value)
+{
+	IvfflatScanOpaque so = (IvfflatScanOpaque) scan->opaque;
+	Buffer		buf;
+	Page		page;
+	IndexTuple	itup;
+	BlockNumber searchPage;
+	OffsetNumber offno;
+	OffsetNumber maxoffno;
+	Datum		datum;
+	bool		isnull;
+	TupleDesc	tupdesc = RelationGetDescr(scan->indexRelation);
+	int			i;
+	double		distance;
+	IvfflatScanItem *scanitem;
+	double		maxDistance = DBL_MAX;
+
+	/*
+	 * Reuse same set of shared buffers for scan
+	 *
+	 * See postgres/src/backend/storage/buffer/README for description
+	 */
+	BufferAccessStrategy bas = GetAccessStrategy(BAS_BULKREAD);
+
+	/* Search closest probes lists */
+	for (i = 0; i < so->probes; i++)
+	{
+		/* Read closest lists first for performance */
+		searchPage = so->sortedLists[i]->startPage;
+
+		/* Search all entry pages for list */
+		while (BlockNumberIsValid(searchPage))
+		{
+			buf = ReadBufferExtended(scan->indexRelation, MAIN_FORKNUM, searchPage, RBM_NORMAL, bas);
+			LockBuffer(buf, BUFFER_LOCK_SHARE);
+			page = BufferGetPage(buf);
+			maxoffno = PageGetMaxOffsetNumber(page);
+
+			for (offno = FirstOffsetNumber; offno <= maxoffno; offno = OffsetNumberNext(offno))
+			{
+				itup = (IndexTuple) PageGetItem(page, PageGetItemId(page, offno));
+				datum = index_getattr(itup, 1, tupdesc, &isnull);
+				distance = DatumGetFloat8(FunctionCall2Coll(so->procinfo, so->collation, datum, value));
+
+				if (so->itemCount < so->maxItems)
+				{
+					scanitem = &so->items[so->itemCount];
+					scanitem->searchPage = searchPage;
+					scanitem->tid = itup->t_tid;
+					scanitem->distance = distance;
+					so->itemCount++;
+
+					/* Add to heap */
+					pairingheap_add(so->itemQueue, &scanitem->ph_node);
+
+					/* Heap just filled: record max distance, switch to spare slot items[maxItems] */
+					if (so->itemCount == so->maxItems)
+					{
+						maxDistance = ((IvfflatScanItem *) pairingheap_first(so->itemQueue))->distance;
+						scanitem = &so->items[so->itemCount];
+					}
+				}
+				else if (distance <= maxDistance)
+				{
+					/* Reuse the spare item for the new candidate */
+					scanitem->searchPage = searchPage;
+					scanitem->tid = itup->t_tid;
+					scanitem->distance = distance;
+					pairingheap_add(so->itemQueue, &scanitem->ph_node);
+
+					/* Remove the farthest item, which becomes the new spare */
+					scanitem = (IvfflatScanItem *) pairingheap_remove_first(so->itemQueue);
+
+					/* Update max distance */
+					maxDistance = ((IvfflatScanItem *) pairingheap_first(so->itemQueue))->distance;
+				}
+			}
+
+			searchPage = IvfflatPageGetOpaque(page)->nextblkno;
+
+			UnlockReleaseBuffer(buf);
+		}
+	}
+
+	for (i = 0; i < so->itemCount; i++)
+		so->sortedItems[i] = (IvfflatScanItem *) pairingheap_remove_first(so->itemQueue);
+
+	Assert(pairingheap_is_empty(so->itemQueue));
+}
+
+/*
+ * Initialize sort on (distance, tid)
+ */
+static void
+InitSort(IvfflatScanOpaque so)
+{
+	AttrNumber	attNums[] = {1, 2};
+	Oid			sortOperators[] = {Float8LessOperator, TIDLessOperator};
+	Oid			sortCollations[] = {InvalidOid, InvalidOid};
+	bool		nullsFirstFlags[] = {false, false};
+
+	/* Create tuple description for sorting */
+#if PG_VERSION_NUM >= 120000
+	so->tupdesc = CreateTemplateTupleDesc(3);
+#else
+	so->tupdesc = CreateTemplateTupleDesc(3, false);
+#endif
+	TupleDescInitEntry(so->tupdesc, (AttrNumber) 1, "distance", FLOAT8OID, -1, 0);
+	TupleDescInitEntry(so->tupdesc, (AttrNumber) 2, "tid", TIDOID, -1, 0);
+	TupleDescInitEntry(so->tupdesc, (AttrNumber) 3, "indexblkno", INT4OID, -1, 0);
+
+	/* Prep sort */
+#if PG_VERSION_NUM >= 110000
+	so->sortstate = tuplesort_begin_heap(so->tupdesc, sizeof(attNums) / sizeof(attNums[0]), attNums, sortOperators, sortCollations, nullsFirstFlags, work_mem, NULL, false);
+#else
+	so->sortstate = tuplesort_begin_heap(so->tupdesc, sizeof(attNums) / sizeof(attNums[0]), attNums, sortOperators, sortCollations, nullsFirstFlags, work_mem, false);
+#endif
+
+#if PG_VERSION_NUM >= 120000
+	so->slot = MakeSingleTupleTableSlot(so->tupdesc, &TTSOpsMinimalTuple);
+#else
+	so->slot = MakeSingleTupleTableSlot(so->tupdesc);
+#endif
 }
 
 /*
@@ -115,6 +265,7 @@ GetScanItems(IndexScanDesc scan, Datum value)
 	Datum		datum;
 	bool		isnull;
 	TupleDesc	tupdesc = RelationGetDescr(scan->indexRelation);
+	int			i;
 
 #if PG_VERSION_NUM >= 120000
 	TupleTableSlot *slot = MakeSingleTupleTableSlot(so->tupdesc, &TTSOpsVirtual);
@@ -130,9 +281,9 @@ GetScanItems(IndexScanDesc scan, Datum value)
 	BufferAccessStrategy bas = GetAccessStrategy(BAS_BULKREAD);
 
 	/* Search closest probes lists */
-	while (!pairingheap_is_empty(so->listQueue))
+	for (i = 0; i < so->probes; i++)
 	{
-		searchPage = ((IvfflatScanList *) pairingheap_remove_first(so->listQueue))->startPage;
+		searchPage = so->sortedLists[i]->startPage;
 
 		/* Search all entry pages for list */
 		while (BlockNumberIsValid(searchPage))
@@ -172,6 +323,7 @@ GetScanItems(IndexScanDesc scan, Datum value)
 	}
 
 	tuplesort_performsort(so->sortstate);
+	tuplesort_skiptuples(so->sortstate, so->maxItems, true);	/* already returned by stage 1 */
 }
 
 /*
@@ -180,24 +332,17 @@ GetScanItems(IndexScanDesc scan, Datum value)
 IndexScanDesc
 ivfflatbeginscan(Relation index, int nkeys, int norderbys)
 {
-	IndexScanDesc scan;
-	IvfflatScanOpaque so;
-	int			lists;
-	AttrNumber	attNums[] = {1};
-	Oid			sortOperators[] = {Float8LessOperator};
-	Oid			sortCollations[] = {InvalidOid};
-	bool		nullsFirstFlags[] = {false};
+	IndexScanDesc scan = RelationGetIndexScan(index, nkeys, norderbys);
+	int			lists = IvfflatGetLists(scan->indexRelation);
 	int			probes = ivfflat_probes;
-
-	scan = RelationGetIndexScan(index, nkeys, norderbys);
-	lists = IvfflatGetLists(scan->indexRelation);
+	IvfflatScanOpaque so;
 
 	if (probes > lists)
 		probes = lists;
 
 	so = (IvfflatScanOpaque) palloc(offsetof(IvfflatScanOpaqueData, lists) + probes * sizeof(IvfflatScanList));
 	so->buf = InvalidBuffer;
-	so->first = true;
+	so->stage = 0;
 	so->probes = probes;
 
 	/* Set support functions */
@@ -205,30 +350,16 @@ ivfflatbeginscan(Relation index, int nkeys, int norderbys)
 	so->normprocinfo = IvfflatOptionalProcInfo(index, IVFFLAT_NORM_PROC);
 	so->collation = index->rd_indcollation[0];
 
-	/* Create tuple description for sorting */
-#if PG_VERSION_NUM >= 120000
-	so->tupdesc = CreateTemplateTupleDesc(3);
-#else
-	so->tupdesc = CreateTemplateTupleDesc(3, false);
-#endif
-	TupleDescInitEntry(so->tupdesc, (AttrNumber) 1, "distance", FLOAT8OID, -1, 0);
-	TupleDescInitEntry(so->tupdesc, (AttrNumber) 2, "tid", TIDOID, -1, 0);
-	TupleDescInitEntry(so->tupdesc, (AttrNumber) 3, "indexblkno", INT4OID, -1, 0);
-
-	/* Prep sort */
-#if PG_VERSION_NUM >= 110000
-	so->sortstate = tuplesort_begin_heap(so->tupdesc, 1, attNums, sortOperators, sortCollations, nullsFirstFlags, work_mem, NULL, false);
-#else
-	so->sortstate = tuplesort_begin_heap(so->tupdesc, 1, attNums, sortOperators, sortCollations, nullsFirstFlags, work_mem, false);
-#endif
-
-#if PG_VERSION_NUM >= 120000
-	so->slot = MakeSingleTupleTableSlot(so->tupdesc, &TTSOpsMinimalTuple);
-#else
-	so->slot = MakeSingleTupleTableSlot(so->tupdesc);
-#endif
-
 	so->listQueue = pairingheap_allocate(CompareLists, scan);
+	so->sortedLists = palloc(sizeof(IvfflatScanList *) * probes);
+
+	so->maxItems = 1024;
+	so->itemCount = 0;
+	so->itemQueue = pairingheap_allocate(CompareItems, scan);
+	so->items = palloc(sizeof(IvfflatScanItem) * (so->maxItems + 1));	/* +1 spare slot used once the heap is full */
+	so->sortedItems = palloc(sizeof(IvfflatScanItem *) * so->maxItems);
+
+	so->sortstate = NULL;
 
 	scan->opaque = so;
 
@@ -244,12 +375,14 @@ ivfflatrescan(IndexScanDesc scan, ScanKey keys, int nkeys, ScanKey orderbys, int
 	IvfflatScanOpaque so = (IvfflatScanOpaque) scan->opaque;
 
 #if PG_VERSION_NUM >= 130000
-	if (!so->first)
+	if (so->sortstate != NULL)
 		tuplesort_reset(so->sortstate);
 #endif
 
-	so->first = true;
+	so->stage = 0;
 	pairingheap_reset(so->listQueue);
+	pairingheap_reset(so->itemQueue);
+	so->itemCount = 0;
 
 	if (keys && scan->numberOfKeys > 0)
 		memmove(scan->keyData, keys, scan->numberOfKeys * sizeof(ScanKeyData));
@@ -272,7 +405,7 @@ ivfflatgettuple(IndexScanDesc scan, ScanDirection dir)
 	 */
 	Assert(ScanDirectionIsForward(dir));
 
-	if (so->first)
+	if (so->stage == 0)
 	{
 		Datum		value;
 
@@ -294,42 +427,101 @@ ivfflatgettuple(IndexScanDesc scan, ScanDirection dir)
 		}
 
 		IvfflatBench("GetScanLists", GetScanLists(scan, value));
-		IvfflatBench("GetScanItems", GetScanItems(scan, value));
-		so->first = false;
+		IvfflatBench("GetScanItemsQuick", GetScanItemsQuick(scan, value));
+		so->heapFull = so->itemCount == so->maxItems;
+		so->stage++;
 
 		/* Clean up if we allocated a new value */
 		if (value != scan->orderByData->sk_argument)
 			pfree(DatumGetPointer(value));
 	}
 
-#if PG_VERSION_NUM >= 100000
-	if (tuplesort_gettupleslot(so->sortstate, true, false, so->slot, NULL))
-#else
-	if (tuplesort_gettupleslot(so->sortstate, true, so->slot, NULL))
-#endif
+	if (so->stage == 1)
 	{
-		ItemPointer tid = (ItemPointer) DatumGetPointer(slot_getattr(so->slot, 2, &so->isnull));
-		BlockNumber indexblkno = DatumGetInt32(slot_getattr(so->slot, 3, &so->isnull));
+		if (so->itemCount > 0)
+		{
+			IvfflatScanItem *scanitem;
+
+			so->itemCount--;
+
+			scanitem = so->sortedItems[so->itemCount];
 
 #if PG_VERSION_NUM >= 120000
-		scan->xs_heaptid = *tid;
+			scan->xs_heaptid = scanitem->tid;
 #else
-		scan->xs_ctup.t_self = *tid;
+			scan->xs_ctup.t_self = scanitem->tid;
 #endif
 
-		if (BufferIsValid(so->buf))
-			ReleaseBuffer(so->buf);
+			if (BufferIsValid(so->buf))
+				ReleaseBuffer(so->buf);
 
-		/*
-		 * An index scan must maintain a pin on the index page holding the
-		 * item last returned by amgettuple
-		 *
-		 * https://www.postgresql.org/docs/current/index-locking.html
-		 */
-		so->buf = ReadBuffer(scan->indexRelation, indexblkno);
+			/*
+			 * An index scan must maintain a pin on the index page holding the
+			 * item last returned by amgettuple
+			 *
+			 * https://www.postgresql.org/docs/current/index-locking.html
+			 */
+			so->buf = ReadBuffer(scan->indexRelation, scanitem->searchPage);
 
-		scan->xs_recheckorderby = false;
-		return true;
+			scan->xs_recheckorderby = false;
+			return true;
+		}
+		else if (so->heapFull)
+		{
+			Datum		value = scan->orderByData->sk_argument;
+
+			if (so->normprocinfo != NULL)
+			{
+				/* No items will match if normalization fails */
+				if (!IvfflatNormValue(so->normprocinfo, so->collation, &value, NULL))
+					return false;
+			}
+
+			if (so->sortstate == NULL)
+				InitSort(so);
+
+			IvfflatBench("GetScanItems", GetScanItems(scan, value));
+			so->stage++;
+
+			/* Clean up if we allocated a new value */
+			if (value != scan->orderByData->sk_argument)
+				pfree(DatumGetPointer(value));
+		}
+		else
+			so->stage = 3;
+	}
+
+	if (so->stage == 2)
+	{
+#if PG_VERSION_NUM >= 100000
+		if (tuplesort_gettupleslot(so->sortstate, true, false, so->slot, NULL))
+#else
+		if (tuplesort_gettupleslot(so->sortstate, true, so->slot, NULL))
+#endif
+		{
+			ItemPointer tid = (ItemPointer) DatumGetPointer(slot_getattr(so->slot, 2, &so->isnull));
+			BlockNumber indexblkno = DatumGetInt32(slot_getattr(so->slot, 3, &so->isnull));
+
+#if PG_VERSION_NUM >= 120000
+			scan->xs_heaptid = *tid;
+#else
+			scan->xs_ctup.t_self = *tid;
+#endif
+
+			if (BufferIsValid(so->buf))
+				ReleaseBuffer(so->buf);
+
+			/*
+			 * An index scan must maintain a pin on the index page holding the
+			 * item last returned by amgettuple
+			 *
+			 * https://www.postgresql.org/docs/current/index-locking.html
+			 */
+			so->buf = ReadBuffer(scan->indexRelation, indexblkno);
+
+			scan->xs_recheckorderby = false;
+			return true;
+		}
 	}
 
 	return false;
@@ -348,7 +540,14 @@ ivfflatendscan(IndexScanDesc scan)
 		ReleaseBuffer(so->buf);
 
 	pairingheap_free(so->listQueue);
-	tuplesort_end(so->sortstate);
+	pfree(so->sortedLists);
+
+	if (so->sortstate != NULL)
+		tuplesort_end(so->sortstate);
+
+	pairingheap_free(so->itemQueue);
+	pfree(so->items);
+	pfree(so->sortedItems);
 
 	pfree(so);
 	scan->opaque = NULL;
diff --git a/test/t/007_stages.pl b/test/t/007_stages.pl
new file mode 100644
index 0000000..cc670ce
--- /dev/null
+++ b/test/t/007_stages.pl
@@ -0,0 +1,38 @@
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 2;
+
+# Initialize node
+my $node = get_new_node('node');
+$node->init;
+$node->start;
+
+# Create table
+$node->safe_psql("postgres", "CREATE EXTENSION vector;");
+$node->safe_psql("postgres", "CREATE TABLE tst (i int4, v vector(3));");
+$node->safe_psql("postgres",
+	"INSERT INTO tst SELECT i, ARRAY[i % 1000, i % 1000, i % 1000] FROM generate_series(1, 10000) i;"
+);
+
+my @limits = (128, 2048);
+my @expected = ();
+
+foreach (@limits) {
+	my $res = $node->safe_psql("postgres", "SELECT i, v FROM tst ORDER BY v <-> '[0,0,0]', i LIMIT $_;");
+	push(@expected, $res);
+}
+
+$node->safe_psql("postgres", "CREATE INDEX ON tst USING ivfflat (v) WITH (lists = 5);");
+
+for my $i (0 .. $#limits) {
+	my $res = $node->safe_psql("postgres", qq(
+		SET enable_seqscan = off;
+		SET ivfflat.probes = 5;
+		WITH tmp AS (
+			SELECT *, v <-> '[0,0,0]' AS d FROM tst ORDER BY v <-> '[0,0,0]' LIMIT $limits[$i]
+		) SELECT i, v FROM tmp ORDER BY d, i;
+	));
+	is($res, $expected[$i]);
+}