From 0f36e15bea8ac6a8b3c5de2249611dac46f6e31a Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 28 Sep 2024 15:43:28 -0700 Subject: [PATCH] Improved cost estimation for IVFFlat [skip ci] --- src/ivfflat.c | 25 ++++++++++++------------- test/t/040_ivfflat_cost.pl | 2 +- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/src/ivfflat.c b/src/ivfflat.c index 986e19d..e2a8527 100644 --- a/src/ivfflat.c +++ b/src/ivfflat.c @@ -85,6 +85,8 @@ ivfflatcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, MemSet(&costs, 0, sizeof(costs)); + genericcostestimate(root, path, loop_count, &costs); + index = index_open(path->indexinfo->indexoid, NoLock); IvfflatGetMetaPageInfo(index, &lists, NULL); index_close(index, NoLock); @@ -94,14 +96,9 @@ ivfflatcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, if (ratio > 1.0) ratio = 1.0; - /* - * This gives us the subset of tuples to visit. This value is passed into - * the generic cost estimator to determine the number of pages to visit - * during the index scan. - */ - costs.numIndexTuples = path->indexinfo->tuples * ratio; - - genericcostestimate(root, path, loop_count, &costs); + /* Set startup cost since most work happens before first tuple is returned */ + costs.indexStartupCost = costs.indexTotalCost * ratio; + costs.numIndexPages *= ratio; get_tablespace_page_costs(path->indexinfo->reltablespace, NULL, &spc_seq_page_cost); @@ -109,23 +106,25 @@ ivfflatcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, if (costs.numIndexPages > path->indexinfo->rel->pages && ratio < 0.5) { /* Change all page cost from random to sequential */ - costs.indexTotalCost -= costs.numIndexPages * (costs.spc_random_page_cost - spc_seq_page_cost); + costs.indexStartupCost -= costs.numIndexPages * (costs.spc_random_page_cost - spc_seq_page_cost); /* Remove cost of extra pages */ - costs.indexTotalCost -= (costs.numIndexPages - path->indexinfo->rel->pages) * spc_seq_page_cost; + costs.indexStartupCost -= (costs.numIndexPages - path->indexinfo->rel->pages) * spc_seq_page_cost; } else { /* Change some page cost from random to sequential */ - costs.indexTotalCost -= 0.5 * costs.numIndexPages * (costs.spc_random_page_cost - spc_seq_page_cost); + costs.indexStartupCost -= 0.5 * costs.numIndexPages * (costs.spc_random_page_cost - spc_seq_page_cost); } - /* Use total cost since most work happens before first tuple is returned */ - *indexStartupCost = costs.indexTotalCost; + *indexStartupCost = costs.indexStartupCost; *indexTotalCost = costs.indexTotalCost; *indexSelectivity = costs.indexSelectivity; *indexCorrelation = costs.indexCorrelation; *indexPages = costs.numIndexPages; + + Assert(*indexStartupCost > 0); + Assert(*indexTotalCost > 0); } /* diff --git a/test/t/040_ivfflat_cost.pl b/test/t/040_ivfflat_cost.pl index 1c311a3..2edf000 100644 --- a/test/t/040_ivfflat_cost.pl +++ b/test/t/040_ivfflat_cost.pl @@ -21,7 +21,7 @@ for my $dim (@dims) # Create table and index $node->safe_psql("postgres", "CREATE TABLE tst (i int4, v vector($dim));"); $node->safe_psql("postgres", - "INSERT INTO tst SELECT i, ARRAY[$array_sql] FROM generate_series(1, 5000) i;" + "INSERT INTO tst SELECT i, ARRAY[$array_sql] FROM generate_series(1, 6000) i;" ); $node->safe_psql("postgres", "CREATE INDEX idx ON tst USING ivfflat (v vector_l2_ops) WITH (lists = 5);"); $node->safe_psql("postgres", "ANALYZE tst;");