From 61b2ddeb8679c9f489fa383c1d56a351d470e30d Mon Sep 17 00:00:00 2001
From: Andrew Kane
Date: Sun, 22 Sep 2024 20:23:56 -0700
Subject: [PATCH] Adjust index scan cost based on selectivity [skip ci]

---
 src/ivfflat.c                   | 38 ++++++++++++++++++++++++++++++++-
 test/t/009_ivfflat_filtering.pl |  8 +++----
 2 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/src/ivfflat.c b/src/ivfflat.c
index 4e9b9a4..1a8d036 100644
--- a/src/ivfflat.c
+++ b/src/ivfflat.c
@@ -57,6 +57,39 @@ ivfflatbuildphasename(int64 phasenum)
 	}
 }
 
+/*
+ * Estimate the number of probes needed to get requested tuples (0 without a limit, at most lists)
+ */
+static int
+EstimateProbes(PlannerInfo *root, IndexPath *path, int lists)
+{
+	double		selectivity = 1;
+	ListCell   *lc;
+	double		tuplesPerList;
+
+	/* Cannot estimate without limit, so return 0 and let the caller's Max() pick ivfflat_probes */
+	/* limit_tuples includes offset */
+	if (root->limit_tuples < 0)
+		return 0;
+
+	/* Get the selectivity of non-index conditions */
+	foreach(lc, path->indexinfo->indrestrictinfo)
+	{
+		RestrictInfo *rinfo = lfirst(lc);
+
+		/* Skip DEFAULT_INEQ_SEL since it may be a distance filter */
+		if (rinfo->norm_selec >= 0 && rinfo->norm_selec <= 1 && rinfo->norm_selec != (Selectivity) DEFAULT_INEQ_SEL)
+			selectivity *= rinfo->norm_selec;
+	}
+
+	tuplesPerList = path->indexinfo->tuples * selectivity / (double) lists;
+	if (tuplesPerList == 0)
+		return lists;
+
+	/* Cap at lists: the caller clamps the ratio to 1 anyway, and an out-of-range double-to-int conversion is undefined */
+	return Min(root->limit_tuples / tuplesPerList, (double) lists);
+}
+
 /*
  * Estimate the cost of an index scan
  */
@@ -68,6 +101,7 @@ ivfflatcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 {
 	GenericCosts costs;
 	int			lists;
+	int			probes;
 	double		ratio;
 	double		spc_seq_page_cost;
 	Relation	index;
@@ -89,8 +123,10 @@ ivfflatcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
 	IvfflatGetMetaPageInfo(index, &lists, NULL);
 	index_close(index, NoLock);
 
+	probes = Max(ivfflat_probes, EstimateProbes(root, path, lists));
+
 	/* Get the ratio of lists that we need to visit */
-	ratio = ((double) ivfflat_probes) / lists;
+	ratio = ((double) probes) / lists;
 	if (ratio > 1.0)
 		ratio = 1.0;
 
diff --git a/test/t/009_ivfflat_filtering.pl b/test/t/009_ivfflat_filtering.pl
index efe0866..641b6ac 100644
--- a/test/t/009_ivfflat_filtering.pl
+++ b/test/t/009_ivfflat_filtering.pl
@@ -5,7 +5,7 @@ use PostgreSQL::Test::Utils;
 use Test::More;
 
 my $dim = 3;
-my $nc = 50;
+my $nc = 100;
 my $limit = 20;
 
 my $array_sql = join(",", ('random()') x $dim);
@@ -102,15 +102,13 @@ $node->safe_psql("postgres", "CREATE INDEX attribute_idx ON tst (c);");
 $explain = $node->safe_psql("postgres", qq(
 	EXPLAIN ANALYZE SELECT i FROM tst WHERE c = $c ORDER BY v <-> '$query' LIMIT $limit;
 ));
-# TODO Use attribute index
-like($explain, qr/Index Scan using idx/);
+like($explain, qr/Index Scan on attribute_idx/);
 
 # Test partial index
 $node->safe_psql("postgres", "CREATE INDEX partial_idx ON tst USING ivfflat (v vector_l2_ops) WITH (lists = 5) WHERE (c = $c);");
 $explain = $node->safe_psql("postgres", qq(
 	EXPLAIN ANALYZE SELECT i FROM tst WHERE c = $c ORDER BY v <-> '$query' LIMIT $limit;
 ));
-# TODO Use partial index
-like($explain, qr/Index Scan using idx/);
+like($explain, qr/Index Scan using partial_idx/);
 
 done_testing();