From 8b06719ae992d21d171e46408e8dbd0c6259a206 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Sat, 20 Jan 2024 17:16:42 -0800 Subject: [PATCH] Do not use index if no limit or limit + offset > expected tuples unless enable_seqscan = off --- src/hnsw.c | 14 ++++++++ src/ivfflat.c | 14 ++++++++ test/t/019_ivfflat_limit.pl | 64 +++++++++++++++++++++++++++++++++++++ test/t/020_hnsw_limit.pl | 62 +++++++++++++++++++++++++++++++++++ 4 files changed, 154 insertions(+) create mode 100644 test/t/019_ivfflat_limit.pl create mode 100644 test/t/020_hnsw_limit.pl diff --git a/src/hnsw.c b/src/hnsw.c index 1719820..77cf5c4 100644 --- a/src/hnsw.c +++ b/src/hnsw.c @@ -94,6 +94,20 @@ hnswcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, return; } + /* + * Do not use index if no limit or limit + offset > ef_search unless + * enable_seqscan = off + */ + if (root->limit_tuples < 0 || root->limit_tuples > hnsw_ef_search) + { + *indexStartupCost = 1.0e10 - 1; + *indexTotalCost = 1.0e10 - 1; + *indexSelectivity = 0; + *indexCorrelation = 0; + *indexPages = 0; + return; + } + MemSet(&costs, 0, sizeof(costs)); index = index_open(path->indexinfo->indexoid, NoLock); diff --git a/src/ivfflat.c b/src/ivfflat.c index d6383f4..57f1511 100644 --- a/src/ivfflat.c +++ b/src/ivfflat.c @@ -105,6 +105,20 @@ ivfflatcostestimate(PlannerInfo *root, IndexPath *path, double loop_count, */ costs.numIndexTuples = path->indexinfo->tuples * ratio; + /* + * Do not use index if no limit or limit + offset > expected tuples unless + * enable_seqscan = off + */ + if (root->limit_tuples < 0 || root->limit_tuples > costs.numIndexTuples) + { + *indexStartupCost = 1.0e10 - 1; + *indexTotalCost = 1.0e10 - 1; + *indexSelectivity = 0; + *indexCorrelation = 0; + *indexPages = 0; + return; + } + #if PG_VERSION_NUM >= 120000 genericcostestimate(root, path, loop_count, &costs); #else diff --git a/test/t/019_ivfflat_limit.pl b/test/t/019_ivfflat_limit.pl new file mode 100644 index 0000000..b109209 --- 
/dev/null
+++ b/test/t/019_ivfflat_limit.pl
@@ -0,0 +1,64 @@
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More;
+
+# Check that the planner uses the ivfflat index only when limit + offset <= expected tuples
+
+# Initialize node
+my $node = get_new_node('node');
+$node->init;
+$node->start;
+
+# Create table and index (1000 rows, lists = 10; the cases below expect a boundary of 100 tuples per probe)
+$node->safe_psql("postgres", "CREATE EXTENSION vector;");
+$node->safe_psql("postgres", "CREATE TABLE tst (v vector(3));");
+$node->safe_psql("postgres", "INSERT INTO tst SELECT ARRAY[random(), random(), random()] FROM generate_series(1, 1000) i;");
+$node->safe_psql("postgres", "CREATE INDEX ON tst USING ivfflat (v vector_l2_ops) WITH (lists = 10);");
+
+# Test limit at the boundary (probes left unset)
+my $explain = $node->safe_psql("postgres", qq(
+	EXPLAIN ANALYZE SELECT * FROM tst ORDER BY v <-> '[1,2,3]' LIMIT 100;
+));
+like($explain, qr/Index Scan/, 'limit <= expected tuples uses index scan');
+
+# Test limit with probes (boundary doubles with probes = 2)
+$explain = $node->safe_psql("postgres", qq(
+	SET ivfflat.probes = 2;
+	EXPLAIN ANALYZE SELECT * FROM tst ORDER BY v <-> '[1,2,3]' LIMIT 200;
+));
+like($explain, qr/Index Scan/, 'limit <= expected tuples with probes = 2 uses index scan');
+
+# Test limit + offset (the sum is what is compared against the boundary)
+$explain = $node->safe_psql("postgres", qq(
+	EXPLAIN ANALYZE SELECT * FROM tst ORDER BY v <-> '[1,2,3]' LIMIT 90 OFFSET 10;
+));
+like($explain, qr/Index Scan/, 'limit + offset <= expected tuples uses index scan');
+
+# Test limit > expected tuples
+$explain = $node->safe_psql("postgres", qq(
+	EXPLAIN ANALYZE SELECT * FROM tst ORDER BY v <-> '[1,2,3]' LIMIT 101;
+));
+like($explain, qr/Seq Scan/, 'limit > expected tuples uses seq scan');
+
+# Test limit > expected tuples with probes
+$explain = $node->safe_psql("postgres", qq(
+	SET ivfflat.probes = 2;
+	EXPLAIN ANALYZE SELECT * FROM tst ORDER BY v <-> '[1,2,3]' LIMIT 201;
+));
+like($explain, qr/Seq Scan/, 'limit > expected tuples with probes = 2 uses seq scan');
+
+# Test limit + offset > expected tuples
+$explain = $node->safe_psql("postgres", qq(
+	EXPLAIN ANALYZE SELECT * FROM tst ORDER BY v <-> '[1,2,3]' LIMIT 91 OFFSET 10;
+));
+like($explain, qr/Seq Scan/, 'limit + offset > expected tuples uses seq scan');
+
+# Test no limit
+$explain = $node->safe_psql("postgres", qq(
+	EXPLAIN ANALYZE SELECT * FROM tst ORDER BY v <-> '[1,2,3]';
+));
+like($explain, qr/Seq Scan/, 'no limit uses seq scan');
+
+done_testing();
diff --git 
a/test/t/020_hnsw_limit.pl b/test/t/020_hnsw_limit.pl
new file mode 100644
index 0000000..68ec877
--- /dev/null
+++ b/test/t/020_hnsw_limit.pl
@@ -0,0 +1,62 @@
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More;
+
+# Check that the planner uses the hnsw index only when limit + offset <= ef_search
+
+# Initialize node
+my $node = get_new_node('node');
+$node->init;
+$node->start;
+
+# Create table and index (ef_search is never set here; the cases below expect a boundary of 40)
+$node->safe_psql("postgres", "CREATE EXTENSION vector;");
+$node->safe_psql("postgres", "CREATE TABLE tst (v vector(3));");
+$node->safe_psql("postgres", "INSERT INTO tst SELECT ARRAY[random(), random(), random()] FROM generate_series(1, 1000) i;");
+$node->safe_psql("postgres", "CREATE INDEX ON tst USING hnsw (v vector_l2_ops);");
+
+# Test limit at the boundary
+my $explain = $node->safe_psql("postgres", qq(
+	EXPLAIN ANALYZE SELECT * FROM tst ORDER BY v <-> '[1,2,3]' LIMIT 40;
+));
+like($explain, qr/Index Scan/, 'limit <= ef_search uses index scan');
+
+# Test limit with CTE (the limit sits inside the CTE, not on the outer query)
+$explain = $node->safe_psql("postgres", qq(
+	EXPLAIN ANALYZE WITH cte AS (SELECT * FROM tst ORDER BY v <-> '[1,2,3]' LIMIT 40) SELECT * FROM cte;
+));
+like($explain, qr/Index Scan/, 'limit <= ef_search in CTE uses index scan');
+
+# Test limit + offset (the sum is what is compared against the boundary)
+$explain = $node->safe_psql("postgres", qq(
+	EXPLAIN ANALYZE SELECT * FROM tst ORDER BY v <-> '[1,2,3]' LIMIT 30 OFFSET 10;
+));
+like($explain, qr/Index Scan/, 'limit + offset <= ef_search uses index scan');
+
+# Test limit > ef_search
+$explain = $node->safe_psql("postgres", qq(
+	EXPLAIN ANALYZE SELECT * FROM tst ORDER BY v <-> '[1,2,3]' LIMIT 41;
+));
+like($explain, qr/Seq Scan/, 'limit > ef_search uses seq scan');
+
+# Test limit > ef_search with CTE
+$explain = $node->safe_psql("postgres", qq(
+	EXPLAIN ANALYZE WITH cte AS (SELECT * FROM tst ORDER BY v <-> '[1,2,3]' LIMIT 41) SELECT * FROM cte;
+));
+like($explain, qr/Seq Scan/, 'limit > ef_search in CTE uses seq scan');
+
+# Test limit + offset > ef_search
+$explain = $node->safe_psql("postgres", qq(
+	EXPLAIN ANALYZE SELECT * FROM tst ORDER BY v <-> '[1,2,3]' LIMIT 31 OFFSET 10;
+));
+like($explain, qr/Seq Scan/, 'limit + offset > ef_search uses seq scan');
+
+# Test no limit
+$explain = $node->safe_psql("postgres", qq(
+	EXPLAIN ANALYZE SELECT * FROM tst ORDER BY v <-> '[1,2,3]';
+));
+like($explain, qr/Seq Scan/, 'no limit uses seq scan');
+
+done_testing();