Do not use index without limit or if limit + offset > expected tuples

This commit is contained in:
Andrew Kane
2024-01-20 15:16:30 -08:00
parent 042ddfdc8a
commit 63c4af0454
23 changed files with 177 additions and 45 deletions

View File

@@ -83,8 +83,11 @@ hnswcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
List *qinfos;
#endif
/* Never use index without order */
if (path->indexorderbys == NULL)
/*
* Never use index without order or limit, or if limit + offset >
* ef_search
*/
if (path->indexorderbys == NULL || root->limit_tuples < 0 || root->limit_tuples > hnsw_ef_search)
{
*indexStartupCost = DBL_MAX;
*indexTotalCost = DBL_MAX;

View File

@@ -76,8 +76,8 @@ ivfflatcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
List *qinfos;
#endif
/* Never use index without order */
if (path->indexorderbys == NULL)
/* Never use index without order or limit */
if (path->indexorderbys == NULL || root->limit_tuples < 0)
{
*indexStartupCost = DBL_MAX;
*indexTotalCost = DBL_MAX;
@@ -105,6 +105,20 @@ ivfflatcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
*/
costs.numIndexTuples = path->indexinfo->tuples * ratio;
/*
* Do not use index if limit + offset > expected tuples unless
* enable_seqscan = off
*/
if (root->limit_tuples > costs.numIndexTuples)
{
*indexStartupCost = 1.0e10 - 1;
*indexTotalCost = 1.0e10 - 1;
*indexSelectivity = 0;
*indexCorrelation = 0;
*indexPages = 0;
return;
}
#if PG_VERSION_NUM >= 120000
genericcostestimate(root, path, loop_count, &costs);
#else

View File

@@ -3,7 +3,7 @@ CREATE TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING hnsw (val vector_cosine_ops);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <=> '[3,3,3]';
SELECT * FROM t ORDER BY val <=> '[3,3,3]' LIMIT 5;
val
---------
[1,1,1]
@@ -11,13 +11,13 @@ SELECT * FROM t ORDER BY val <=> '[3,3,3]';
[1,2,4]
(3 rows)
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '[0,0,0]') t2;
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '[0,0,0]' LIMIT 5) t2;
count
-------
3
(1 row)
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::vector)) t2;
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::vector) LIMIT 5) t2;
count
-------
3

View File

@@ -3,7 +3,7 @@ CREATE TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING hnsw (val vector_ip_ops);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <#> '[3,3,3]';
SELECT * FROM t ORDER BY val <#> '[3,3,3]' LIMIT 5;
val
---------
[1,2,4]
@@ -12,7 +12,7 @@ SELECT * FROM t ORDER BY val <#> '[3,3,3]';
[0,0,0]
(4 rows)
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::vector)) t2;
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::vector) LIMIT 5) t2;
count
-------
4

View File

@@ -3,7 +3,7 @@ CREATE TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING hnsw (val vector_l2_ops);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
SELECT * FROM t ORDER BY val <-> '[3,3,3]' LIMIT 5;
val
---------
[1,2,3]
@@ -12,7 +12,7 @@ SELECT * FROM t ORDER BY val <-> '[3,3,3]';
[0,0,0]
(4 rows)
SELECT * FROM t ORDER BY val <-> (SELECT NULL::vector);
SELECT * FROM t ORDER BY val <-> (SELECT NULL::vector) LIMIT 5;
val
---------
[0,0,0]
@@ -28,7 +28,7 @@ SELECT COUNT(*) FROM t;
(1 row)
TRUNCATE t;
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
SELECT * FROM t ORDER BY val <-> '[3,3,3]' LIMIT 5;
val
-----
(0 rows)

View File

@@ -2,7 +2,7 @@ SET enable_seqscan = off;
CREATE UNLOGGED TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING hnsw (val vector_l2_ops);
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
SELECT * FROM t ORDER BY val <-> '[3,3,3]' LIMIT 5;
val
---------
[1,2,3]

View File

@@ -3,7 +3,7 @@ CREATE TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING ivfflat (val vector_cosine_ops) WITH (lists = 1);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <=> '[3,3,3]';
SELECT * FROM t ORDER BY val <=> '[3,3,3]' LIMIT 5;
val
---------
[1,1,1]
@@ -11,13 +11,13 @@ SELECT * FROM t ORDER BY val <=> '[3,3,3]';
[1,2,4]
(3 rows)
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '[0,0,0]') t2;
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '[0,0,0]' LIMIT 5) t2;
count
-------
3
(1 row)
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::vector)) t2;
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::vector) LIMIT 5) t2;
count
-------
3

View File

@@ -3,7 +3,7 @@ CREATE TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING ivfflat (val vector_ip_ops) WITH (lists = 1);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <#> '[3,3,3]';
SELECT * FROM t ORDER BY val <#> '[3,3,3]' LIMIT 5;
val
---------
[1,2,4]
@@ -12,7 +12,7 @@ SELECT * FROM t ORDER BY val <#> '[3,3,3]';
[0,0,0]
(4 rows)
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::vector)) t2;
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::vector) LIMIT 5) t2;
count
-------
4

View File

@@ -3,7 +3,7 @@ CREATE TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING ivfflat (val vector_l2_ops) WITH (lists = 1);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
SELECT * FROM t ORDER BY val <-> '[3,3,3]' LIMIT 5;
val
---------
[1,2,3]
@@ -12,7 +12,7 @@ SELECT * FROM t ORDER BY val <-> '[3,3,3]';
[0,0,0]
(4 rows)
SELECT * FROM t ORDER BY val <-> (SELECT NULL::vector);
SELECT * FROM t ORDER BY val <-> (SELECT NULL::vector) LIMIT 5;
val
---------
[0,0,0]
@@ -31,7 +31,7 @@ TRUNCATE t;
NOTICE: ivfflat index created with little data
DETAIL: This will cause low recall.
HINT: Drop the index until the table has more data.
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
SELECT * FROM t ORDER BY val <-> '[3,3,3]' LIMIT 5;
val
-----
(0 rows)

View File

@@ -2,7 +2,7 @@ SET enable_seqscan = off;
CREATE UNLOGGED TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING ivfflat (val vector_l2_ops) WITH (lists = 1);
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
SELECT * FROM t ORDER BY val <-> '[3,3,3]' LIMIT 5;
val
---------
[1,2,3]

View File

@@ -6,8 +6,8 @@ CREATE INDEX ON t USING hnsw (val vector_cosine_ops);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <=> '[3,3,3]';
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '[0,0,0]') t2;
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::vector)) t2;
SELECT * FROM t ORDER BY val <=> '[3,3,3]' LIMIT 5;
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '[0,0,0]' LIMIT 5) t2;
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::vector) LIMIT 5) t2;
DROP TABLE t;

View File

@@ -6,7 +6,7 @@ CREATE INDEX ON t USING hnsw (val vector_ip_ops);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <#> '[3,3,3]';
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::vector)) t2;
SELECT * FROM t ORDER BY val <#> '[3,3,3]' LIMIT 5;
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::vector) LIMIT 5) t2;
DROP TABLE t;

View File

@@ -6,11 +6,11 @@ CREATE INDEX ON t USING hnsw (val vector_l2_ops);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
SELECT * FROM t ORDER BY val <-> (SELECT NULL::vector);
SELECT * FROM t ORDER BY val <-> '[3,3,3]' LIMIT 5;
SELECT * FROM t ORDER BY val <-> (SELECT NULL::vector) LIMIT 5;
SELECT COUNT(*) FROM t;
TRUNCATE t;
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
SELECT * FROM t ORDER BY val <-> '[3,3,3]' LIMIT 5;
DROP TABLE t;

View File

@@ -4,6 +4,6 @@ CREATE UNLOGGED TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING hnsw (val vector_l2_ops);
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
SELECT * FROM t ORDER BY val <-> '[3,3,3]' LIMIT 5;
DROP TABLE t;

View File

@@ -6,8 +6,8 @@ CREATE INDEX ON t USING ivfflat (val vector_cosine_ops) WITH (lists = 1);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <=> '[3,3,3]';
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '[0,0,0]') t2;
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::vector)) t2;
SELECT * FROM t ORDER BY val <=> '[3,3,3]' LIMIT 5;
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '[0,0,0]' LIMIT 5) t2;
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::vector) LIMIT 5) t2;
DROP TABLE t;

View File

@@ -6,7 +6,7 @@ CREATE INDEX ON t USING ivfflat (val vector_ip_ops) WITH (lists = 1);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <#> '[3,3,3]';
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::vector)) t2;
SELECT * FROM t ORDER BY val <#> '[3,3,3]' LIMIT 5;
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::vector) LIMIT 5) t2;
DROP TABLE t;

View File

@@ -6,11 +6,11 @@ CREATE INDEX ON t USING ivfflat (val vector_l2_ops) WITH (lists = 1);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
SELECT * FROM t ORDER BY val <-> (SELECT NULL::vector);
SELECT * FROM t ORDER BY val <-> '[3,3,3]' LIMIT 5;
SELECT * FROM t ORDER BY val <-> (SELECT NULL::vector) LIMIT 5;
SELECT COUNT(*) FROM t;
TRUNCATE t;
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
SELECT * FROM t ORDER BY val <-> '[3,3,3]' LIMIT 5;
DROP TABLE t;

View File

@@ -4,6 +4,6 @@ CREATE UNLOGGED TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING ivfflat (val vector_l2_ops) WITH (lists = 1);
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
SELECT * FROM t ORDER BY val <-> '[3,3,3]' LIMIT 5;
DROP TABLE t;

View File

@@ -49,7 +49,7 @@ is(idx_scan(), 0);
$count = $node->safe_psql("postgres", qq(
SET enable_seqscan = off;
SET ivfflat.probes = 100;
SELECT COUNT(*) FROM (SELECT v FROM tst ORDER BY v <-> (SELECT v FROM tst LIMIT 1)) t;
SELECT COUNT(*) FROM (SELECT v FROM tst ORDER BY v <-> (SELECT v FROM tst LIMIT 1) LIMIT 20000) t;
));
is($count, $expected);
is(idx_scan(), 1);

View File

@@ -42,7 +42,7 @@ for my $i (1 .. 20)
my $count = $node->safe_psql("postgres", qq(
SET enable_seqscan = off;
SELECT COUNT(*) FROM (SELECT v FROM tst ORDER BY v <-> (SELECT v FROM tst LIMIT 1)) t;
SELECT COUNT(*) FROM (SELECT v FROM tst ORDER BY v <-> (SELECT v FROM tst LIMIT 1) LIMIT 20) t;
));
is($count, 10);
@@ -63,7 +63,7 @@ $node->pgbench(
my $count = $node->safe_psql("postgres", qq(
SET enable_seqscan = off;
SET hnsw.ef_search = 1000;
SELECT COUNT(*) FROM (SELECT v FROM tst ORDER BY v <-> (SELECT v FROM tst LIMIT 1)) t;
SELECT COUNT(*) FROM (SELECT v FROM tst ORDER BY v <-> (SELECT v FROM tst LIMIT 1) LIMIT 1000) t;
));
# Elements may lose all incoming connections with the HNSW algorithm
# Vacuuming can fix this if one of the elements neighbors is deleted

View File

@@ -23,10 +23,11 @@ sub insert_vectors
sub test_duplicates
{
# TODO Improve
my $res = $node->safe_psql("postgres", qq(
SET enable_seqscan = off;
SET hnsw.ef_search = 1;
SELECT COUNT(*) FROM (SELECT * FROM tst ORDER BY v <-> '[1,1,1]') t;
SET hnsw.ef_search = 10;
SELECT COUNT(*) FROM (SELECT * FROM tst ORDER BY v <-> '[1,1,1]' LIMIT 10) t;
));
is($res, 10);
}

View File

@@ -0,0 +1,64 @@
use strict;
use warnings;
use PostgresNode;
use TestLib;
use Test::More;

# Planner test for ivfflat: checks whether an ORDER BY <-> query is planned as
# an index scan or a sequential scan depending on LIMIT / OFFSET and on
# ivfflat.probes. Each case runs EXPLAIN ANALYZE and matches the plan text.

# Initialize node
my $node = get_new_node('node');
$node->init;
$node->start;

# Create table and index
$node->safe_psql("postgres", "CREATE EXTENSION vector;");
$node->safe_psql("postgres", "CREATE TABLE tst (v vector(3));");
$node->safe_psql("postgres",
	"INSERT INTO tst SELECT ARRAY[random(), random(), random()] FROM generate_series(1, 1000) i;"
);
$node->safe_psql("postgres", "CREATE INDEX ON tst USING ivfflat (v vector_l2_ops) WITH (lists = 10);");

# Query shared by every case; only the LIMIT / OFFSET suffix and the session
# settings differ.
my $query = q{SELECT * FROM tst ORDER BY v <-> '[1,2,3]'};

# Each case is [ description, SQL sent to psql, regex the plan must match ].
# The boundaries used below are 100 rows with default settings and 200 rows
# with probes = 2 (presumably 1000 rows / 10 lists per probe -- confirm
# against the cost estimate).
# Every safe_psql call is a separate psql invocation, so the SET statements
# are issued inside the case that needs them.
my @cases = (
	[
		'limit within expected tuples uses the index',
		"EXPLAIN ANALYZE $query LIMIT 100;",
		qr/Index Scan/,
	],
	[
		'limit + offset within expected tuples uses the index',
		"EXPLAIN ANALYZE $query LIMIT 90 OFFSET 10;",
		qr/Index Scan/,
	],
	[
		'limit within expected tuples with probes = 2 uses the index',
		"SET ivfflat.probes = 2;\nEXPLAIN ANALYZE $query LIMIT 200;",
		qr/Index Scan/,
	],
	[
		'limit > expected tuples falls back to a sequential scan',
		"EXPLAIN ANALYZE $query LIMIT 101;",
		qr/Seq Scan/,
	],
	[
		'limit + offset > expected tuples falls back to a sequential scan',
		"EXPLAIN ANALYZE $query LIMIT 91 OFFSET 10;",
		qr/Seq Scan/,
	],
	[
		'limit > expected tuples with probes = 2 falls back to a sequential scan',
		"SET ivfflat.probes = 2;\nEXPLAIN ANALYZE $query LIMIT 201;",
		qr/Seq Scan/,
	],
	[
		'no limit falls back to a sequential scan',
		"EXPLAIN ANALYZE $query;",
		qr/Seq Scan/,
	],
);

for my $case (@cases)
{
	my ($name, $sql, $expected_plan) = @{$case};

	my $explain = $node->safe_psql("postgres", $sql);

	# The description identifies the failing scenario in the TAP output.
	like($explain, $expected_plan, $name);
}

done_testing();

50
test/t/020_hnsw_limit.pl Normal file
View File

@@ -0,0 +1,50 @@
use strict;
use warnings;
use PostgresNode;
use TestLib;
use Test::More;

# Planner test for hnsw: checks whether an ORDER BY <-> query is planned as an
# index scan or a sequential scan depending on LIMIT / OFFSET. Each case runs
# EXPLAIN ANALYZE and matches the plan text.

# Initialize node
my $node = get_new_node('node');
$node->init;
$node->start;

# Create table and index
$node->safe_psql("postgres", "CREATE EXTENSION vector;");
$node->safe_psql("postgres", "CREATE TABLE tst (v vector(3));");
$node->safe_psql("postgres",
	"INSERT INTO tst SELECT ARRAY[random(), random(), random()] FROM generate_series(1, 1000) i;"
);
$node->safe_psql("postgres", "CREATE INDEX ON tst USING hnsw (v vector_l2_ops);");

# Query shared by every case; only the LIMIT / OFFSET suffix differs.
my $query = q{SELECT * FROM tst ORDER BY v <-> '[1,2,3]'};

# Each case is [ description, SQL sent to psql, regex the plan must match ].
# The boundary between index and sequential scan is placed at 40 rows
# (presumably the default hnsw.ef_search -- confirm against the extension).
my @cases = (
	[
		'limit within ef_search uses the index',
		"EXPLAIN ANALYZE $query LIMIT 40;",
		qr/Index Scan/,
	],
	[
		'limit + offset within ef_search uses the index',
		"EXPLAIN ANALYZE $query LIMIT 30 OFFSET 10;",
		qr/Index Scan/,
	],
	[
		'limit > ef_search falls back to a sequential scan',
		"EXPLAIN ANALYZE $query LIMIT 41;",
		qr/Seq Scan/,
	],
	[
		'limit + offset > ef_search falls back to a sequential scan',
		"EXPLAIN ANALYZE $query LIMIT 31 OFFSET 10;",
		qr/Seq Scan/,
	],
	[
		'no limit falls back to a sequential scan',
		"EXPLAIN ANALYZE $query;",
		qr/Seq Scan/,
	],
);

for my $case (@cases)
{
	my ($name, $sql, $expected_plan) = @{$case};

	my $explain = $node->safe_psql("postgres", $sql);

	# The description identifies the failing scenario in the TAP output.
	like($explain, $expected_plan, $name);
}

done_testing();