mirror of
https://github.com/pgvector/pgvector.git
synced 2026-07-03 03:00:56 +08:00
Added HNSW index type - #181
This commit is contained in:
26
test/expected/hnsw_cosine.out
Normal file
26
test/expected/hnsw_cosine.out
Normal file
@@ -0,0 +1,26 @@
|
||||
SET enable_seqscan = off;
|
||||
CREATE TABLE t (val vector(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val vector_cosine_ops);
|
||||
INSERT INTO t (val) VALUES ('[1,2,4]');
|
||||
SELECT * FROM t ORDER BY val <=> '[3,3,3]';
|
||||
val
|
||||
---------
|
||||
[1,1,1]
|
||||
[1,2,3]
|
||||
[1,2,4]
|
||||
(3 rows)
|
||||
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '[0,0,0]') t2;
|
||||
count
|
||||
-------
|
||||
3
|
||||
(1 row)
|
||||
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::vector)) t2;
|
||||
count
|
||||
-------
|
||||
3
|
||||
(1 row)
|
||||
|
||||
DROP TABLE t;
|
||||
21
test/expected/hnsw_ip.out
Normal file
21
test/expected/hnsw_ip.out
Normal file
@@ -0,0 +1,21 @@
|
||||
SET enable_seqscan = off;
|
||||
CREATE TABLE t (val vector(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val vector_ip_ops);
|
||||
INSERT INTO t (val) VALUES ('[1,2,4]');
|
||||
SELECT * FROM t ORDER BY val <#> '[3,3,3]';
|
||||
val
|
||||
---------
|
||||
[1,2,4]
|
||||
[1,2,3]
|
||||
[1,1,1]
|
||||
[0,0,0]
|
||||
(4 rows)
|
||||
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::vector)) t2;
|
||||
count
|
||||
-------
|
||||
4
|
||||
(1 row)
|
||||
|
||||
DROP TABLE t;
|
||||
30
test/expected/hnsw_l2.out
Normal file
30
test/expected/hnsw_l2.out
Normal file
@@ -0,0 +1,30 @@
|
||||
SET enable_seqscan = off;
|
||||
CREATE TABLE t (val vector(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val vector_l2_ops);
|
||||
INSERT INTO t (val) VALUES ('[1,2,4]');
|
||||
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
|
||||
val
|
||||
---------
|
||||
[1,2,3]
|
||||
[1,2,4]
|
||||
[1,1,1]
|
||||
[0,0,0]
|
||||
(4 rows)
|
||||
|
||||
SELECT * FROM t ORDER BY val <-> (SELECT NULL::vector);
|
||||
val
|
||||
---------
|
||||
[0,0,0]
|
||||
[1,1,1]
|
||||
[1,2,3]
|
||||
[1,2,4]
|
||||
(4 rows)
|
||||
|
||||
SELECT COUNT(*) FROM t;
|
||||
count
|
||||
-------
|
||||
5
|
||||
(1 row)
|
||||
|
||||
DROP TABLE t;
|
||||
25
test/expected/hnsw_options.out
Normal file
25
test/expected/hnsw_options.out
Normal file
@@ -0,0 +1,25 @@
|
||||
SET enable_seqscan = off;
|
||||
CREATE TABLE t (val vector(3));
|
||||
CREATE INDEX ON t USING hnsw (val vector_l2_ops) WITH (m = 3);
|
||||
ERROR: value 3 out of bounds for option "m"
|
||||
DETAIL: Valid values are between "4" and "100".
|
||||
CREATE INDEX ON t USING hnsw (val vector_l2_ops) WITH (m = 101);
|
||||
ERROR: value 101 out of bounds for option "m"
|
||||
DETAIL: Valid values are between "4" and "100".
|
||||
CREATE INDEX ON t USING hnsw (val vector_l2_ops) WITH (ef_construction = 9);
|
||||
ERROR: value 9 out of bounds for option "ef_construction"
|
||||
DETAIL: Valid values are between "10" and "1000".
|
||||
CREATE INDEX ON t USING hnsw (val vector_l2_ops) WITH (ef_construction = 1001);
|
||||
ERROR: value 1001 out of bounds for option "ef_construction"
|
||||
DETAIL: Valid values are between "10" and "1000".
|
||||
SHOW hnsw.ef_search;
|
||||
hnsw.ef_search
|
||||
----------------
|
||||
40
|
||||
(1 row)
|
||||
|
||||
SET hnsw.ef_search = 9;
|
||||
ERROR: 9 is outside the valid range for parameter "hnsw.ef_search" (10 .. 1000)
|
||||
SET hnsw.ef_search = 1001;
|
||||
ERROR: 1001 is outside the valid range for parameter "hnsw.ef_search" (10 .. 1000)
|
||||
DROP TABLE t;
|
||||
13
test/expected/hnsw_unlogged.out
Normal file
13
test/expected/hnsw_unlogged.out
Normal file
@@ -0,0 +1,13 @@
|
||||
SET enable_seqscan = off;
|
||||
CREATE UNLOGGED TABLE t (val vector(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val vector_l2_ops);
|
||||
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
|
||||
val
|
||||
---------
|
||||
[1,2,3]
|
||||
[1,1,1]
|
||||
[0,0,0]
|
||||
(3 rows)
|
||||
|
||||
DROP TABLE t;
|
||||
13
test/sql/hnsw_cosine.sql
Normal file
13
test/sql/hnsw_cosine.sql
Normal file
@@ -0,0 +1,13 @@
|
||||
SET enable_seqscan = off;
|
||||
|
||||
CREATE TABLE t (val vector(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val vector_cosine_ops);
|
||||
|
||||
INSERT INTO t (val) VALUES ('[1,2,4]');
|
||||
|
||||
SELECT * FROM t ORDER BY val <=> '[3,3,3]';
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '[0,0,0]') t2;
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::vector)) t2;
|
||||
|
||||
DROP TABLE t;
|
||||
12
test/sql/hnsw_ip.sql
Normal file
12
test/sql/hnsw_ip.sql
Normal file
@@ -0,0 +1,12 @@
|
||||
SET enable_seqscan = off;
|
||||
|
||||
CREATE TABLE t (val vector(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val vector_ip_ops);
|
||||
|
||||
INSERT INTO t (val) VALUES ('[1,2,4]');
|
||||
|
||||
SELECT * FROM t ORDER BY val <#> '[3,3,3]';
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::vector)) t2;
|
||||
|
||||
DROP TABLE t;
|
||||
13
test/sql/hnsw_l2.sql
Normal file
13
test/sql/hnsw_l2.sql
Normal file
@@ -0,0 +1,13 @@
|
||||
SET enable_seqscan = off;
|
||||
|
||||
CREATE TABLE t (val vector(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val vector_l2_ops);
|
||||
|
||||
INSERT INTO t (val) VALUES ('[1,2,4]');
|
||||
|
||||
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
|
||||
SELECT * FROM t ORDER BY val <-> (SELECT NULL::vector);
|
||||
SELECT COUNT(*) FROM t;
|
||||
|
||||
DROP TABLE t;
|
||||
14
test/sql/hnsw_options.sql
Normal file
14
test/sql/hnsw_options.sql
Normal file
@@ -0,0 +1,14 @@
|
||||
SET enable_seqscan = off;
|
||||
|
||||
CREATE TABLE t (val vector(3));
|
||||
CREATE INDEX ON t USING hnsw (val vector_l2_ops) WITH (m = 3);
|
||||
CREATE INDEX ON t USING hnsw (val vector_l2_ops) WITH (m = 101);
|
||||
CREATE INDEX ON t USING hnsw (val vector_l2_ops) WITH (ef_construction = 9);
|
||||
CREATE INDEX ON t USING hnsw (val vector_l2_ops) WITH (ef_construction = 1001);
|
||||
|
||||
SHOW hnsw.ef_search;
|
||||
|
||||
SET hnsw.ef_search = 9;
|
||||
SET hnsw.ef_search = 1001;
|
||||
|
||||
DROP TABLE t;
|
||||
9
test/sql/hnsw_unlogged.sql
Normal file
9
test/sql/hnsw_unlogged.sql
Normal file
@@ -0,0 +1,9 @@
|
||||
SET enable_seqscan = off;
|
||||
|
||||
CREATE UNLOGGED TABLE t (val vector(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val vector_l2_ops);
|
||||
|
||||
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
|
||||
|
||||
DROP TABLE t;
|
||||
99
test/t/010_hnsw_wal.pl
Normal file
99
test/t/010_hnsw_wal.pl
Normal file
@@ -0,0 +1,99 @@
|
||||
# Based on postgres/contrib/bloom/t/001_wal.pl
|
||||
|
||||
# Test that generic WAL (xlog) records work for hnsw index replication.
|
||||
use strict;
|
||||
use warnings;
|
||||
use PostgresNode;
|
||||
use TestLib;
|
||||
use Test::More;
|
||||
|
||||
my $dim = 32;
|
||||
|
||||
my $node_primary;
|
||||
my $node_replica;
|
||||
|
||||
# Run a query on both primary and replica and check that the results match.
#
# Waits until the replica has replayed the primary's current WAL position,
# then runs the same nearest-neighbour query (random vector of $dim
# components, index scan forced) on both nodes.
#
# $test_name - label prefixed to the Test::More description.
#
# Reads the file-level $node_primary, $node_replica and $dim.
sub test_index_replay
{
	my ($test_name) = @_;

	# Wait for replica to catch up; give up loudly if the poll fails.
	my $applname = $node_replica->name;
	my $caughtup_query = "SELECT pg_current_wal_lsn() <= replay_lsn FROM pg_stat_replication WHERE application_name = '$applname';";
	$node_primary->poll_query_until('postgres', $caughtup_query)
	  or die "Timed out while waiting for replica '$applname' to catch up";

	# Random query vector with one component per dimension
	my $query_vector = join(",", map { rand() } 1 .. $dim);

	my $queries = join("\n",
		'',
		"SET enable_seqscan = off;",
		"SELECT * FROM tst ORDER BY v <-> '[$query_vector]' LIMIT 10;",
		'');

	# Run test queries and compare their result
	my $primary_result = $node_primary->safe_psql("postgres", $queries);
	my $replica_result = $node_replica->safe_psql("postgres", $queries);

	# The replica's output is the value under test and the primary's is the
	# reference, so failure diagnostics label "got"/"expected" correctly.
	is($replica_result, $primary_result, "$test_name: query result matches");
	return;
}
|
||||
|
||||
# Every row needs its own random components, so spell out
# ARRAY[random(), random(), ...] with $dim entries rather than using
# SELECT array_agg(random()) FROM generate_series(1, $dim)
my $array_sql = join(",", map { 'random()' } 1 .. $dim);

# Primary node, set up for streaming replication
$node_primary = get_new_node('primary');
$node_primary->init(allows_streaming => 1);

# Extra settings only apply to larger dimensions
# TODO use wal_keep_segments for Postgres < 13
$node_primary->append_conf('postgresql.conf', 'wal_keep_size = 1GB')
  if $dim > 32;
$node_primary->append_conf('postgresql.conf', 'maintenance_work_mem = 128MB')
  if $dim > 1500;
$node_primary->start;

# Base backup used to seed the replica
my $backup_label = 'my_backup';
$node_primary->backup($backup_label);

# Streaming replica linked to the primary
$node_replica = get_new_node('replica');
$node_replica->init_from_backup($node_primary, $backup_label, has_streaming => 1);
$node_replica->start;

# Extension, table, data and hnsw index on the primary, in that order
foreach my $statement (
	"CREATE EXTENSION vector;",
	"CREATE TABLE tst (i int4, v vector($dim));",
	"INSERT INTO tst SELECT i % 10, ARRAY[$array_sql] FROM generate_series(1, 1000) i;",
	"CREATE INDEX ON tst USING hnsw (v vector_l2_ops);")
{
	$node_primary->safe_psql("postgres", $statement);
}

# Both nodes must agree before any modification
test_index_replay('initial');

# Ten rounds of delete / vacuum / insert, comparing results after each step
foreach my $cycle (1 .. 10)
{
	$node_primary->safe_psql("postgres", "DELETE FROM tst WHERE i = $cycle;");
	test_index_replay("delete $cycle");

	$node_primary->safe_psql("postgres", "VACUUM tst;");
	test_index_replay("vacuum $cycle");

	# Each round adds the next 100 ids after the initial 1000
	my $first_id = 1001 + ($cycle - 1) * 100;
	my $last_id = 1000 + $cycle * 100;
	$node_primary->safe_psql("postgres",
		"INSERT INTO tst SELECT i % 10, ARRAY[$array_sql] FROM generate_series($first_id, $last_id) i;");
	test_index_replay("insert $cycle");
}

done_testing();
|
||||
43
test/t/011_hnsw_vacuum.pl
Normal file
43
test/t/011_hnsw_vacuum.pl
Normal file
@@ -0,0 +1,43 @@
|
||||
use strict;
|
||||
use warnings;
|
||||
use PostgresNode;
|
||||
use TestLib;
|
||||
use Test::More;
|
||||
|
||||
my $dim = 3;

# One "i % N" expression per dimension, each N drawn at random from 1..1000.
# The expressions are fixed once here, so both loads below insert the same rows.
my $array_sql = join(", ", map { "i % " . (int(rand(1000)) + 1) } 1 .. $dim);

# Statements that are issued more than once
my $load_sql = "INSERT INTO tst SELECT i % 10, ARRAY[$array_sql] FROM generate_series(1, 10000) i;";
my $size_sql = "SELECT pg_total_relation_size('tst_v_idx');";

# Initialize node
my $node = get_new_node('node');
$node->init;
$node->start;

# Create table, load it, then build the index
$node->safe_psql("postgres", "CREATE EXTENSION vector;");
$node->safe_psql("postgres", "CREATE TABLE tst (i int4, v vector($dim));");
$node->safe_psql("postgres", $load_sql);
$node->safe_psql("postgres", "CREATE INDEX ON tst USING hnsw (v vector_l2_ops);");

# Index size right after the build
my $size_before = $node->safe_psql("postgres", $size_sql);

# Delete all, vacuum, and insert same data
$node->safe_psql("postgres", "DELETE FROM tst;");
$node->safe_psql("postgres", "VACUUM tst;");
$node->safe_psql("postgres", $load_sql);

# Index size after the reload: allow at most 1% growth
my $size_after = $node->safe_psql("postgres", $size_sql);
cmp_ok($size_after, "<=", $size_before * 1.01, "size does not increase too much");

done_testing();
|
||||
96
test/t/012_hnsw_build_recall.pl
Normal file
96
test/t/012_hnsw_build_recall.pl
Normal file
@@ -0,0 +1,96 @@
|
||||
use strict;
|
||||
use warnings;
|
||||
use PostgresNode;
|
||||
use TestLib;
|
||||
use Test::More;
|
||||
|
||||
my $node;
|
||||
my @queries = ();
|
||||
my @expected;
|
||||
my $limit = 20;
|
||||
|
||||
# Check that index scans reach a minimum recall against the exact results.
#
# $min      - minimum acceptable recall, as a fraction
# $operator - distance operator used in ORDER BY
#
# Reads the file-level $node, @queries, @expected and $limit. Recall is the
# share of ids in @expected that also appear in the index-scan result for the
# same query. Returns the result of the final cmp_ok.
sub test_recall
{
	my ($min, $operator) = @_;
	my $correct = 0;
	my $total = 0;

	# The plan for the first query must use the index
	my $explain = $node->safe_psql("postgres", join("\n",
		'',
		"SET enable_seqscan = off;",
		"EXPLAIN ANALYZE SELECT i FROM tst ORDER BY v $operator '$queries[0]' LIMIT $limit;",
		''));
	like($explain, qr/Index Scan/, "$operator: uses index scan");

	for my $i (0 .. $#queries) {
		my $actual = $node->safe_psql("postgres", join("\n",
			'',
			"SET enable_seqscan = off;",
			"SELECT i FROM tst ORDER BY v $operator '$queries[$i]' LIMIT $limit;",
			''));
		my %actual_set = map { $_ => 1 } split("\n", $actual);

		for my $id (split("\n", $expected[$i])) {
			$correct++ if exists $actual_set{$id};
			$total++;
		}
	}

	# With no expected ids, report a failed check instead of dying on a
	# division by zero.
	my $recall = $total > 0 ? $correct / $total : 0;
	return cmp_ok($recall, ">=", $min, $operator);
}
|
||||
|
||||
# Start a fresh node
$node = get_new_node('node');
$node->init;
$node->start;

# Table with 10000 random 3-dimensional vectors
$node->safe_psql("postgres", "CREATE EXTENSION vector;");
$node->safe_psql("postgres", "CREATE TABLE tst (i int4, v vector(3));");
$node->safe_psql("postgres",
	"INSERT INTO tst SELECT i, ARRAY[random(), random(), random()] FROM generate_series(1, 10000) i;");

# 20 random query vectors
for my $n (1 .. 20) {
	my @components = map { rand() } 1 .. 3;
	push(@queries, "[" . join(",", @components) . "]");
}

# Operators to check; any operator not listed in the map uses the cosine opclass
my @operators = ("<->", "<#>", "<=>");
my %opclass_for = (
	"<->" => "vector_l2_ops",
	"<#>" => "vector_ip_ops",
);

foreach my $operator (@operators) {
	# Exact results, collected before this operator's index exists
	@expected = ();
	foreach my $query (@queries) {
		my $exact = $node->safe_psql("postgres",
			"SELECT i FROM tst ORDER BY v $operator '$query' LIMIT $limit;");
		push(@expected, $exact);
	}

	# Build the index for this operator
	my $opclass = exists $opclass_for{$operator}
	  ? $opclass_for{$operator}
	  : "vector_cosine_ops";
	$node->safe_psql("postgres", "CREATE INDEX ON tst USING hnsw (v $opclass);");

	# Inner product is held to a lower recall bar than the other operators
	my $min_recall = $operator eq "<#>" ? 0.80 : 0.99;
	test_recall($min_recall, $operator);
}

done_testing();
|
||||
103
test/t/013_hnsw_insert_recall.pl
Normal file
103
test/t/013_hnsw_insert_recall.pl
Normal file
@@ -0,0 +1,103 @@
|
||||
use strict;
|
||||
use warnings;
|
||||
use PostgresNode;
|
||||
use TestLib;
|
||||
use Test::More;
|
||||
|
||||
my $node;
|
||||
my @queries = ();
|
||||
my @expected;
|
||||
my $limit = 20;
|
||||
|
||||
# Check that index scans reach a minimum recall against the exact results.
#
# $min      - minimum acceptable recall, as a fraction
# $operator - distance operator used in ORDER BY
#
# Reads the file-level $node, @queries, @expected and $limit. Recall is the
# share of ids in @expected that also appear in the index-scan result for the
# same query. Returns the result of the final cmp_ok.
sub test_recall
{
	my ($min, $operator) = @_;
	my $correct = 0;
	my $total = 0;

	# The plan for the first query must use the index
	my $explain = $node->safe_psql("postgres", join("\n",
		'',
		"SET enable_seqscan = off;",
		"EXPLAIN ANALYZE SELECT i FROM tst ORDER BY v $operator '$queries[0]' LIMIT $limit;",
		''));
	like($explain, qr/Index Scan/, "$operator: uses index scan");

	for my $i (0 .. $#queries) {
		my $actual = $node->safe_psql("postgres", join("\n",
			'',
			"SET enable_seqscan = off;",
			"SELECT i FROM tst ORDER BY v $operator '$queries[$i]' LIMIT $limit;",
			''));
		my %actual_set = map { $_ => 1 } split("\n", $actual);

		for my $id (split("\n", $expected[$i])) {
			$correct++ if exists $actual_set{$id};
			$total++;
		}
	}

	# With no expected ids, report a failed check instead of dying on a
	# division by zero.
	my $recall = $total > 0 ? $correct / $total : 0;
	return cmp_ok($recall, ">=", $min, $operator);
}
|
||||
|
||||
# Start a fresh node
$node = get_new_node('node');
$node->init;
$node->start;

# Empty table; rows are inserted after each index is created
$node->safe_psql("postgres", "CREATE EXTENSION vector;");
$node->safe_psql("postgres", "CREATE TABLE tst (i int4, v vector(3));");

# 20 random query vectors
for my $n (1 .. 20) {
	my @components = map { rand() } 1 .. 3;
	push(@queries, "[" . join(",", @components) . "]");
}

# Operators to check; any operator not listed in the map uses the cosine opclass
my @operators = ("<->", "<#>", "<=>");
my %opclass_for = (
	"<->" => "vector_l2_ops",
	"<#>" => "vector_ip_ops",
);

foreach my $operator (@operators) {
	# Create the index first so that every row goes through the insert path
	my $opclass = exists $opclass_for{$operator}
	  ? $opclass_for{$operator}
	  : "vector_cosine_ops";
	$node->safe_psql("postgres", "CREATE INDEX idx ON tst USING hnsw (v $opclass);");

	$node->safe_psql("postgres",
		"INSERT INTO tst SELECT i, ARRAY[random(), random(), random()] FROM generate_series(1, 10000) i;");

	# Exact results, with index scans disabled
	@expected = ();
	foreach my $query (@queries) {
		my $exact = $node->safe_psql("postgres", join("\n",
			'',
			"SET enable_indexscan = off;",
			"SELECT i FROM tst ORDER BY v $operator '$query' LIMIT $limit;",
			''));
		push(@expected, $exact);
	}

	# Inner product is held to a lower recall bar than the other operators
	my $min_recall = $operator eq "<#>" ? 0.80 : 0.99;
	test_recall($min_recall, $operator);

	# Reset for the next operator
	$node->safe_psql("postgres", "DROP INDEX idx;");
	$node->safe_psql("postgres", "TRUNCATE tst;");
}

done_testing();
|
||||
58
test/t/014_hnsw_inserts.pl
Normal file
58
test/t/014_hnsw_inserts.pl
Normal file
@@ -0,0 +1,58 @@
|
||||
use strict;
|
||||
use warnings;
|
||||
use PostgresNode;
|
||||
use TestLib;
|
||||
use Test::More;
|
||||
|
||||
# Ensures elements and neighbors on both same and different pages
my $dim = 1900;

# ARRAY[...] body: one random() call per dimension
my $array_sql = join(",", map { 'random()' } 1 .. $dim);

# Initialize node
my $node = get_new_node('node');
$node->init;
$node->start;

# Create table, seed it with 100 rows, then build the index
$node->safe_psql("postgres", "CREATE EXTENSION vector;");
$node->safe_psql("postgres", "CREATE TABLE tst (v vector($dim));");
$node->safe_psql("postgres",
	"INSERT INTO tst SELECT ARRAY[$array_sql] FROM generate_series(1, 100) i;");
$node->safe_psql("postgres", "CREATE INDEX ON tst USING hnsw (v vector_l2_ops);");

# Concurrent inserts through pgbench: 5 clients, 100 transactions each,
# 10 rows per transaction
my $pgbench_opts = "--no-vacuum --client=5 --transactions=100";
my %pgbench_scripts = (
	"007_inserts" => "INSERT INTO tst SELECT ARRAY[$array_sql] FROM generate_series(1, 10) i;",
);
$node->pgbench(
	$pgbench_opts,
	0,
	[qr{actually processed}],
	[qr{^$}],
	"concurrent INSERTs",
	\%pgbench_scripts);
|
||||
|
||||
# Return the idx_scan counter that pg_stat_user_indexes reports for the
# tst_v_idx index, as returned by safe_psql on the file-level $node.
sub idx_scan
{
	# Stats do not update instantaneously, so pause before reading them
	# https://www.postgresql.org/docs/current/monitoring-stats.html#MONITORING-STATS-VIEWS
	sleep(1);

	my $stats_query = "SELECT idx_scan FROM pg_stat_user_indexes WHERE indexrelid = 'tst_v_idx'::regclass;";
	return $node->safe_psql("postgres", $stats_query);
}
|
||||
|
||||
# 100 seed rows + 5 clients * 100 transactions * 10 rows per transaction
my $expected = 100 + 5 * 100 * 10;

# Every row must have been inserted, without any index scan so far
my $count = $node->safe_psql("postgres", "SELECT COUNT(*) FROM tst;");
is($count, $expected, "all rows inserted");
is(idx_scan(), 0, "no index scan before the ordered query");

# The same value feeds the ef_search setting and the expected row count, so
# the two cannot drift apart. The test expects the forced index scan to
# return exactly that many rows.
my $ef_search = 400;
$count = $node->safe_psql("postgres", join("\n",
	'',
	"SET enable_seqscan = off;",
	"SET hnsw.ef_search = $ef_search;",
	"SELECT COUNT(*) FROM (SELECT v FROM tst ORDER BY v <-> (SELECT v FROM tst LIMIT 1)) t;",
	''));
is($count, $ef_search, "index scan returns hnsw.ef_search rows");
is(idx_scan(), 1, "ordered query used the index once");

done_testing();
|
||||
Reference in New Issue
Block a user