Added more TAP tests [skip ci]

This commit is contained in:
Andrew Kane
2024-10-13 19:14:09 -07:00
parent 1066b4be60
commit f52abfc26c
3 changed files with 272 additions and 0 deletions

View File

@@ -0,0 +1,113 @@
use strict;
use warnings FATAL => 'all';
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;
my $node;
my @queries = ();
my @expected;
my $limit = 20;
my $dim = 10;
my $array_sql = join(",", ('(random() * 255)::int - 128') x $dim);
sub test_recall
{
my ($min, $operator) = @_;
my $correct = 0;
my $total = 0;
my $explain = $node->safe_psql("postgres", qq(
SET enable_seqscan = off;
EXPLAIN ANALYZE SELECT i FROM tst ORDER BY v $operator '$queries[0]' LIMIT $limit;
));
like($explain, qr/Index Scan/);
for my $i (0 .. $#queries)
{
my $actual = $node->safe_psql("postgres", qq(
SET enable_seqscan = off;
SELECT i FROM tst ORDER BY v $operator '$queries[$i]' LIMIT $limit;
));
my @actual_ids = split("\n", $actual);
my %actual_set = map { $_ => 1 } @actual_ids;
my @expected_ids = split("\n", $expected[$i]);
foreach (@expected_ids)
{
if (exists($actual_set{$_}))
{
$correct++;
}
$total++;
}
}
cmp_ok($correct / $total, ">=", $min, $operator);
}
# Initialize node
$node = PostgreSQL::Test::Cluster->new('node');
$node->init;
$node->start;
# Create table
$node->safe_psql("postgres", "CREATE EXTENSION vector;");
$node->safe_psql("postgres", "CREATE TABLE tst (i serial, v intvec($dim));");
# Generate queries
for (1 .. 20)
{
my @r = ();
for (1 .. $dim)
{
push(@r, int(rand(256)) - 128);
}
push(@queries, "[" . join(",", @r) . "]");
}
# Check each index type
my @operators = ("<->", "<#>", "<=>", "<+>");
my @opclasses = ("intvec_l2_ops", "intvec_ip_ops", "intvec_cosine_ops", "intvec_l1_ops");
for my $i (0 .. $#operators)
{
my $operator = $operators[$i];
my $opclass = $opclasses[$i];
# Add index
$node->safe_psql("postgres", "CREATE INDEX idx ON tst USING hnsw (v $opclass);");
# Use concurrent inserts
$node->pgbench(
"--no-vacuum --client=10 --transactions=1000",
0,
[qr{actually processed}],
[qr{^$}],
"concurrent INSERTs",
{
"040_hnsw_intvec_insert_recall_$opclass" => "INSERT INTO tst (v) VALUES (ARRAY[$array_sql]);"
}
);
# Get exact results
@expected = ();
foreach (@queries)
{
my $res = $node->safe_psql("postgres", qq(
SET enable_indexscan = off;
SELECT i FROM tst ORDER BY v $operator '$_' LIMIT $limit;
));
push(@expected, $res);
}
# Test approximate results
my $min = 0.98;
test_recall($min, $operator);
$node->safe_psql("postgres", "DROP INDEX idx;");
$node->safe_psql("postgres", "TRUNCATE tst;");
}
done_testing();

View File

@@ -0,0 +1,101 @@
use strict;
use warnings FATAL => 'all';
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;
my $node;
my @queries = ();
my @expected;
my $limit = 20;
my $dim = 10;
my $array_sql = join(",", ('(random() * 255)::int - 128') x $dim);
sub test_recall
{
my ($min, $ef_search, $test_name) = @_;
my $correct = 0;
my $total = 0;
my $explain = $node->safe_psql("postgres", qq(
SET enable_seqscan = off;
SET hnsw.ef_search = $ef_search;
EXPLAIN ANALYZE SELECT i FROM tst ORDER BY v <-> '$queries[0]' LIMIT $limit;
));
like($explain, qr/Index Scan/);
for my $i (0 .. $#queries)
{
my $actual = $node->safe_psql("postgres", qq(
SET enable_seqscan = off;
SET hnsw.ef_search = $ef_search;
SELECT i FROM tst ORDER BY v <-> '$queries[$i]' LIMIT $limit;
));
my @actual_ids = split("\n", $actual);
my %actual_set = map { $_ => 1 } @actual_ids;
my @expected_ids = split("\n", $expected[$i]);
foreach (@expected_ids)
{
if (exists($actual_set{$_}))
{
$correct++;
}
$total++;
}
}
cmp_ok($correct / $total, ">=", $min, $test_name);
}
# Initialize node
$node = PostgreSQL::Test::Cluster->new('node');
$node->init;
$node->start;
# Create table
$node->safe_psql("postgres", "CREATE EXTENSION vector;");
$node->safe_psql("postgres", "CREATE TABLE tst (i int4, v intvec($dim));");
$node->safe_psql("postgres", "ALTER TABLE tst SET (autovacuum_enabled = false);");
$node->safe_psql("postgres",
"INSERT INTO tst SELECT i, ARRAY[$array_sql] FROM generate_series(1, 10000) i;"
);
# Add index
$node->safe_psql("postgres", "CREATE INDEX ON tst USING hnsw (v intvec_l2_ops) WITH (m = 4, ef_construction = 8);");
# Delete data
$node->safe_psql("postgres", "DELETE FROM tst WHERE i > 2500;");
# Generate queries
for (1 .. 20)
{
my @r = ();
for (1 .. $dim)
{
push(@r, int(rand(256)) - 128);
}
push(@queries, "[" . join(",", @r) . "]");
}
# Get exact results
@expected = ();
foreach (@queries)
{
my $res = $node->safe_psql("postgres", qq(
SET enable_indexscan = off;
SELECT i FROM tst ORDER BY v <-> '$_' LIMIT $limit;
));
push(@expected, $res);
}
test_recall(0.18, $limit, "before vacuum");
test_recall(0.85, 100, "before vacuum");
# TODO Test concurrent inserts with vacuum
$node->safe_psql("postgres", "VACUUM tst;");
test_recall(0.85, $limit, "after vacuum");
done_testing();

View File

@@ -0,0 +1,58 @@
use strict;
use warnings FATAL => 'all';
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;
# Initialize node
my $node = PostgreSQL::Test::Cluster->new('node');
$node->init;
$node->start;
# Create table
$node->safe_psql("postgres", "CREATE EXTENSION vector;");
$node->safe_psql("postgres", "CREATE TABLE tst (v intvec(3));");
sub insert_vectors
{
for my $i (1 .. 20)
{
$node->safe_psql("postgres", "INSERT INTO tst VALUES ('[1,1,1]');");
}
}
sub test_duplicates
{
my $res = $node->safe_psql("postgres", qq(
SET enable_seqscan = off;
SET hnsw.ef_search = 1;
SELECT COUNT(*) FROM (SELECT * FROM tst ORDER BY v <-> '[1,1,1]') t;
));
is($res, 10);
}
# Test duplicates with build
insert_vectors();
$node->safe_psql("postgres", "CREATE INDEX idx ON tst USING hnsw (v intvec_l2_ops);");
test_duplicates();
# Reset
$node->safe_psql("postgres", "TRUNCATE tst;");
# Test duplicates with inserts
insert_vectors();
test_duplicates();
# Test fallback path for inserts
$node->pgbench(
"--no-vacuum --client=5 --transactions=100",
0,
[qr{actually processed}],
[qr{^$}],
"concurrent INSERTs",
{
"042_hnsw_intvec_duplicates" => "INSERT INTO tst VALUES ('[1,1,1]');"
}
);
done_testing();