Improved recall test

This commit is contained in:
Andrew Kane
2022-02-11 00:43:01 -08:00
parent b64c7d6228
commit 02ae98791c

View File

@@ -5,29 +5,35 @@ use TestLib;
use Test::More tests => 2;
my $node;
my @expected_ids;
my @queries = ();
my @expected = ();
# TODO Run more queries to prevent flakiness
sub test_recall
{
my ($probes, $min) = @_;
my $correct = 0;
my $total = 0;
my $actual = $node->safe_psql("postgres", qq(
SET enable_seqscan = off;
SET ivfflat.probes = $probes;
SELECT i FROM tst ORDER BY v <-> '[0.5,0.5,0.5]' LIMIT 10;
));
my @actual_ids = split("\n", $actual);
my %actual_set = map { $_ => 1 } @actual_ids;
for my $i (0 .. $#queries) {
my $actual = $node->safe_psql("postgres", qq(
SET enable_seqscan = off;
SET ivfflat.probes = $probes;
SELECT i FROM tst ORDER BY v <-> '$queries[$i]' LIMIT 10;
));
my @actual_ids = split("\n", $actual);
my %actual_set = map { $_ => 1 } @actual_ids;
my $count = 0;
for my $el (@expected_ids) {
if (exists($actual_set{$el})) {
$count++;
my @expected_ids = split("\n", $expected[$i]);
foreach (@expected_ids) {
if (exists($actual_set{$_})) {
$correct++;
}
$total++;
}
}
cmp_ok($count / scalar(@expected_ids), ">=", $min);
cmp_ok($correct / $total, ">=", $min);
}
# Initialize node
@@ -42,15 +48,25 @@ $node->safe_psql("postgres",
"INSERT INTO tst SELECT i, ARRAY[random(), random(), random()] FROM generate_series(1, 100000) i;"
);
# Generate queries
for (1..10) {
my $r1 = rand();
my $r2 = rand();
my $r3 = rand();
push(@queries, "[$r1,$r2,$r3]");
}
# Get exact results
my $expected = $node->safe_psql("postgres", "SELECT i FROM tst ORDER BY v <-> '[0.5,0.5,0.5]' LIMIT 10;");
@expected_ids = split("\n", $expected);
foreach (@queries) {
my $res = $node->safe_psql("postgres", "SELECT i FROM tst ORDER BY v <-> '$_' LIMIT 10;");
push(@expected, $res);
}
# Add index
$node->safe_psql("postgres", "CREATE INDEX ON tst USING ivfflat (v);");
# Test approximate results
test_recall(1, 0.5);
test_recall(1, 0.8);
# Test probes
test_recall(100, 1.0);