Added halfvec type

This commit is contained in:
Andrew Kane
2024-04-02 13:55:45 -07:00
parent 1134e52762
commit 32a502c838
28 changed files with 1972 additions and 21 deletions

View File

@@ -46,6 +46,30 @@ SELECT '[1,2,3]'::vector::real[];
{1,2,3}
(1 row)
SELECT '[1,2,3]'::vector::halfvec;
halfvec
---------
[1,2,3]
(1 row)
SELECT '[1,2,3]'::halfvec::vector;
vector
---------
[1,2,3]
(1 row)
SELECT '[1,2,3]'::vector::halfvec(2);
ERROR: expected 2 dimensions, not 3
SELECT '[1,2,3]'::halfvec::vector(2);
ERROR: expected 2 dimensions, not 3
SELECT '[65520]'::vector::halfvec;
ERROR: infinite value not allowed in halfvec
SELECT '[1e-8]'::vector::halfvec;
halfvec
---------
[0]
(1 row)
SELECT array_agg(n)::vector FROM generate_series(1, 16001) n;
ERROR: vector cannot have more than 16000 dimensions
SELECT array_to_vector(array_agg(n), 16001, false) FROM generate_series(1, 16001) n;

View File

@@ -1,15 +1,15 @@
CREATE TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE TABLE t2 (val vector(3));
CREATE TABLE t (val vector(3), val2 halfvec(3));
INSERT INTO t (val, val2) VALUES ('[0,0,0]', '[0,0,0]'), ('[1,2,3]', '[1,2,3]'), ('[1,1,1]', '[1,1,1]'), (NULL, NULL);
CREATE TABLE t2 (val vector(3), val2 halfvec(3));
\copy t TO 'results/data.bin' WITH (FORMAT binary)
\copy t2 FROM 'results/data.bin' WITH (FORMAT binary)
SELECT * FROM t2 ORDER BY val;
val
---------
[0,0,0]
[1,1,1]
[1,2,3]
val | val2
---------+---------
[0,0,0] | [0,0,0]
[1,1,1] | [1,1,1]
[1,2,3] | [1,2,3]
|
(4 rows)
DROP TABLE t;

View File

@@ -0,0 +1,104 @@
SELECT l2_distance('[0,0]'::halfvec, '[3,4]');
l2_distance
-------------
5
(1 row)
SELECT l2_distance('[0,0]'::halfvec, '[0,1]');
l2_distance
-------------
1
(1 row)
SELECT l2_distance('[1,2]'::halfvec, '[3]');
ERROR: different halfvec dimensions 2 and 1
SELECT '[0,0]'::halfvec <-> '[3,4]';
?column?
----------
5
(1 row)
SELECT inner_product('[1,2]'::halfvec, '[3,4]');
inner_product
---------------
11
(1 row)
SELECT inner_product('[1,2]'::halfvec, '[3]');
ERROR: different halfvec dimensions 2 and 1
SELECT inner_product('[65504]'::halfvec, '[65504]');
inner_product
---------------
4290774016
(1 row)
SELECT '[1,2]'::halfvec <#> '[3,4]';
?column?
----------
-11
(1 row)
SELECT cosine_distance('[1,2]'::halfvec, '[2,4]');
cosine_distance
-----------------
0
(1 row)
SELECT cosine_distance('[1,2]'::halfvec, '[0,0]');
cosine_distance
-----------------
NaN
(1 row)
SELECT cosine_distance('[1,1]'::halfvec, '[1,1]');
cosine_distance
-----------------
0
(1 row)
SELECT cosine_distance('[1,0]'::halfvec, '[0,2]');
cosine_distance
-----------------
1
(1 row)
SELECT cosine_distance('[1,1]'::halfvec, '[-1,-1]');
cosine_distance
-----------------
2
(1 row)
SELECT cosine_distance('[1,2]'::halfvec, '[3]');
ERROR: different halfvec dimensions 2 and 1
SELECT cosine_distance('[1,1]'::halfvec, '[1.1,1.1]');
cosine_distance
-----------------
0
(1 row)
SELECT cosine_distance('[1,1]'::halfvec, '[-1.1,-1.1]');
cosine_distance
-----------------
2
(1 row)
SELECT '[1,2]'::halfvec <=> '[2,4]';
?column?
----------
0
(1 row)
SELECT l1_distance('[0,0]'::halfvec, '[3,4]');
l1_distance
-------------
7
(1 row)
SELECT l1_distance('[0,0]'::halfvec, '[0,1]');
l1_distance
-------------
1
(1 row)
SELECT l1_distance('[1,2]'::halfvec, '[3]');
ERROR: different halfvec dimensions 2 and 1

View File

@@ -0,0 +1,147 @@
SELECT '[1,2,3]'::halfvec;
halfvec
---------
[1,2,3]
(1 row)
SELECT '[-1,-2,-3]'::halfvec;
halfvec
------------
[-1,-2,-3]
(1 row)
SELECT '[1.,2.,3.]'::halfvec;
halfvec
---------
[1,2,3]
(1 row)
SELECT ' [ 1, 2 , 3 ] '::halfvec;
halfvec
---------
[1,2,3]
(1 row)
SELECT '[1.23456]'::halfvec;
halfvec
------------
[1.234375]
(1 row)
SELECT '[hello,1]'::halfvec;
ERROR: invalid input syntax for type halfvec: "[hello,1]"
LINE 1: SELECT '[hello,1]'::halfvec;
^
SELECT '[NaN,1]'::halfvec;
ERROR: NaN not allowed in halfvec
LINE 1: SELECT '[NaN,1]'::halfvec;
^
SELECT '[Infinity,1]'::halfvec;
ERROR: infinite value not allowed in halfvec
LINE 1: SELECT '[Infinity,1]'::halfvec;
^
SELECT '[-Infinity,1]'::halfvec;
ERROR: infinite value not allowed in halfvec
LINE 1: SELECT '[-Infinity,1]'::halfvec;
^
SELECT '[65519,-65519]'::halfvec;
halfvec
----------------
[65504,-65504]
(1 row)
SELECT '[65520,-65520]'::halfvec;
ERROR: value out of range: overflow
LINE 1: SELECT '[65520,-65520]'::halfvec;
^
SELECT '[1e-8,-1e-8]'::halfvec;
ERROR: value out of range: underflow
LINE 1: SELECT '[1e-8,-1e-8]'::halfvec;
^
SELECT '[4e38,1]'::halfvec;
ERROR: infinite value not allowed in halfvec
LINE 1: SELECT '[4e38,1]'::halfvec;
^
SELECT '[1,2,3'::halfvec;
ERROR: malformed halfvec literal: "[1,2,3"
LINE 1: SELECT '[1,2,3'::halfvec;
^
DETAIL: Unexpected end of input.
SELECT '[1,2,3]9'::halfvec;
ERROR: malformed halfvec literal: "[1,2,3]9"
LINE 1: SELECT '[1,2,3]9'::halfvec;
^
DETAIL: Junk after closing right brace.
SELECT '1,2,3'::halfvec;
ERROR: malformed halfvec literal: "1,2,3"
LINE 1: SELECT '1,2,3'::halfvec;
^
DETAIL: Vector contents must start with "[".
SELECT ''::halfvec;
ERROR: malformed halfvec literal: ""
LINE 1: SELECT ''::halfvec;
^
DETAIL: Vector contents must start with "[".
SELECT '['::halfvec;
ERROR: malformed halfvec literal: "["
LINE 1: SELECT '['::halfvec;
^
DETAIL: Unexpected end of input.
SELECT '[,'::halfvec;
ERROR: malformed halfvec literal: "[,"
LINE 1: SELECT '[,'::halfvec;
^
DETAIL: Unexpected end of input.
SELECT '[]'::halfvec;
ERROR: halfvec must have at least 1 dimension
LINE 1: SELECT '[]'::halfvec;
^
SELECT '[1,]'::halfvec;
ERROR: invalid input syntax for type halfvec: "[1,]"
LINE 1: SELECT '[1,]'::halfvec;
^
SELECT '[1a]'::halfvec;
ERROR: invalid input syntax for type halfvec: "[1a]"
LINE 1: SELECT '[1a]'::halfvec;
^
SELECT '[1,,3]'::halfvec;
ERROR: malformed halfvec literal: "[1,,3]"
LINE 1: SELECT '[1,,3]'::halfvec;
^
SELECT '[1, ,3]'::halfvec;
ERROR: invalid input syntax for type halfvec: "[1, ,3]"
LINE 1: SELECT '[1, ,3]'::halfvec;
^
SELECT '[1,2,3]'::halfvec(3);
halfvec
---------
[1,2,3]
(1 row)
SELECT '[1,2,3]'::halfvec(2);
ERROR: expected 2 dimensions, not 3
SELECT '[1,2,3]'::halfvec(3, 2);
ERROR: invalid type modifier
LINE 1: SELECT '[1,2,3]'::halfvec(3, 2);
^
SELECT '[1,2,3]'::halfvec('a');
ERROR: invalid input syntax for type integer: "a"
LINE 1: SELECT '[1,2,3]'::halfvec('a');
^
SELECT '[1,2,3]'::halfvec(0);
ERROR: dimensions for type halfvec must be at least 1
LINE 1: SELECT '[1,2,3]'::halfvec(0);
^
SELECT '[1,2,3]'::halfvec(16001);
ERROR: dimensions for type halfvec cannot exceed 16000
LINE 1: SELECT '[1,2,3]'::halfvec(16001);
^
SELECT unnest('{"[1,2,3]", "[4,5,6]"}'::halfvec[]);
unnest
---------
[1,2,3]
[4,5,6]
(2 rows)
SELECT '{"[1,2,3]"}'::halfvec(2)[];
ERROR: expected 2 dimensions, not 3

View File

@@ -0,0 +1,26 @@
SET enable_seqscan = off;
CREATE TABLE t (val halfvec(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING hnsw (val halfvec_cosine_ops);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <=> '[3,3,3]';
val
---------
[1,1,1]
[1,2,3]
[1,2,4]
(3 rows)
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '[0,0,0]') t2;
count
-------
3
(1 row)
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::halfvec)) t2;
count
-------
3
(1 row)
DROP TABLE t;

View File

@@ -0,0 +1,21 @@
SET enable_seqscan = off;
CREATE TABLE t (val halfvec(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING hnsw (val halfvec_ip_ops);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <#> '[3,3,3]';
val
---------
[1,2,4]
[1,2,3]
[1,1,1]
[0,0,0]
(4 rows)
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::halfvec)) t2;
count
-------
4
(1 row)
DROP TABLE t;

View File

@@ -0,0 +1,33 @@
SET enable_seqscan = off;
CREATE TABLE t (val halfvec(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING hnsw (val halfvec_l2_ops);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
val
---------
[1,2,3]
[1,2,4]
[1,1,1]
[0,0,0]
(4 rows)
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <-> (SELECT NULL::halfvec)) t2;
count
-------
4
(1 row)
SELECT COUNT(*) FROM t;
count
-------
5
(1 row)
TRUNCATE t;
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
val
-----
(0 rows)
DROP TABLE t;

View File

@@ -10,6 +10,12 @@ SELECT '{-Infinity}'::real[]::vector;
SELECT '{}'::real[]::vector;
SELECT '{{1}}'::real[]::vector;
SELECT '[1,2,3]'::vector::real[];
SELECT '[1,2,3]'::vector::halfvec;
SELECT '[1,2,3]'::halfvec::vector;
SELECT '[1,2,3]'::vector::halfvec(2);
SELECT '[1,2,3]'::halfvec::vector(2);
SELECT '[65520]'::vector::halfvec;
SELECT '[1e-8]'::vector::halfvec;
SELECT array_agg(n)::vector FROM generate_series(1, 16001) n;
SELECT array_to_vector(array_agg(n), 16001, false) FROM generate_series(1, 16001) n;

View File

@@ -1,7 +1,7 @@
CREATE TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE TABLE t (val vector(3), val2 halfvec(3));
INSERT INTO t (val, val2) VALUES ('[0,0,0]', '[0,0,0]'), ('[1,2,3]', '[1,2,3]'), ('[1,1,1]', '[1,1,1]'), (NULL, NULL);
CREATE TABLE t2 (val vector(3));
CREATE TABLE t2 (val vector(3), val2 halfvec(3));
\copy t TO 'results/data.bin' WITH (FORMAT binary)
\copy t2 FROM 'results/data.bin' WITH (FORMAT binary)

View File

@@ -0,0 +1,23 @@
SELECT l2_distance('[0,0]'::halfvec, '[3,4]');
SELECT l2_distance('[0,0]'::halfvec, '[0,1]');
SELECT l2_distance('[1,2]'::halfvec, '[3]');
SELECT '[0,0]'::halfvec <-> '[3,4]';
SELECT inner_product('[1,2]'::halfvec, '[3,4]');
SELECT inner_product('[1,2]'::halfvec, '[3]');
SELECT inner_product('[65504]'::halfvec, '[65504]');
SELECT '[1,2]'::halfvec <#> '[3,4]';
SELECT cosine_distance('[1,2]'::halfvec, '[2,4]');
SELECT cosine_distance('[1,2]'::halfvec, '[0,0]');
SELECT cosine_distance('[1,1]'::halfvec, '[1,1]');
SELECT cosine_distance('[1,0]'::halfvec, '[0,2]');
SELECT cosine_distance('[1,1]'::halfvec, '[-1,-1]');
SELECT cosine_distance('[1,2]'::halfvec, '[3]');
SELECT cosine_distance('[1,1]'::halfvec, '[1.1,1.1]');
SELECT cosine_distance('[1,1]'::halfvec, '[-1.1,-1.1]');
SELECT '[1,2]'::halfvec <=> '[2,4]';
SELECT l1_distance('[0,0]'::halfvec, '[3,4]');
SELECT l1_distance('[0,0]'::halfvec, '[0,1]');
SELECT l1_distance('[1,2]'::halfvec, '[3]');

View File

@@ -0,0 +1,34 @@
SELECT '[1,2,3]'::halfvec;
SELECT '[-1,-2,-3]'::halfvec;
SELECT '[1.,2.,3.]'::halfvec;
SELECT ' [ 1, 2 , 3 ] '::halfvec;
SELECT '[1.23456]'::halfvec;
SELECT '[hello,1]'::halfvec;
SELECT '[NaN,1]'::halfvec;
SELECT '[Infinity,1]'::halfvec;
SELECT '[-Infinity,1]'::halfvec;
SELECT '[65519,-65519]'::halfvec;
SELECT '[65520,-65520]'::halfvec;
SELECT '[1e-8,-1e-8]'::halfvec;
SELECT '[4e38,1]'::halfvec;
SELECT '[1,2,3'::halfvec;
SELECT '[1,2,3]9'::halfvec;
SELECT '1,2,3'::halfvec;
SELECT ''::halfvec;
SELECT '['::halfvec;
SELECT '[,'::halfvec;
SELECT '[]'::halfvec;
SELECT '[1,]'::halfvec;
SELECT '[1a]'::halfvec;
SELECT '[1,,3]'::halfvec;
SELECT '[1, ,3]'::halfvec;
SELECT '[1,2,3]'::halfvec(3);
SELECT '[1,2,3]'::halfvec(2);
SELECT '[1,2,3]'::halfvec(3, 2);
SELECT '[1,2,3]'::halfvec('a');
SELECT '[1,2,3]'::halfvec(0);
SELECT '[1,2,3]'::halfvec(16001);
SELECT unnest('{"[1,2,3]", "[4,5,6]"}'::halfvec[]);
SELECT '{"[1,2,3]"}'::halfvec(2)[];

View File

@@ -0,0 +1,13 @@
SET enable_seqscan = off;
CREATE TABLE t (val halfvec(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING hnsw (val halfvec_cosine_ops);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <=> '[3,3,3]';
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '[0,0,0]') t2;
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::halfvec)) t2;
DROP TABLE t;

View File

@@ -0,0 +1,12 @@
SET enable_seqscan = off;
CREATE TABLE t (val halfvec(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING hnsw (val halfvec_ip_ops);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <#> '[3,3,3]';
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::halfvec)) t2;
DROP TABLE t;

View File

@@ -0,0 +1,16 @@
SET enable_seqscan = off;
CREATE TABLE t (val halfvec(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING hnsw (val halfvec_l2_ops);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <-> (SELECT NULL::halfvec)) t2;
SELECT COUNT(*) FROM t;
TRUNCATE t;
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
DROP TABLE t;

View File

@@ -0,0 +1,132 @@
use strict;
use warnings;
use PostgresNode;
use TestLib;
use Test::More;
# Test node; assigned once during setup further down
my $node;
# Query vectors as text literals and, index-aligned with them, the
# newline-separated ids that serve as the reference answer for each query
my (@queries, @expected);
# Number of rows requested per query
my $limit = 20;
# Number of dimensions in every stored and queried vector
my $dim = 10;
# Comma-separated list of $dim random() calls, used inside ARRAY[...] on insert
my $array_sql = join(",", map { 'random()' } 1 .. $dim);
# Runs every entry of @queries with sequential scans disabled and checks that
# the share of reference ids (taken from @expected) that also appear among the
# returned ids is at least $min.
#
# $min      - lowest acceptable recall, as a fraction
# $operator - distance operator placed in the ORDER BY clause; it is also used
#             as the name of the recall check
sub test_recall
{
    my ($min, $operator) = @_;
    my $hits = 0;
    my $wanted = 0;
    my $disable_seqscan = "SET enable_seqscan = off;";

    # The plan for the first query has to mention an index scan
    my $plan = $node->safe_psql("postgres",
        "\n$disable_seqscan\nEXPLAIN ANALYZE SELECT i FROM tst ORDER BY v $operator '$queries[0]' LIMIT $limit;\n");
    like($plan, qr/Index Scan/);

    for my $query_index (0 .. $#queries)
    {
        my $rows = $node->safe_psql("postgres",
            "\n$disable_seqscan\nSELECT i FROM tst ORDER BY v $operator '$queries[$query_index]' LIMIT $limit;\n");

        # One id per output line; keep them as hash keys for cheap lookups
        my %returned = map { $_ => 1 } split("\n", $rows);

        # Every reference id counts towards the total, found or not
        for my $id (split("\n", $expected[$query_index]))
        {
            $wanted++;
            $hits++ if exists($returned{$id});
        }
    }

    cmp_ok($hits / $wanted, ">=", $min, $operator);
}
# Create, initialize and start the node that every statement below runs against
$node = get_new_node('node');
$node->init();
$node->start();

# Install the extension, create the table, then fill it with 10000 rows whose
# vectors are built from $array_sql
for my $setup_sql (
    "CREATE EXTENSION vector;",
    "CREATE TABLE tst (i int4, v halfvec($dim));",
    "INSERT INTO tst SELECT i, ARRAY[$array_sql] FROM generate_series(1, 10000) i;",
  )
{
    $node->safe_psql("postgres", $setup_sql);
}
# Generate 20 query vectors, each a bracketed, comma-separated list of $dim
# values drawn from rand()
for my $query_number (1 .. 20)
{
    my @components = map { rand() } 1 .. $dim;
    push(@queries, "[" . join(",", @components) . "]");
}
# Check each index type: every distance operator is paired with the operator
# class used to build the HNSW index for it
my @index_types = (
    [ "<->", "halfvec_l2_ops" ],
    [ "<#>", "halfvec_ip_ops" ],
    [ "<=>", "halfvec_cosine_ops" ],
);
for my $index_type (@index_types)
{
    my ($operator, $opclass) = @$index_type;
    my $create_index = "CREATE INDEX idx ON tst USING hnsw (v $opclass);";
    my $drop_index = "DROP INDEX idx;";

    # Collect the reference results while no index exists on the table
    @expected = ();
    for my $query (@queries)
    {
        push(@expected, scalar($node->safe_psql("postgres",
            "SELECT i FROM tst ORDER BY v $operator '$query' LIMIT $limit;")));
    }

    # Build index serially. In the join() calls below, the empty first and
    # last elements add a leading and a trailing newline to the SQL text.
    $node->safe_psql("postgres", join("\n",
        "",
        "SET max_parallel_maintenance_workers = 0;",
        $create_index,
        ""));

    # Test approximate results; the <#> operator gets a lower recall threshold
    my $min;
    if ($operator eq "<#>")
    {
        $min = 0.95;
    }
    else
    {
        $min = 0.99;
    }
    test_recall($min, $operator);
    $node->safe_psql("postgres", $drop_index);

    # Build index in parallel in memory. psql is called in list context so
    # that the DEBUG messages on stderr can be inspected.
    my ($status, $out, $err) = $node->psql("postgres", join("\n",
        "",
        "SET client_min_messages = DEBUG;",
        "SET min_parallel_table_scan_size = 1;",
        $create_index,
        ""));
    is($status, 0, $err);
    like($err, qr/using \d+ parallel workers/);

    # Test approximate results
    test_recall($min, $operator);
    $node->safe_psql("postgres", $drop_index);

    # Build index in parallel on disk.
    # parallel_workers is set on the table so that workers are still used
    # with the low maintenance_work_mem.
    ($status, $out, $err) = $node->psql("postgres", join("\n",
        "",
        "ALTER TABLE tst SET (parallel_workers = 2);",
        "SET client_min_messages = DEBUG;",
        "SET maintenance_work_mem = '4MB';",
        $create_index,
        "ALTER TABLE tst RESET (parallel_workers);",
        ""));
    is($status, 0, $err);
    like($err, qr/using \d+ parallel workers/);
    like($err, qr/hnsw graph no longer fits into maintenance_work_mem/);
    $node->safe_psql("postgres", $drop_index);
}
done_testing();