mirror of
https://github.com/pgvector/pgvector.git
synced 2026-07-05 04:00:55 +08:00
Added halfvec type
This commit is contained in:
@@ -46,6 +46,30 @@ SELECT '[1,2,3]'::vector::real[];
|
||||
{1,2,3}
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2,3]'::vector::halfvec;
|
||||
halfvec
|
||||
---------
|
||||
[1,2,3]
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2,3]'::halfvec::vector;
|
||||
vector
|
||||
---------
|
||||
[1,2,3]
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2,3]'::vector::halfvec(2);
|
||||
ERROR: expected 2 dimensions, not 3
|
||||
SELECT '[1,2,3]'::halfvec::vector(2);
|
||||
ERROR: expected 2 dimensions, not 3
|
||||
SELECT '[65520]'::vector::halfvec;
|
||||
ERROR: infinite value not allowed in halfvec
|
||||
SELECT '[1e-8]'::vector::halfvec;
|
||||
halfvec
|
||||
---------
|
||||
[0]
|
||||
(1 row)
|
||||
|
||||
SELECT array_agg(n)::vector FROM generate_series(1, 16001) n;
|
||||
ERROR: vector cannot have more than 16000 dimensions
|
||||
SELECT array_to_vector(array_agg(n), 16001, false) FROM generate_series(1, 16001) n;
|
||||
|
||||
@@ -1,15 +1,15 @@
|
||||
CREATE TABLE t (val vector(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE TABLE t2 (val vector(3));
|
||||
CREATE TABLE t (val vector(3), val2 halfvec(3));
|
||||
INSERT INTO t (val, val2) VALUES ('[0,0,0]', '[0,0,0]'), ('[1,2,3]', '[1,2,3]'), ('[1,1,1]', '[1,1,1]'), (NULL, NULL);
|
||||
CREATE TABLE t2 (val vector(3), val2 halfvec(3));
|
||||
\copy t TO 'results/data.bin' WITH (FORMAT binary)
|
||||
\copy t2 FROM 'results/data.bin' WITH (FORMAT binary)
|
||||
SELECT * FROM t2 ORDER BY val;
|
||||
val
|
||||
---------
|
||||
[0,0,0]
|
||||
[1,1,1]
|
||||
[1,2,3]
|
||||
|
||||
val | val2
|
||||
---------+---------
|
||||
[0,0,0] | [0,0,0]
|
||||
[1,1,1] | [1,1,1]
|
||||
[1,2,3] | [1,2,3]
|
||||
|
|
||||
(4 rows)
|
||||
|
||||
DROP TABLE t;
|
||||
|
||||
104
test/expected/halfvec_functions.out
Normal file
104
test/expected/halfvec_functions.out
Normal file
@@ -0,0 +1,104 @@
|
||||
SELECT l2_distance('[0,0]'::halfvec, '[3,4]');
|
||||
l2_distance
|
||||
-------------
|
||||
5
|
||||
(1 row)
|
||||
|
||||
SELECT l2_distance('[0,0]'::halfvec, '[0,1]');
|
||||
l2_distance
|
||||
-------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT l2_distance('[1,2]'::halfvec, '[3]');
|
||||
ERROR: different halfvec dimensions 2 and 1
|
||||
SELECT '[0,0]'::halfvec <-> '[3,4]';
|
||||
?column?
|
||||
----------
|
||||
5
|
||||
(1 row)
|
||||
|
||||
SELECT inner_product('[1,2]'::halfvec, '[3,4]');
|
||||
inner_product
|
||||
---------------
|
||||
11
|
||||
(1 row)
|
||||
|
||||
SELECT inner_product('[1,2]'::halfvec, '[3]');
|
||||
ERROR: different halfvec dimensions 2 and 1
|
||||
SELECT inner_product('[65504]'::halfvec, '[65504]');
|
||||
inner_product
|
||||
---------------
|
||||
4290774016
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2]'::halfvec <#> '[3,4]';
|
||||
?column?
|
||||
----------
|
||||
-11
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,2]'::halfvec, '[2,4]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,2]'::halfvec, '[0,0]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
NaN
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,1]'::halfvec, '[1,1]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,0]'::halfvec, '[0,2]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,1]'::halfvec, '[-1,-1]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
2
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,2]'::halfvec, '[3]');
|
||||
ERROR: different halfvec dimensions 2 and 1
|
||||
SELECT cosine_distance('[1,1]'::halfvec, '[1.1,1.1]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,1]'::halfvec, '[-1.1,-1.1]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
2
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2]'::halfvec <=> '[2,4]';
|
||||
?column?
|
||||
----------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT l1_distance('[0,0]'::halfvec, '[3,4]');
|
||||
l1_distance
|
||||
-------------
|
||||
7
|
||||
(1 row)
|
||||
|
||||
SELECT l1_distance('[0,0]'::halfvec, '[0,1]');
|
||||
l1_distance
|
||||
-------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT l1_distance('[1,2]'::halfvec, '[3]');
|
||||
ERROR: different halfvec dimensions 2 and 1
|
||||
147
test/expected/halfvec_input.out
Normal file
147
test/expected/halfvec_input.out
Normal file
@@ -0,0 +1,147 @@
|
||||
SELECT '[1,2,3]'::halfvec;
|
||||
halfvec
|
||||
---------
|
||||
[1,2,3]
|
||||
(1 row)
|
||||
|
||||
SELECT '[-1,-2,-3]'::halfvec;
|
||||
halfvec
|
||||
------------
|
||||
[-1,-2,-3]
|
||||
(1 row)
|
||||
|
||||
SELECT '[1.,2.,3.]'::halfvec;
|
||||
halfvec
|
||||
---------
|
||||
[1,2,3]
|
||||
(1 row)
|
||||
|
||||
SELECT ' [ 1, 2 , 3 ] '::halfvec;
|
||||
halfvec
|
||||
---------
|
||||
[1,2,3]
|
||||
(1 row)
|
||||
|
||||
SELECT '[1.23456]'::halfvec;
|
||||
halfvec
|
||||
------------
|
||||
[1.234375]
|
||||
(1 row)
|
||||
|
||||
SELECT '[hello,1]'::halfvec;
|
||||
ERROR: invalid input syntax for type halfvec: "[hello,1]"
|
||||
LINE 1: SELECT '[hello,1]'::halfvec;
|
||||
^
|
||||
SELECT '[NaN,1]'::halfvec;
|
||||
ERROR: NaN not allowed in halfvec
|
||||
LINE 1: SELECT '[NaN,1]'::halfvec;
|
||||
^
|
||||
SELECT '[Infinity,1]'::halfvec;
|
||||
ERROR: infinite value not allowed in halfvec
|
||||
LINE 1: SELECT '[Infinity,1]'::halfvec;
|
||||
^
|
||||
SELECT '[-Infinity,1]'::halfvec;
|
||||
ERROR: infinite value not allowed in halfvec
|
||||
LINE 1: SELECT '[-Infinity,1]'::halfvec;
|
||||
^
|
||||
SELECT '[65519,-65519]'::halfvec;
|
||||
halfvec
|
||||
----------------
|
||||
[65504,-65504]
|
||||
(1 row)
|
||||
|
||||
SELECT '[65520,-65520]'::halfvec;
|
||||
ERROR: value out of range: overflow
|
||||
LINE 1: SELECT '[65520,-65520]'::halfvec;
|
||||
^
|
||||
SELECT '[1e-8,-1e-8]'::halfvec;
|
||||
ERROR: value out of range: underflow
|
||||
LINE 1: SELECT '[1e-8,-1e-8]'::halfvec;
|
||||
^
|
||||
SELECT '[4e38,1]'::halfvec;
|
||||
ERROR: infinite value not allowed in halfvec
|
||||
LINE 1: SELECT '[4e38,1]'::halfvec;
|
||||
^
|
||||
SELECT '[1,2,3'::halfvec;
|
||||
ERROR: malformed halfvec literal: "[1,2,3"
|
||||
LINE 1: SELECT '[1,2,3'::halfvec;
|
||||
^
|
||||
DETAIL: Unexpected end of input.
|
||||
SELECT '[1,2,3]9'::halfvec;
|
||||
ERROR: malformed halfvec literal: "[1,2,3]9"
|
||||
LINE 1: SELECT '[1,2,3]9'::halfvec;
|
||||
^
|
||||
DETAIL: Junk after closing right brace.
|
||||
SELECT '1,2,3'::halfvec;
|
||||
ERROR: malformed halfvec literal: "1,2,3"
|
||||
LINE 1: SELECT '1,2,3'::halfvec;
|
||||
^
|
||||
DETAIL: Vector contents must start with "[".
|
||||
SELECT ''::halfvec;
|
||||
ERROR: malformed halfvec literal: ""
|
||||
LINE 1: SELECT ''::halfvec;
|
||||
^
|
||||
DETAIL: Vector contents must start with "[".
|
||||
SELECT '['::halfvec;
|
||||
ERROR: malformed halfvec literal: "["
|
||||
LINE 1: SELECT '['::halfvec;
|
||||
^
|
||||
DETAIL: Unexpected end of input.
|
||||
SELECT '[,'::halfvec;
|
||||
ERROR: malformed halfvec literal: "[,"
|
||||
LINE 1: SELECT '[,'::halfvec;
|
||||
^
|
||||
DETAIL: Unexpected end of input.
|
||||
SELECT '[]'::halfvec;
|
||||
ERROR: halfvec must have at least 1 dimension
|
||||
LINE 1: SELECT '[]'::halfvec;
|
||||
^
|
||||
SELECT '[1,]'::halfvec;
|
||||
ERROR: invalid input syntax for type halfvec: "[1,]"
|
||||
LINE 1: SELECT '[1,]'::halfvec;
|
||||
^
|
||||
SELECT '[1a]'::halfvec;
|
||||
ERROR: invalid input syntax for type halfvec: "[1a]"
|
||||
LINE 1: SELECT '[1a]'::halfvec;
|
||||
^
|
||||
SELECT '[1,,3]'::halfvec;
|
||||
ERROR: malformed halfvec literal: "[1,,3]"
|
||||
LINE 1: SELECT '[1,,3]'::halfvec;
|
||||
^
|
||||
SELECT '[1, ,3]'::halfvec;
|
||||
ERROR: invalid input syntax for type halfvec: "[1, ,3]"
|
||||
LINE 1: SELECT '[1, ,3]'::halfvec;
|
||||
^
|
||||
SELECT '[1,2,3]'::halfvec(3);
|
||||
halfvec
|
||||
---------
|
||||
[1,2,3]
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2,3]'::halfvec(2);
|
||||
ERROR: expected 2 dimensions, not 3
|
||||
SELECT '[1,2,3]'::halfvec(3, 2);
|
||||
ERROR: invalid type modifier
|
||||
LINE 1: SELECT '[1,2,3]'::halfvec(3, 2);
|
||||
^
|
||||
SELECT '[1,2,3]'::halfvec('a');
|
||||
ERROR: invalid input syntax for type integer: "a"
|
||||
LINE 1: SELECT '[1,2,3]'::halfvec('a');
|
||||
^
|
||||
SELECT '[1,2,3]'::halfvec(0);
|
||||
ERROR: dimensions for type halfvec must be at least 1
|
||||
LINE 1: SELECT '[1,2,3]'::halfvec(0);
|
||||
^
|
||||
SELECT '[1,2,3]'::halfvec(16001);
|
||||
ERROR: dimensions for type halfvec cannot exceed 16000
|
||||
LINE 1: SELECT '[1,2,3]'::halfvec(16001);
|
||||
^
|
||||
SELECT unnest('{"[1,2,3]", "[4,5,6]"}'::halfvec[]);
|
||||
unnest
|
||||
---------
|
||||
[1,2,3]
|
||||
[4,5,6]
|
||||
(2 rows)
|
||||
|
||||
SELECT '{"[1,2,3]"}'::halfvec(2)[];
|
||||
ERROR: expected 2 dimensions, not 3
|
||||
26
test/expected/hnsw_halfvec_cosine.out
Normal file
26
test/expected/hnsw_halfvec_cosine.out
Normal file
@@ -0,0 +1,26 @@
|
||||
SET enable_seqscan = off;
|
||||
CREATE TABLE t (val halfvec(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val halfvec_cosine_ops);
|
||||
INSERT INTO t (val) VALUES ('[1,2,4]');
|
||||
SELECT * FROM t ORDER BY val <=> '[3,3,3]';
|
||||
val
|
||||
---------
|
||||
[1,1,1]
|
||||
[1,2,3]
|
||||
[1,2,4]
|
||||
(3 rows)
|
||||
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '[0,0,0]') t2;
|
||||
count
|
||||
-------
|
||||
3
|
||||
(1 row)
|
||||
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::halfvec)) t2;
|
||||
count
|
||||
-------
|
||||
3
|
||||
(1 row)
|
||||
|
||||
DROP TABLE t;
|
||||
21
test/expected/hnsw_halfvec_ip.out
Normal file
21
test/expected/hnsw_halfvec_ip.out
Normal file
@@ -0,0 +1,21 @@
|
||||
SET enable_seqscan = off;
|
||||
CREATE TABLE t (val halfvec(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val halfvec_ip_ops);
|
||||
INSERT INTO t (val) VALUES ('[1,2,4]');
|
||||
SELECT * FROM t ORDER BY val <#> '[3,3,3]';
|
||||
val
|
||||
---------
|
||||
[1,2,4]
|
||||
[1,2,3]
|
||||
[1,1,1]
|
||||
[0,0,0]
|
||||
(4 rows)
|
||||
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::halfvec)) t2;
|
||||
count
|
||||
-------
|
||||
4
|
||||
(1 row)
|
||||
|
||||
DROP TABLE t;
|
||||
33
test/expected/hnsw_halfvec_l2.out
Normal file
33
test/expected/hnsw_halfvec_l2.out
Normal file
@@ -0,0 +1,33 @@
|
||||
SET enable_seqscan = off;
|
||||
CREATE TABLE t (val halfvec(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val halfvec_l2_ops);
|
||||
INSERT INTO t (val) VALUES ('[1,2,4]');
|
||||
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
|
||||
val
|
||||
---------
|
||||
[1,2,3]
|
||||
[1,2,4]
|
||||
[1,1,1]
|
||||
[0,0,0]
|
||||
(4 rows)
|
||||
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <-> (SELECT NULL::halfvec)) t2;
|
||||
count
|
||||
-------
|
||||
4
|
||||
(1 row)
|
||||
|
||||
SELECT COUNT(*) FROM t;
|
||||
count
|
||||
-------
|
||||
5
|
||||
(1 row)
|
||||
|
||||
TRUNCATE t;
|
||||
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
|
||||
val
|
||||
-----
|
||||
(0 rows)
|
||||
|
||||
DROP TABLE t;
|
||||
@@ -10,6 +10,12 @@ SELECT '{-Infinity}'::real[]::vector;
|
||||
SELECT '{}'::real[]::vector;
|
||||
SELECT '{{1}}'::real[]::vector;
|
||||
SELECT '[1,2,3]'::vector::real[];
|
||||
SELECT '[1,2,3]'::vector::halfvec;
|
||||
SELECT '[1,2,3]'::halfvec::vector;
|
||||
SELECT '[1,2,3]'::vector::halfvec(2);
|
||||
SELECT '[1,2,3]'::halfvec::vector(2);
|
||||
SELECT '[65520]'::vector::halfvec;
|
||||
SELECT '[1e-8]'::vector::halfvec;
|
||||
SELECT array_agg(n)::vector FROM generate_series(1, 16001) n;
|
||||
SELECT array_to_vector(array_agg(n), 16001, false) FROM generate_series(1, 16001) n;
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
CREATE TABLE t (val vector(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE TABLE t (val vector(3), val2 halfvec(3));
|
||||
INSERT INTO t (val, val2) VALUES ('[0,0,0]', '[0,0,0]'), ('[1,2,3]', '[1,2,3]'), ('[1,1,1]', '[1,1,1]'), (NULL, NULL);
|
||||
|
||||
CREATE TABLE t2 (val vector(3));
|
||||
CREATE TABLE t2 (val vector(3), val2 halfvec(3));
|
||||
|
||||
\copy t TO 'results/data.bin' WITH (FORMAT binary)
|
||||
\copy t2 FROM 'results/data.bin' WITH (FORMAT binary)
|
||||
|
||||
23
test/sql/halfvec_functions.sql
Normal file
23
test/sql/halfvec_functions.sql
Normal file
@@ -0,0 +1,23 @@
|
||||
SELECT l2_distance('[0,0]'::halfvec, '[3,4]');
|
||||
SELECT l2_distance('[0,0]'::halfvec, '[0,1]');
|
||||
SELECT l2_distance('[1,2]'::halfvec, '[3]');
|
||||
SELECT '[0,0]'::halfvec <-> '[3,4]';
|
||||
|
||||
SELECT inner_product('[1,2]'::halfvec, '[3,4]');
|
||||
SELECT inner_product('[1,2]'::halfvec, '[3]');
|
||||
SELECT inner_product('[65504]'::halfvec, '[65504]');
|
||||
SELECT '[1,2]'::halfvec <#> '[3,4]';
|
||||
|
||||
SELECT cosine_distance('[1,2]'::halfvec, '[2,4]');
|
||||
SELECT cosine_distance('[1,2]'::halfvec, '[0,0]');
|
||||
SELECT cosine_distance('[1,1]'::halfvec, '[1,1]');
|
||||
SELECT cosine_distance('[1,0]'::halfvec, '[0,2]');
|
||||
SELECT cosine_distance('[1,1]'::halfvec, '[-1,-1]');
|
||||
SELECT cosine_distance('[1,2]'::halfvec, '[3]');
|
||||
SELECT cosine_distance('[1,1]'::halfvec, '[1.1,1.1]');
|
||||
SELECT cosine_distance('[1,1]'::halfvec, '[-1.1,-1.1]');
|
||||
SELECT '[1,2]'::halfvec <=> '[2,4]';
|
||||
|
||||
SELECT l1_distance('[0,0]'::halfvec, '[3,4]');
|
||||
SELECT l1_distance('[0,0]'::halfvec, '[0,1]');
|
||||
SELECT l1_distance('[1,2]'::halfvec, '[3]');
|
||||
34
test/sql/halfvec_input.sql
Normal file
34
test/sql/halfvec_input.sql
Normal file
@@ -0,0 +1,34 @@
|
||||
SELECT '[1,2,3]'::halfvec;
|
||||
SELECT '[-1,-2,-3]'::halfvec;
|
||||
SELECT '[1.,2.,3.]'::halfvec;
|
||||
SELECT ' [ 1, 2 , 3 ] '::halfvec;
|
||||
SELECT '[1.23456]'::halfvec;
|
||||
SELECT '[hello,1]'::halfvec;
|
||||
SELECT '[NaN,1]'::halfvec;
|
||||
SELECT '[Infinity,1]'::halfvec;
|
||||
SELECT '[-Infinity,1]'::halfvec;
|
||||
SELECT '[65519,-65519]'::halfvec;
|
||||
SELECT '[65520,-65520]'::halfvec;
|
||||
SELECT '[1e-8,-1e-8]'::halfvec;
|
||||
SELECT '[4e38,1]'::halfvec;
|
||||
SELECT '[1,2,3'::halfvec;
|
||||
SELECT '[1,2,3]9'::halfvec;
|
||||
SELECT '1,2,3'::halfvec;
|
||||
SELECT ''::halfvec;
|
||||
SELECT '['::halfvec;
|
||||
SELECT '[,'::halfvec;
|
||||
SELECT '[]'::halfvec;
|
||||
SELECT '[1,]'::halfvec;
|
||||
SELECT '[1a]'::halfvec;
|
||||
SELECT '[1,,3]'::halfvec;
|
||||
SELECT '[1, ,3]'::halfvec;
|
||||
|
||||
SELECT '[1,2,3]'::halfvec(3);
|
||||
SELECT '[1,2,3]'::halfvec(2);
|
||||
SELECT '[1,2,3]'::halfvec(3, 2);
|
||||
SELECT '[1,2,3]'::halfvec('a');
|
||||
SELECT '[1,2,3]'::halfvec(0);
|
||||
SELECT '[1,2,3]'::halfvec(16001);
|
||||
|
||||
SELECT unnest('{"[1,2,3]", "[4,5,6]"}'::halfvec[]);
|
||||
SELECT '{"[1,2,3]"}'::halfvec(2)[];
|
||||
13
test/sql/hnsw_halfvec_cosine.sql
Normal file
13
test/sql/hnsw_halfvec_cosine.sql
Normal file
@@ -0,0 +1,13 @@
|
||||
SET enable_seqscan = off;
|
||||
|
||||
CREATE TABLE t (val halfvec(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val halfvec_cosine_ops);
|
||||
|
||||
INSERT INTO t (val) VALUES ('[1,2,4]');
|
||||
|
||||
SELECT * FROM t ORDER BY val <=> '[3,3,3]';
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '[0,0,0]') t2;
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::halfvec)) t2;
|
||||
|
||||
DROP TABLE t;
|
||||
12
test/sql/hnsw_halfvec_ip.sql
Normal file
12
test/sql/hnsw_halfvec_ip.sql
Normal file
@@ -0,0 +1,12 @@
|
||||
SET enable_seqscan = off;
|
||||
|
||||
CREATE TABLE t (val halfvec(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val halfvec_ip_ops);
|
||||
|
||||
INSERT INTO t (val) VALUES ('[1,2,4]');
|
||||
|
||||
SELECT * FROM t ORDER BY val <#> '[3,3,3]';
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::halfvec)) t2;
|
||||
|
||||
DROP TABLE t;
|
||||
16
test/sql/hnsw_halfvec_l2.sql
Normal file
16
test/sql/hnsw_halfvec_l2.sql
Normal file
@@ -0,0 +1,16 @@
|
||||
SET enable_seqscan = off;
|
||||
|
||||
CREATE TABLE t (val halfvec(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val halfvec_l2_ops);
|
||||
|
||||
INSERT INTO t (val) VALUES ('[1,2,4]');
|
||||
|
||||
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <-> (SELECT NULL::halfvec)) t2;
|
||||
SELECT COUNT(*) FROM t;
|
||||
|
||||
TRUNCATE t;
|
||||
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
|
||||
|
||||
DROP TABLE t;
|
||||
132
test/t/021_hnsw_halfvec_build_recall.pl
Normal file
132
test/t/021_hnsw_halfvec_build_recall.pl
Normal file
@@ -0,0 +1,132 @@
|
||||
use strict;
|
||||
use warnings;
|
||||
use PostgresNode;
|
||||
use TestLib;
|
||||
use Test::More;
|
||||
|
||||
# State shared between test_recall and the main script body.
my $node;          # test node handle, assigned once the node is created
my @queries;       # query vectors, each a '[x,y,...]' literal
my @expected;      # exact results (newline-separated ids), one entry per query
my $limit = 20;    # LIMIT used by every nearest-neighbour query
my $dim = 10;      # dimensions of the halfvec column and of each query vector

# Comma-separated list of $dim random() calls, spliced into ARRAY[...] on insert
my $array_sql = join(",", map { 'random()' } 1 .. $dim);
|
||||
|
||||
# Measure the recall of index scans against the exact results in @expected.
#
# Arguments:
#   $min      - minimum acceptable recall, as a fraction between 0 and 1
#   $operator - distance operator to order by (e.g. "<->")
#
# For each entry of @queries, runs the ORDER BY ... LIMIT query with
# sequential scans disabled and counts how many ids of the matching @expected
# entry were also returned. Emits two Test::More results: that the plan for
# the first query contains an index scan, and that overall recall is >= $min.
sub test_recall
{
	my ($min, $operator) = @_;
	my $correct = 0;
	my $total = 0;

	# The recall numbers are only meaningful if the index is actually used
	my $explain = $node->safe_psql("postgres", qq(
		SET enable_seqscan = off;
		EXPLAIN ANALYZE SELECT i FROM tst ORDER BY v $operator '$queries[0]' LIMIT $limit;
	));
	like($explain, qr/Index Scan/, "$operator uses index scan");

	for my $i (0 .. $#queries)
	{
		my $actual = $node->safe_psql("postgres", qq(
			SET enable_seqscan = off;
			SELECT i FROM tst ORDER BY v $operator '$queries[$i]' LIMIT $limit;
		));

		# One id per output line; a hash gives constant-time membership checks
		my %actual_set = map { $_ => 1 } split("\n", $actual);
		my @expected_ids = split("\n", $expected[$i]);

		$correct += scalar(grep { exists($actual_set{$_}) } @expected_ids);
		$total += scalar(@expected_ids);
	}

	# With nothing to compare, report a recall of 0 (a failed check) instead
	# of dying on a division by zero
	my $recall = $total > 0 ? $correct / $total : 0;

	cmp_ok($recall, ">=", $min, $operator);
}
|
||||
|
||||
# Initialize node
$node = get_new_node('node');
$node->init;
$node->start;

# Create table and fill it with 10000 rows of random $dim-dimensional vectors
$node->safe_psql("postgres", "CREATE EXTENSION vector;");
$node->safe_psql("postgres", "CREATE TABLE tst (i int4, v halfvec($dim));");
$node->safe_psql("postgres",
	"INSERT INTO tst SELECT i, ARRAY[$array_sql] FROM generate_series(1, 10000) i;"
);

# Generate 20 random query vectors as '[x,y,...]' literals
foreach my $query_no (1 .. 20)
{
	my $components = join(",", map { rand() } 1 .. $dim);
	push(@queries, "[$components]");
}
|
||||
|
||||
# Check each index type: a distance operator paired with its halfvec operator class
my @index_types = (
	[ "<->", "halfvec_l2_ops" ],
	[ "<#>", "halfvec_ip_ops" ],
	[ "<=>", "halfvec_cosine_ops" ],
);

foreach my $index_type (@index_types)
{
	my ($operator, $opclass) = @{$index_type};

	# Get exact results before any index is built for this operator
	@expected = ();
	foreach my $query (@queries)
	{
		push(@expected,
			scalar($node->safe_psql("postgres", "SELECT i FROM tst ORDER BY v $operator '$query' LIMIT $limit;")));
	}

	# Build index serially
	$node->safe_psql("postgres", qq(
		SET max_parallel_maintenance_workers = 0;
		CREATE INDEX idx ON tst USING hnsw (v $opclass);
	));

	# Test approximate results; inner product gets a lower recall threshold
	my $min = 0.99;
	$min = 0.95 if $operator eq "<#>";
	test_recall($min, $operator);

	$node->safe_psql("postgres", "DROP INDEX idx;");

	# Build index in parallel in memory
	my ($ret, $stdout, $stderr) = $node->psql("postgres", qq(
		SET client_min_messages = DEBUG;
		SET min_parallel_table_scan_size = 1;
		CREATE INDEX idx ON tst USING hnsw (v $opclass);
	));
	is($ret, 0, $stderr);

	# The DEBUG output must show that workers were launched
	like($stderr, qr/using \d+ parallel workers/);

	# Test approximate results
	test_recall($min, $operator);

	$node->safe_psql("postgres", "DROP INDEX idx;");

	# Build index in parallel on disk
	# Set parallel_workers on table to use workers with low maintenance_work_mem
	($ret, $stdout, $stderr) = $node->psql("postgres", qq(
		ALTER TABLE tst SET (parallel_workers = 2);
		SET client_min_messages = DEBUG;
		SET maintenance_work_mem = '4MB';
		CREATE INDEX idx ON tst USING hnsw (v $opclass);
		ALTER TABLE tst RESET (parallel_workers);
	));
	is($ret, 0, $stderr);
	like($stderr, qr/using \d+ parallel workers/);

	# The DEBUG output must also show the graph outgrowing maintenance_work_mem
	like($stderr, qr/hnsw graph no longer fits into maintenance_work_mem/);

	$node->safe_psql("postgres", "DROP INDEX idx;");
}

done_testing();
|
||||
Reference in New Issue
Block a user