mirror of
https://github.com/pgvector/pgvector.git
synced 2026-07-03 11:10:56 +08:00
Added intvec type
This commit is contained in:
@@ -1,15 +1,15 @@
|
||||
CREATE TABLE t (val vector(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE TABLE t2 (val vector(3));
|
||||
CREATE TABLE t (val vector(3), val2 intvec(3));
|
||||
INSERT INTO t (val, val2) VALUES ('[0,0,0]', '[0,0,0]'), ('[1,2,3]', '[1,2,3]'), ('[1,1,1]', '[1,1,1]'), (NULL, NULL);
|
||||
CREATE TABLE t2 (val vector(3), val2 intvec(3));
|
||||
\copy t TO 'results/data.bin' WITH (FORMAT binary)
|
||||
\copy t2 FROM 'results/data.bin' WITH (FORMAT binary)
|
||||
SELECT * FROM t2 ORDER BY val;
|
||||
val
|
||||
---------
|
||||
[0,0,0]
|
||||
[1,1,1]
|
||||
[1,2,3]
|
||||
|
||||
val | val2
|
||||
---------+---------
|
||||
[0,0,0] | [0,0,0]
|
||||
[1,1,1] | [1,1,1]
|
||||
[1,2,3] | [1,2,3]
|
||||
|
|
||||
(4 rows)
|
||||
|
||||
DROP TABLE t;
|
||||
|
||||
@@ -104,105 +104,105 @@ SELECT vector_norm('[3e37,4e37]')::real;
|
||||
5e+37
|
||||
(1 row)
|
||||
|
||||
SELECT l2_distance('[0,0]', '[3,4]');
|
||||
SELECT l2_distance('[0,0]'::vector, '[3,4]');
|
||||
l2_distance
|
||||
-------------
|
||||
5
|
||||
(1 row)
|
||||
|
||||
SELECT l2_distance('[0,0]', '[0,1]');
|
||||
SELECT l2_distance('[0,0]'::vector, '[0,1]');
|
||||
l2_distance
|
||||
-------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT l2_distance('[1,2]', '[3]');
|
||||
SELECT l2_distance('[1,2]'::vector, '[3]');
|
||||
ERROR: different vector dimensions 2 and 1
|
||||
SELECT l2_distance('[3e38]', '[-3e38]');
|
||||
SELECT l2_distance('[3e38]'::vector, '[-3e38]');
|
||||
l2_distance
|
||||
-------------
|
||||
Infinity
|
||||
(1 row)
|
||||
|
||||
SELECT inner_product('[1,2]', '[3,4]');
|
||||
SELECT inner_product('[1,2]'::vector, '[3,4]');
|
||||
inner_product
|
||||
---------------
|
||||
11
|
||||
(1 row)
|
||||
|
||||
SELECT inner_product('[1,2]', '[3]');
|
||||
SELECT inner_product('[1,2]'::vector, '[3]');
|
||||
ERROR: different vector dimensions 2 and 1
|
||||
SELECT inner_product('[3e38]', '[3e38]');
|
||||
SELECT inner_product('[3e38]'::vector, '[3e38]');
|
||||
inner_product
|
||||
---------------
|
||||
Infinity
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,2]', '[2,4]');
|
||||
SELECT cosine_distance('[1,2]'::vector, '[2,4]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,2]', '[0,0]');
|
||||
SELECT cosine_distance('[1,2]'::vector, '[0,0]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
NaN
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,1]', '[1,1]');
|
||||
SELECT cosine_distance('[1,1]'::vector, '[1,1]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,0]', '[0,2]');
|
||||
SELECT cosine_distance('[1,0]'::vector, '[0,2]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,1]', '[-1,-1]');
|
||||
SELECT cosine_distance('[1,1]'::vector, '[-1,-1]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
2
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,2]', '[3]');
|
||||
SELECT cosine_distance('[1,2]'::vector, '[3]');
|
||||
ERROR: different vector dimensions 2 and 1
|
||||
SELECT cosine_distance('[1,1]', '[1.1,1.1]');
|
||||
SELECT cosine_distance('[1,1]'::vector, '[1.1,1.1]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,1]', '[-1.1,-1.1]');
|
||||
SELECT cosine_distance('[1,1]'::vector, '[-1.1,-1.1]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
2
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[3e38]', '[3e38]');
|
||||
SELECT cosine_distance('[3e38]'::vector, '[3e38]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
NaN
|
||||
(1 row)
|
||||
|
||||
SELECT l1_distance('[0,0]', '[3,4]');
|
||||
SELECT l1_distance('[0,0]'::vector, '[3,4]');
|
||||
l1_distance
|
||||
-------------
|
||||
7
|
||||
(1 row)
|
||||
|
||||
SELECT l1_distance('[0,0]', '[0,1]');
|
||||
SELECT l1_distance('[0,0]'::vector, '[0,1]');
|
||||
l1_distance
|
||||
-------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT l1_distance('[1,2]', '[3]');
|
||||
SELECT l1_distance('[1,2]'::vector, '[3]');
|
||||
ERROR: different vector dimensions 2 and 1
|
||||
SELECT l1_distance('[3e38]', '[-3e38]');
|
||||
SELECT l1_distance('[3e38]'::vector, '[-3e38]');
|
||||
l1_distance
|
||||
-------------
|
||||
Infinity
|
||||
|
||||
27
test/expected/hnsw_intvec_cosine.out
Normal file
27
test/expected/hnsw_intvec_cosine.out
Normal file
@@ -0,0 +1,27 @@
|
||||
SET enable_seqscan = off;
|
||||
CREATE TABLE t (val intvec(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val intvec_cosine_ops);
|
||||
INSERT INTO t (val) VALUES ('[1,2,4]');
|
||||
SELECT * FROM t ORDER BY val <=> '[3,3,3]';
|
||||
val
|
||||
---------
|
||||
[1,1,1]
|
||||
[1,2,3]
|
||||
[1,2,4]
|
||||
[0,0,0]
|
||||
(4 rows)
|
||||
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '[0,0,0]') t2;
|
||||
count
|
||||
-------
|
||||
4
|
||||
(1 row)
|
||||
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::intvec)) t2;
|
||||
count
|
||||
-------
|
||||
4
|
||||
(1 row)
|
||||
|
||||
DROP TABLE t;
|
||||
0
test/expected/hnsw_intvec_cosine.out.diff
Normal file
0
test/expected/hnsw_intvec_cosine.out.diff
Normal file
21
test/expected/hnsw_intvec_ip.out
Normal file
21
test/expected/hnsw_intvec_ip.out
Normal file
@@ -0,0 +1,21 @@
|
||||
SET enable_seqscan = off;
|
||||
CREATE TABLE t (val intvec(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val intvec_ip_ops);
|
||||
INSERT INTO t (val) VALUES ('[1,2,4]');
|
||||
SELECT * FROM t ORDER BY val <#> '[3,3,3]';
|
||||
val
|
||||
---------
|
||||
[1,2,4]
|
||||
[1,2,3]
|
||||
[1,1,1]
|
||||
[0,0,0]
|
||||
(4 rows)
|
||||
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::intvec)) t2;
|
||||
count
|
||||
-------
|
||||
4
|
||||
(1 row)
|
||||
|
||||
DROP TABLE t;
|
||||
33
test/expected/hnsw_intvec_l2.out
Normal file
33
test/expected/hnsw_intvec_l2.out
Normal file
@@ -0,0 +1,33 @@
|
||||
SET enable_seqscan = off;
|
||||
CREATE TABLE t (val intvec(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val intvec_l2_ops);
|
||||
INSERT INTO t (val) VALUES ('[1,2,4]');
|
||||
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
|
||||
val
|
||||
---------
|
||||
[1,2,3]
|
||||
[1,2,4]
|
||||
[1,1,1]
|
||||
[0,0,0]
|
||||
(4 rows)
|
||||
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <-> (SELECT NULL::intvec)) t2;
|
||||
count
|
||||
-------
|
||||
4
|
||||
(1 row)
|
||||
|
||||
SELECT COUNT(*) FROM t;
|
||||
count
|
||||
-------
|
||||
5
|
||||
(1 row)
|
||||
|
||||
TRUNCATE t;
|
||||
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
|
||||
val
|
||||
-----
|
||||
(0 rows)
|
||||
|
||||
DROP TABLE t;
|
||||
92
test/expected/intvec_functions.out
Normal file
92
test/expected/intvec_functions.out
Normal file
@@ -0,0 +1,92 @@
|
||||
SELECT l2_distance('[0,0]'::intvec, '[3,4]');
|
||||
l2_distance
|
||||
-------------
|
||||
5
|
||||
(1 row)
|
||||
|
||||
SELECT l2_distance('[0,0]'::intvec, '[0,1]');
|
||||
l2_distance
|
||||
-------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT l2_distance('[1,2]'::intvec, '[3]');
|
||||
ERROR: different intvec dimensions 2 and 1
|
||||
SELECT '[0,0]'::intvec <-> '[3,4]';
|
||||
?column?
|
||||
----------
|
||||
5
|
||||
(1 row)
|
||||
|
||||
SELECT inner_product('[1,2]'::intvec, '[3,4]');
|
||||
inner_product
|
||||
---------------
|
||||
11
|
||||
(1 row)
|
||||
|
||||
SELECT inner_product('[1,2]'::intvec, '[3]');
|
||||
ERROR: different intvec dimensions 2 and 1
|
||||
SELECT inner_product('[127]'::intvec, '[127]');
|
||||
inner_product
|
||||
---------------
|
||||
16129
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2]'::intvec <#> '[3,4]';
|
||||
?column?
|
||||
----------
|
||||
-11
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,2]'::intvec, '[2,4]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,2]'::intvec, '[0,0]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
NaN
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,1]'::intvec, '[1,1]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,0]'::intvec, '[0,2]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,1]'::intvec, '[-1,-1]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
2
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,2]'::intvec, '[3]');
|
||||
ERROR: different intvec dimensions 2 and 1
|
||||
SELECT '[1,2]'::intvec <=> '[2,4]';
|
||||
?column?
|
||||
----------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT l1_distance('[0,0]'::intvec, '[3,4]');
|
||||
l1_distance
|
||||
-------------
|
||||
7
|
||||
(1 row)
|
||||
|
||||
SELECT l1_distance('[0,0]'::intvec, '[0,1]');
|
||||
l1_distance
|
||||
-------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT l1_distance('[1,2]'::intvec, '[3]');
|
||||
ERROR: different intvec dimensions 2 and 1
|
||||
119
test/expected/intvec_input.out
Normal file
119
test/expected/intvec_input.out
Normal file
@@ -0,0 +1,119 @@
|
||||
SELECT '[1,2,3]'::intvec;
|
||||
intvec
|
||||
---------
|
||||
[1,2,3]
|
||||
(1 row)
|
||||
|
||||
SELECT '[-1,-2,-3]'::intvec;
|
||||
intvec
|
||||
------------
|
||||
[-1,-2,-3]
|
||||
(1 row)
|
||||
|
||||
SELECT ' [ 1, 2 , 3 ] '::intvec;
|
||||
intvec
|
||||
---------
|
||||
[1,2,3]
|
||||
(1 row)
|
||||
|
||||
SELECT '[1.23456]'::intvec;
|
||||
ERROR: invalid input syntax for type intvec: "[1.23456]"
|
||||
LINE 1: SELECT '[1.23456]'::intvec;
|
||||
^
|
||||
SELECT '[hello,1]'::intvec;
|
||||
ERROR: invalid input syntax for type intvec: "[hello,1]"
|
||||
LINE 1: SELECT '[hello,1]'::intvec;
|
||||
^
|
||||
SELECT '[127,-128]'::intvec;
|
||||
intvec
|
||||
------------
|
||||
[127,-128]
|
||||
(1 row)
|
||||
|
||||
SELECT '[128,-129]'::intvec;
|
||||
ERROR: value "128" is out of range for type intvec
|
||||
LINE 1: SELECT '[128,-129]'::intvec;
|
||||
^
|
||||
SELECT '[1,2,3'::intvec;
|
||||
ERROR: malformed intvec literal: "[1,2,3"
|
||||
LINE 1: SELECT '[1,2,3'::intvec;
|
||||
^
|
||||
DETAIL: Unexpected end of input.
|
||||
SELECT '[1,2,3]9'::intvec;
|
||||
ERROR: malformed intvec literal: "[1,2,3]9"
|
||||
LINE 1: SELECT '[1,2,3]9'::intvec;
|
||||
^
|
||||
DETAIL: Junk after closing right brace.
|
||||
SELECT '1,2,3'::intvec;
|
||||
ERROR: malformed intvec literal: "1,2,3"
|
||||
LINE 1: SELECT '1,2,3'::intvec;
|
||||
^
|
||||
DETAIL: Vector contents must start with "[".
|
||||
SELECT ''::intvec;
|
||||
ERROR: malformed intvec literal: ""
|
||||
LINE 1: SELECT ''::intvec;
|
||||
^
|
||||
DETAIL: Vector contents must start with "[".
|
||||
SELECT '['::intvec;
|
||||
ERROR: malformed intvec literal: "["
|
||||
LINE 1: SELECT '['::intvec;
|
||||
^
|
||||
DETAIL: Unexpected end of input.
|
||||
SELECT '[,'::intvec;
|
||||
ERROR: malformed intvec literal: "[,"
|
||||
LINE 1: SELECT '[,'::intvec;
|
||||
^
|
||||
DETAIL: Unexpected end of input.
|
||||
SELECT '[]'::intvec;
|
||||
ERROR: intvec must have at least 1 dimension
|
||||
LINE 1: SELECT '[]'::intvec;
|
||||
^
|
||||
SELECT '[1,]'::intvec;
|
||||
ERROR: invalid input syntax for type intvec: "[1,]"
|
||||
LINE 1: SELECT '[1,]'::intvec;
|
||||
^
|
||||
SELECT '[1a]'::intvec;
|
||||
ERROR: invalid input syntax for type intvec: "[1a]"
|
||||
LINE 1: SELECT '[1a]'::intvec;
|
||||
^
|
||||
SELECT '[1,,3]'::intvec;
|
||||
ERROR: malformed intvec literal: "[1,,3]"
|
||||
LINE 1: SELECT '[1,,3]'::intvec;
|
||||
^
|
||||
SELECT '[1, ,3]'::intvec;
|
||||
ERROR: invalid input syntax for type intvec: "[1, ,3]"
|
||||
LINE 1: SELECT '[1, ,3]'::intvec;
|
||||
^
|
||||
SELECT '[1,2,3]'::intvec(3);
|
||||
intvec
|
||||
---------
|
||||
[1,2,3]
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2,3]'::intvec(2);
|
||||
ERROR: expected 2 dimensions, not 3
|
||||
SELECT '[1,2,3]'::intvec(3, 2);
|
||||
ERROR: invalid type modifier
|
||||
LINE 1: SELECT '[1,2,3]'::intvec(3, 2);
|
||||
^
|
||||
SELECT '[1,2,3]'::intvec('a');
|
||||
ERROR: invalid input syntax for type integer: "a"
|
||||
LINE 1: SELECT '[1,2,3]'::intvec('a');
|
||||
^
|
||||
SELECT '[1,2,3]'::intvec(0);
|
||||
ERROR: dimensions for type intvec must be at least 1
|
||||
LINE 1: SELECT '[1,2,3]'::intvec(0);
|
||||
^
|
||||
SELECT '[1,2,3]'::intvec(16001);
|
||||
ERROR: dimensions for type intvec cannot exceed 16000
|
||||
LINE 1: SELECT '[1,2,3]'::intvec(16001);
|
||||
^
|
||||
SELECT unnest('{"[1,2,3]", "[4,5,6]"}'::intvec[]);
|
||||
unnest
|
||||
---------
|
||||
[1,2,3]
|
||||
[4,5,6]
|
||||
(2 rows)
|
||||
|
||||
SELECT '{"[1,2,3]"}'::intvec(2)[];
|
||||
ERROR: expected 2 dimensions, not 3
|
||||
@@ -1,7 +1,7 @@
|
||||
CREATE TABLE t (val vector(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE TABLE t (val vector(3), val2 intvec(3));
|
||||
INSERT INTO t (val, val2) VALUES ('[0,0,0]', '[0,0,0]'), ('[1,2,3]', '[1,2,3]'), ('[1,1,1]', '[1,1,1]'), (NULL, NULL);
|
||||
|
||||
CREATE TABLE t2 (val vector(3));
|
||||
CREATE TABLE t2 (val vector(3), val2 intvec(3));
|
||||
|
||||
\copy t TO 'results/data.bin' WITH (FORMAT binary)
|
||||
\copy t2 FROM 'results/data.bin' WITH (FORMAT binary)
|
||||
|
||||
@@ -24,29 +24,29 @@ SELECT vector_norm('[3,4]');
|
||||
SELECT vector_norm('[0,1]');
|
||||
SELECT vector_norm('[3e37,4e37]')::real;
|
||||
|
||||
SELECT l2_distance('[0,0]', '[3,4]');
|
||||
SELECT l2_distance('[0,0]', '[0,1]');
|
||||
SELECT l2_distance('[1,2]', '[3]');
|
||||
SELECT l2_distance('[3e38]', '[-3e38]');
|
||||
SELECT l2_distance('[0,0]'::vector, '[3,4]');
|
||||
SELECT l2_distance('[0,0]'::vector, '[0,1]');
|
||||
SELECT l2_distance('[1,2]'::vector, '[3]');
|
||||
SELECT l2_distance('[3e38]'::vector, '[-3e38]');
|
||||
|
||||
SELECT inner_product('[1,2]', '[3,4]');
|
||||
SELECT inner_product('[1,2]', '[3]');
|
||||
SELECT inner_product('[3e38]', '[3e38]');
|
||||
SELECT inner_product('[1,2]'::vector, '[3,4]');
|
||||
SELECT inner_product('[1,2]'::vector, '[3]');
|
||||
SELECT inner_product('[3e38]'::vector, '[3e38]');
|
||||
|
||||
SELECT cosine_distance('[1,2]', '[2,4]');
|
||||
SELECT cosine_distance('[1,2]', '[0,0]');
|
||||
SELECT cosine_distance('[1,1]', '[1,1]');
|
||||
SELECT cosine_distance('[1,0]', '[0,2]');
|
||||
SELECT cosine_distance('[1,1]', '[-1,-1]');
|
||||
SELECT cosine_distance('[1,2]', '[3]');
|
||||
SELECT cosine_distance('[1,1]', '[1.1,1.1]');
|
||||
SELECT cosine_distance('[1,1]', '[-1.1,-1.1]');
|
||||
SELECT cosine_distance('[3e38]', '[3e38]');
|
||||
SELECT cosine_distance('[1,2]'::vector, '[2,4]');
|
||||
SELECT cosine_distance('[1,2]'::vector, '[0,0]');
|
||||
SELECT cosine_distance('[1,1]'::vector, '[1,1]');
|
||||
SELECT cosine_distance('[1,0]'::vector, '[0,2]');
|
||||
SELECT cosine_distance('[1,1]'::vector, '[-1,-1]');
|
||||
SELECT cosine_distance('[1,2]'::vector, '[3]');
|
||||
SELECT cosine_distance('[1,1]'::vector, '[1.1,1.1]');
|
||||
SELECT cosine_distance('[1,1]'::vector, '[-1.1,-1.1]');
|
||||
SELECT cosine_distance('[3e38]'::vector, '[3e38]');
|
||||
|
||||
SELECT l1_distance('[0,0]', '[3,4]');
|
||||
SELECT l1_distance('[0,0]', '[0,1]');
|
||||
SELECT l1_distance('[1,2]', '[3]');
|
||||
SELECT l1_distance('[3e38]', '[-3e38]');
|
||||
SELECT l1_distance('[0,0]'::vector, '[3,4]');
|
||||
SELECT l1_distance('[0,0]'::vector, '[0,1]');
|
||||
SELECT l1_distance('[1,2]'::vector, '[3]');
|
||||
SELECT l1_distance('[3e38]'::vector, '[-3e38]');
|
||||
|
||||
SELECT avg(v) FROM unnest(ARRAY['[1,2,3]'::vector, '[3,5,7]']) v;
|
||||
SELECT avg(v) FROM unnest(ARRAY['[1,2,3]'::vector, '[3,5,7]', NULL]) v;
|
||||
|
||||
13
test/sql/hnsw_intvec_cosine.sql
Normal file
13
test/sql/hnsw_intvec_cosine.sql
Normal file
@@ -0,0 +1,13 @@
|
||||
SET enable_seqscan = off;
|
||||
|
||||
CREATE TABLE t (val intvec(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val intvec_cosine_ops);
|
||||
|
||||
INSERT INTO t (val) VALUES ('[1,2,4]');
|
||||
|
||||
SELECT * FROM t ORDER BY val <=> '[3,3,3]';
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '[0,0,0]') t2;
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::intvec)) t2;
|
||||
|
||||
DROP TABLE t;
|
||||
12
test/sql/hnsw_intvec_ip.sql
Normal file
12
test/sql/hnsw_intvec_ip.sql
Normal file
@@ -0,0 +1,12 @@
|
||||
SET enable_seqscan = off;
|
||||
|
||||
CREATE TABLE t (val intvec(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val intvec_ip_ops);
|
||||
|
||||
INSERT INTO t (val) VALUES ('[1,2,4]');
|
||||
|
||||
SELECT * FROM t ORDER BY val <#> '[3,3,3]';
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::intvec)) t2;
|
||||
|
||||
DROP TABLE t;
|
||||
16
test/sql/hnsw_intvec_l2.sql
Normal file
16
test/sql/hnsw_intvec_l2.sql
Normal file
@@ -0,0 +1,16 @@
|
||||
SET enable_seqscan = off;
|
||||
|
||||
CREATE TABLE t (val intvec(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val intvec_l2_ops);
|
||||
|
||||
INSERT INTO t (val) VALUES ('[1,2,4]');
|
||||
|
||||
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <-> (SELECT NULL::intvec)) t2;
|
||||
SELECT COUNT(*) FROM t;
|
||||
|
||||
TRUNCATE t;
|
||||
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
|
||||
|
||||
DROP TABLE t;
|
||||
21
test/sql/intvec_functions.sql
Normal file
21
test/sql/intvec_functions.sql
Normal file
@@ -0,0 +1,21 @@
|
||||
SELECT l2_distance('[0,0]'::intvec, '[3,4]');
|
||||
SELECT l2_distance('[0,0]'::intvec, '[0,1]');
|
||||
SELECT l2_distance('[1,2]'::intvec, '[3]');
|
||||
SELECT '[0,0]'::intvec <-> '[3,4]';
|
||||
|
||||
SELECT inner_product('[1,2]'::intvec, '[3,4]');
|
||||
SELECT inner_product('[1,2]'::intvec, '[3]');
|
||||
SELECT inner_product('[127]'::intvec, '[127]');
|
||||
SELECT '[1,2]'::intvec <#> '[3,4]';
|
||||
|
||||
SELECT cosine_distance('[1,2]'::intvec, '[2,4]');
|
||||
SELECT cosine_distance('[1,2]'::intvec, '[0,0]');
|
||||
SELECT cosine_distance('[1,1]'::intvec, '[1,1]');
|
||||
SELECT cosine_distance('[1,0]'::intvec, '[0,2]');
|
||||
SELECT cosine_distance('[1,1]'::intvec, '[-1,-1]');
|
||||
SELECT cosine_distance('[1,2]'::intvec, '[3]');
|
||||
SELECT '[1,2]'::intvec <=> '[2,4]';
|
||||
|
||||
SELECT l1_distance('[0,0]'::intvec, '[3,4]');
|
||||
SELECT l1_distance('[0,0]'::intvec, '[0,1]');
|
||||
SELECT l1_distance('[1,2]'::intvec, '[3]');
|
||||
28
test/sql/intvec_input.sql
Normal file
28
test/sql/intvec_input.sql
Normal file
@@ -0,0 +1,28 @@
|
||||
SELECT '[1,2,3]'::intvec;
|
||||
SELECT '[-1,-2,-3]'::intvec;
|
||||
SELECT ' [ 1, 2 , 3 ] '::intvec;
|
||||
SELECT '[1.23456]'::intvec;
|
||||
SELECT '[hello,1]'::intvec;
|
||||
SELECT '[127,-128]'::intvec;
|
||||
SELECT '[128,-129]'::intvec;
|
||||
SELECT '[1,2,3'::intvec;
|
||||
SELECT '[1,2,3]9'::intvec;
|
||||
SELECT '1,2,3'::intvec;
|
||||
SELECT ''::intvec;
|
||||
SELECT '['::intvec;
|
||||
SELECT '[,'::intvec;
|
||||
SELECT '[]'::intvec;
|
||||
SELECT '[1,]'::intvec;
|
||||
SELECT '[1a]'::intvec;
|
||||
SELECT '[1,,3]'::intvec;
|
||||
SELECT '[1, ,3]'::intvec;
|
||||
|
||||
SELECT '[1,2,3]'::intvec(3);
|
||||
SELECT '[1,2,3]'::intvec(2);
|
||||
SELECT '[1,2,3]'::intvec(3, 2);
|
||||
SELECT '[1,2,3]'::intvec('a');
|
||||
SELECT '[1,2,3]'::intvec(0);
|
||||
SELECT '[1,2,3]'::intvec(16001);
|
||||
|
||||
SELECT unnest('{"[1,2,3]", "[4,5,6]"}'::intvec[]);
|
||||
SELECT '{"[1,2,3]"}'::intvec(2)[];
|
||||
132
test/t/020_hnsw_intvec_build_recall.pl
Normal file
132
test/t/020_hnsw_intvec_build_recall.pl
Normal file
@@ -0,0 +1,132 @@
|
||||
use strict;
|
||||
use warnings;
|
||||
use PostgresNode;
|
||||
use TestLib;
|
||||
use Test::More;
|
||||
|
||||
# Shared state for the recall test. These file-scoped lexicals are filled in by
# the main script below and read by test_recall().
my $node;            # test node handle, assigned when the node is created
my @queries = ();    # query vectors as '[a,b,...]' literals
my @expected;        # exact result text per query, in the same order as @queries
my $limit = 20;      # LIMIT applied to every nearest-neighbour query
my $dim = 20;        # number of dimensions of the intvec column
# One SQL expression per dimension, comma-separated, for use inside ARRAY[...].
my $array_sql = join(",", ('(random() * 255)::int - 128') x $dim);

# Run every query in @queries with sequential scans disabled and compare the
# returned ids with the exact results stored in @expected.
#
#   $min      - minimum acceptable recall (fraction of expected ids returned)
#   $operator - distance operator to order by; also used as the test name
#
# Emits two Test::More results: one checking that the plan for the first query
# contains "Index Scan", and one checking that the overall recall is >= $min.
sub test_recall
{
    my ($min, $operator) = @_;
    my $correct = 0;
    my $total = 0;

    my $explain = $node->safe_psql("postgres", qq(
        SET enable_seqscan = off;
        EXPLAIN ANALYZE SELECT i FROM tst ORDER BY v $operator '$queries[0]' LIMIT $limit;
    ));
    like($explain, qr/Index Scan/, "$operator uses an index scan");

    for my $i (0 .. $#queries)
    {
        my $actual = $node->safe_psql("postgres", qq(
            SET enable_seqscan = off;
            SELECT i FROM tst ORDER BY v $operator '$queries[$i]' LIMIT $limit;
        ));

        # Ids come back one per line; a hash gives constant-time membership checks.
        my %actual_set = map { $_ => 1 } split(/\n/, $actual);

        for my $id (split(/\n/, $expected[$i]))
        {
            $correct++ if exists $actual_set{$id};
            $total++;
        }
    }

    # With no expected ids at all, report a recall of 0 so the check below is
    # recorded as an ordinary test failure instead of dying on a division by zero.
    my $recall = $total > 0 ? $correct / $total : 0;
    cmp_ok($recall, ">=", $min, $operator);
}
|
||||
|
||||
# Bring up a fresh PostgreSQL node for this test.
$node = get_new_node('node');
$node->init;
$node->start;

# Load the extension and fill the table with 10000 generated rows.
$node->safe_psql("postgres", "CREATE EXTENSION vector;");
$node->safe_psql("postgres", "CREATE TABLE tst (i int4, v intvec($dim));");
$node->safe_psql("postgres",
    "INSERT INTO tst SELECT i, ARRAY[$array_sql] FROM generate_series(1, 10000) i;"
);

# Build 20 query vectors, each with $dim integer components in -128..127.
for my $query_num (1 .. 20)
{
    my @components = map { int(rand(256)) - 128 } 1 .. $dim;
    push(@queries, "[" . join(",", @components) . "]");
}

# Each distance operator paired with the operator class that indexes it.
my @index_types = (
    [ "<->", "intvec_l2_ops" ],
    [ "<#>", "intvec_ip_ops" ],
    [ "<=>", "intvec_cosine_ops" ],
);

# Minimum recall required from every index that is checked.
my $min = 0.99;

for my $index_type (@index_types)
{
    my ($operator, $opclass) = @$index_type;

    # Exact answers, computed before any index exists.
    @expected = ();
    for my $query (@queries)
    {
        push(@expected, scalar($node->safe_psql("postgres", "SELECT i FROM tst ORDER BY v $operator '$query' LIMIT $limit;")));
    }

    # Stage 1: serial build, then check recall.
    $node->safe_psql("postgres", qq(
        SET max_parallel_maintenance_workers = 0;
        CREATE INDEX idx ON tst USING hnsw (v $opclass);
    ));
    test_recall($min, $operator);
    $node->safe_psql("postgres", "DROP INDEX idx;");

    # Stage 2: parallel build; the DEBUG output must mention the workers.
    my ($mem_ret, undef, $mem_stderr) = $node->psql("postgres", qq(
        SET client_min_messages = DEBUG;
        SET min_parallel_table_scan_size = 1;
        CREATE INDEX idx ON tst USING hnsw (v $opclass);
    ));
    is($mem_ret, 0, $mem_stderr);
    like($mem_stderr, qr/using \d+ parallel workers/);
    test_recall($min, $operator);
    $node->safe_psql("postgres", "DROP INDEX idx;");

    # Stage 3: parallel build with a small maintenance_work_mem.
    # parallel_workers is set on the table so workers are still used with the
    # low memory setting, and is reset again in the same session.
    my ($disk_ret, undef, $disk_stderr) = $node->psql("postgres", qq(
        ALTER TABLE tst SET (parallel_workers = 2);
        SET client_min_messages = DEBUG;
        SET maintenance_work_mem = '4MB';
        CREATE INDEX idx ON tst USING hnsw (v $opclass);
        ALTER TABLE tst RESET (parallel_workers);
    ));
    is($disk_ret, 0, $disk_stderr);
    like($disk_stderr, qr/using \d+ parallel workers/);
    like($disk_stderr, qr/hnsw graph no longer fits into maintenance_work_mem/);
    $node->safe_psql("postgres", "DROP INDEX idx;");
}

done_testing();
|
||||
Reference in New Issue
Block a user