diff --git a/README.md b/README.md index 53e727f..ef5cf2b 100644 --- a/README.md +++ b/README.md @@ -501,19 +501,21 @@ SELECT * FROM ( Use the `sparsevec` type to store sparse vectors ```sql -CREATE TABLE items (id bigserial PRIMARY KEY, embedding sparsevec(10)); +CREATE TABLE items (id bigserial PRIMARY KEY, embedding sparsevec(5)); ``` Insert vectors ```sql -INSERT INTO items (embedding) VALUES ('{0:1,1:2,2:3}/10'), ('{0:4,1:5,2:6}/10'); +INSERT INTO items (embedding) VALUES ('{1:1,3:2,5:3}/5'), ('{1:4,3:5,5:6}/5'); ``` +Note: Indices start at 1 like SQL arrays + Get the nearest neighbors by L2 distance ```sql -SELECT * FROM items ORDER BY embedding <-> '{0:3,1:1,2:2}/10' LIMIT 5; +SELECT * FROM items ORDER BY embedding <-> '{1:3,3:1,5:2}/5' LIMIT 5; ``` ## Hybrid Search diff --git a/src/sparsevec.c b/src/sparsevec.c index 6e48f91..33551a7 100644 --- a/src/sparsevec.c +++ b/src/sparsevec.c @@ -89,15 +89,15 @@ CheckIndex(int32 *indices, int i, int dim) { int32 index = indices[i]; - if (index < 0) + if (index < 1) ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), - errmsg("index must not be negative"))); + errmsg("index must be greater than zero"))); - if (index >= dim) + if (index > dim) ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), - errmsg("index must be less than dimensions"))); + errmsg("index must be less than or equal to dimensions"))); if (i > 0) { @@ -245,7 +245,7 @@ sparsevec_in(PG_FUNCTION_ARGS) (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("invalid input syntax for type sparsevec: \"%s\"", lit))); - if (errno == ERANGE || index < 0 || index > INT_MAX) + if (errno == ERANGE || index < 1 || index > INT_MAX) ereport(ERROR, (errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE), errmsg("index \"%ld\" is out of range for type sparsevec", index))); diff --git a/src/vector.c b/src/vector.c index 1f5ec89..31e7386 100644 --- a/src/vector.c +++ b/src/vector.c @@ -1244,7 +1244,7 @@ sparsevec_to_vector(PG_FUNCTION_ARGS) result = InitVector(dim); for (int i = 0; i < svec->nnz; i++) - result->x[svec->indices[i]] = values[i]; + result->x[svec->indices[i] - 1] = values[i]; PG_RETURN_POINTER(result); } diff --git a/test/expected/hnsw_sparsevec_cosine.out b/test/expected/hnsw_sparsevec_cosine.out index 778415e..8645580 100644 --- a/test/expected/hnsw_sparsevec_cosine.out +++ b/test/expected/hnsw_sparsevec_cosine.out @@ -1,14 +1,14 @@ SET enable_seqscan = off; CREATE TABLE t (val sparsevec(3)); -INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL); +INSERT INTO t (val) VALUES ('{}/3'), ('{1:1,2:2,3:3}/3'), ('{1:1,2:1,3:1}/3'), (NULL); CREATE INDEX ON t USING hnsw (val sparsevec_cosine_ops); -INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3'); -SELECT * FROM t ORDER BY val <=> '{0:3,1:3,2:3}/3'; +INSERT INTO t (val) VALUES ('{1:1,2:2,3:4}/3'); +SELECT * FROM t ORDER BY val <=> '{1:3,2:3,3:3}/3'; val ----------------- - {0:1,1:1,2:1}/3 - {0:1,1:2,2:3}/3 - {0:1,1:2,2:4}/3 + {1:1,2:1,3:1}/3 + {1:1,2:2,3:3}/3 + {1:1,2:2,3:4}/3 (3 rows) SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '{}/3') t2; diff --git a/test/expected/hnsw_sparsevec_ip.out b/test/expected/hnsw_sparsevec_ip.out index 1c303f0..bfc212b 100644 --- a/test/expected/hnsw_sparsevec_ip.out +++ b/test/expected/hnsw_sparsevec_ip.out @@ -1,14 +1,14 @@ SET enable_seqscan = off; CREATE TABLE t (val sparsevec(3)); -INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL); +INSERT INTO t (val) VALUES ('{}/3'), ('{1:1,2:2,3:3}/3'), ('{1:1,2:1,3:1}/3'), (NULL); CREATE INDEX ON t USING hnsw (val sparsevec_ip_ops); -INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3'); -SELECT * FROM t ORDER BY val <#> '{0:3,1:3,2:3}/3'; +INSERT INTO t (val) VALUES ('{1:1,2:2,3:4}/3'); +SELECT * FROM t ORDER BY val <#> '{1:3,2:3,3:3}/3'; val ----------------- - {0:1,1:2,2:4}/3 - {0:1,1:2,2:3}/3 - {0:1,1:1,2:1}/3 + {1:1,2:2,3:4}/3 + {1:1,2:2,3:3}/3 + {1:1,2:1,3:1}/3 {}/3 (4 rows) diff --git a/test/expected/hnsw_sparsevec_l2.out b/test/expected/hnsw_sparsevec_l2.out index adc5cfd..f474871 100644 --- a/test/expected/hnsw_sparsevec_l2.out +++ b/test/expected/hnsw_sparsevec_l2.out @@ -1,14 +1,14 @@ SET enable_seqscan = off; CREATE TABLE t (val sparsevec(3)); -INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL); +INSERT INTO t (val) VALUES ('{}/3'), ('{1:1,2:2,3:3}/3'), ('{1:1,2:1,3:1}/3'), (NULL); CREATE INDEX ON t USING hnsw (val sparsevec_l2_ops); -INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3'); -SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}/3'; +INSERT INTO t (val) VALUES ('{1:1,2:2,3:4}/3'); +SELECT * FROM t ORDER BY val <-> '{1:3,2:3,3:3}/3'; val ----------------- - {0:1,1:2,2:3}/3 - {0:1,1:2,2:4}/3 - {0:1,1:1,2:1}/3 + {1:1,2:2,3:3}/3 + {1:1,2:2,3:4}/3 + {1:1,2:1,3:1}/3 {}/3 (4 rows) @@ -25,7 +25,7 @@ SELECT COUNT(*) FROM t; (1 row) TRUNCATE t; -SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}/3'; +SELECT * FROM t ORDER BY val <-> '{1:3,2:3,3:3}/3'; val ----- (0 rows) diff --git a/test/expected/sparsevec_functions.out b/test/expected/sparsevec_functions.out index 07117d8..3633060 100644 --- a/test/expected/sparsevec_functions.out +++ b/test/expected/sparsevec_functions.out @@ -1,52 +1,52 @@ -SELECT l2_distance('{}/2'::sparsevec, '{0:3,1:4}/2'); +SELECT l2_distance('{}/2'::sparsevec, '{1:3,2:4}/2'); l2_distance ------------- 5 (1 row) -SELECT l2_distance('{}/2'::sparsevec, '{1:1}/2'); +SELECT l2_distance('{}/2'::sparsevec, '{2:1}/2'); l2_distance ------------- 1 (1 row) -SELECT '{}/2'::sparsevec <-> '{0:3,1:4}/2'; +SELECT '{}/2'::sparsevec <-> '{1:3,2:4}/2'; ?column? ---------- 5 (1 row) -SELECT inner_product('{0:1,1:2}/2'::sparsevec, '{0:2,1:4}/2'); +SELECT inner_product('{1:1,2:2}/2'::sparsevec, '{1:2,2:4}/2'); inner_product --------------- 10 (1 row) -SELECT sparsevec_negative_inner_product('{0:1,1:2}/2', '{0:2,1:4}/2'); +SELECT sparsevec_negative_inner_product('{1:1,2:2}/2', '{1:2,2:4}/2'); sparsevec_negative_inner_product ---------------------------------- -10 (1 row) -SELECT cosine_distance('{0:1,1:2}/2'::sparsevec, '{0:2,1:4}/2'); +SELECT cosine_distance('{1:1,2:2}/2'::sparsevec, '{1:2,2:4}/2'); cosine_distance ----------------- 0 (1 row) -SELECT cosine_distance('{0:1,1:2}/2'::sparsevec, '{}/2'); +SELECT cosine_distance('{1:1,2:2}/2'::sparsevec, '{}/2'); cosine_distance ----------------- NaN (1 row) -SELECT cosine_distance('{0:1,1:1}/2'::sparsevec, '{0:-1,1:-1}/2'); +SELECT cosine_distance('{1:1,2:1}/2'::sparsevec, '{1:-1,2:-1}/2'); cosine_distance ----------------- 2 (1 row) -SELECT cosine_distance('{0:1}/2'::sparsevec, '{1:2}/2'); +SELECT cosine_distance('{1:2}/2'::sparsevec, '{2:2}/2'); cosine_distance ----------------- 1 @@ -58,5 +58,5 @@ SELECT cosine_distance('{}/1'::sparsevec, '{}/1'); NaN (1 row) -SELECT cosine_distance('{0:1}/2'::sparsevec, '{0:1}/3'); +SELECT cosine_distance('{1:2}/2'::sparsevec, '{1:1}/3'); ERROR: different sparsevec dimensions 2 and 3 diff --git a/test/expected/sparsevec_input.out b/test/expected/sparsevec_input.out index bd2faf5..c25b852 100644 --- a/test/expected/sparsevec_input.out +++ b/test/expected/sparsevec_input.out @@ -1,22 +1,22 @@ -SELECT '{0:1.5,2:3.5}/5'::sparsevec; +SELECT '{1:1.5,3:3.5}/5'::sparsevec; sparsevec ----------------- - {0:1.5,2:3.5}/5 + {1:1.5,3:3.5}/5 (1 row) -SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector; +SELECT '{1:1.5,3:3.5}/5'::sparsevec::vector; vector ----------------- [1.5,0,3.5,0,0] (1 row) -SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector(5); +SELECT '{1:1.5,3:3.5}/5'::sparsevec::vector(5); vector ----------------- [1.5,0,3.5,0,0] (1 row) -SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector(4); +SELECT '{1:1.5,3:3.5}/5'::sparsevec::vector(4); ERROR: expected 4 dimensions, not 5 SELECT '[0,1.5,0,3.5,0]'::vector::sparsevec; sparsevec @@ -24,15 +24,15 @@ SELECT '[0,1.5,0,3.5,0]'::vector::sparsevec; {1:1.5,3:3.5}/5 (1 row) -SELECT '{0:0,1:1,2:0}/3'::sparsevec; +SELECT '{1:0,2:1,3:0}/3'::sparsevec; sparsevec ----------- - {1:1}/3 + {2:1}/3 (1 row) -SELECT '{1:1,0:1}/2'::sparsevec; +SELECT '{2:1,1:1}/2'::sparsevec; ERROR: indexes must be in ascending order -LINE 1: SELECT '{1:1,0:1}/2'::sparsevec; +LINE 1: SELECT '{2:1,1:1}/2'::sparsevec; ^ SELECT '{}/5'::sparsevec; sparsevec @@ -50,13 +50,13 @@ LINE 1: SELECT '{}/100001'::sparsevec; ^ SELECT '{}/16001'::sparsevec::vector; ERROR: vector cannot have more than 16000 dimensions -SELECT '{-1:1}/1'::sparsevec; -ERROR: index "-1" is out of range for type sparsevec -LINE 1: SELECT '{-1:1}/1'::sparsevec; +SELECT '{0:1}/1'::sparsevec; +ERROR: index "0" is out of range for type sparsevec +LINE 1: SELECT '{0:1}/1'::sparsevec; ^ -SELECT '{1:1}/1'::sparsevec; -ERROR: index must be less than dimensions -LINE 1: SELECT '{1:1}/1'::sparsevec; +SELECT '{2:1}/1'::sparsevec; +ERROR: index must be less than or equal to dimensions +LINE 1: SELECT '{2:1}/1'::sparsevec; ^ SELECT '{}/1'::sparsevec(2); ERROR: expected 2 dimensions, not 1 diff --git a/test/sql/hnsw_sparsevec_cosine.sql b/test/sql/hnsw_sparsevec_cosine.sql index 685423c..2ba1c1e 100644 --- a/test/sql/hnsw_sparsevec_cosine.sql +++ b/test/sql/hnsw_sparsevec_cosine.sql @@ -1,12 +1,12 @@ SET enable_seqscan = off; CREATE TABLE t (val sparsevec(3)); -INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL); +INSERT INTO t (val) VALUES ('{}/3'), ('{1:1,2:2,3:3}/3'), ('{1:1,2:1,3:1}/3'), (NULL); CREATE INDEX ON t USING hnsw (val sparsevec_cosine_ops); -INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3'); +INSERT INTO t (val) VALUES ('{1:1,2:2,3:4}/3'); -SELECT * FROM t ORDER BY val <=> '{0:3,1:3,2:3}/3'; +SELECT * FROM t ORDER BY val <=> '{1:3,2:3,3:3}/3'; SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '{}/3') t2; SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::sparsevec)) t2; diff --git a/test/sql/hnsw_sparsevec_ip.sql b/test/sql/hnsw_sparsevec_ip.sql index 1888d9c..1fc8449 100644 --- a/test/sql/hnsw_sparsevec_ip.sql +++ b/test/sql/hnsw_sparsevec_ip.sql @@ -1,12 +1,12 @@ SET enable_seqscan = off; CREATE TABLE t (val sparsevec(3)); -INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL); +INSERT INTO t (val) VALUES ('{}/3'), ('{1:1,2:2,3:3}/3'), ('{1:1,2:1,3:1}/3'), (NULL); CREATE INDEX ON t USING hnsw (val sparsevec_ip_ops); -INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3'); +INSERT INTO t (val) VALUES ('{1:1,2:2,3:4}/3'); -SELECT * FROM t ORDER BY val <#> '{0:3,1:3,2:3}/3'; +SELECT * FROM t ORDER BY val <#> '{1:3,2:3,3:3}/3'; SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::sparsevec)) t2; DROP TABLE t; diff --git a/test/sql/hnsw_sparsevec_l2.sql b/test/sql/hnsw_sparsevec_l2.sql index b472607..9163c47 100644 --- a/test/sql/hnsw_sparsevec_l2.sql +++ b/test/sql/hnsw_sparsevec_l2.sql @@ -1,17 +1,17 @@ SET enable_seqscan = off; CREATE TABLE t (val sparsevec(3)); -INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL); +INSERT INTO t (val) VALUES ('{}/3'), ('{1:1,2:2,3:3}/3'), ('{1:1,2:1,3:1}/3'), (NULL); CREATE INDEX ON t USING hnsw (val sparsevec_l2_ops); -INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3'); +INSERT INTO t (val) VALUES ('{1:1,2:2,3:4}/3'); -SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}/3'; +SELECT * FROM t ORDER BY val <-> '{1:3,2:3,3:3}/3'; SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <-> (SELECT NULL::sparsevec)) t2; SELECT COUNT(*) FROM t; TRUNCATE t; -SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}/3'; +SELECT * FROM t ORDER BY val <-> '{1:3,2:3,3:3}/3'; DROP TABLE t; diff --git a/test/sql/sparsevec_functions.sql b/test/sql/sparsevec_functions.sql index 86f7990..57eca71 100644 --- a/test/sql/sparsevec_functions.sql +++ b/test/sql/sparsevec_functions.sql @@ -1,13 +1,13 @@ -SELECT l2_distance('{}/2'::sparsevec, '{0:3,1:4}/2'); -SELECT l2_distance('{}/2'::sparsevec, '{1:1}/2'); -SELECT '{}/2'::sparsevec <-> '{0:3,1:4}/2'; +SELECT l2_distance('{}/2'::sparsevec, '{1:3,2:4}/2'); +SELECT l2_distance('{}/2'::sparsevec, '{2:1}/2'); +SELECT '{}/2'::sparsevec <-> '{1:3,2:4}/2'; -SELECT inner_product('{0:1,1:2}/2'::sparsevec, '{0:2,1:4}/2'); -SELECT sparsevec_negative_inner_product('{0:1,1:2}/2', '{0:2,1:4}/2'); +SELECT inner_product('{1:1,2:2}/2'::sparsevec, '{1:2,2:4}/2'); +SELECT sparsevec_negative_inner_product('{1:1,2:2}/2', '{1:2,2:4}/2'); -SELECT cosine_distance('{0:1,1:2}/2'::sparsevec, '{0:2,1:4}/2'); -SELECT cosine_distance('{0:1,1:2}/2'::sparsevec, '{}/2'); -SELECT cosine_distance('{0:1,1:1}/2'::sparsevec, '{0:-1,1:-1}/2'); -SELECT cosine_distance('{0:1}/2'::sparsevec, '{1:2}/2'); +SELECT cosine_distance('{1:1,2:2}/2'::sparsevec, '{1:2,2:4}/2'); +SELECT cosine_distance('{1:1,2:2}/2'::sparsevec, '{}/2'); +SELECT cosine_distance('{1:1,2:1}/2'::sparsevec, '{1:-1,2:-1}/2'); +SELECT cosine_distance('{1:2}/2'::sparsevec, '{2:2}/2'); SELECT cosine_distance('{}/1'::sparsevec, '{}/1'); -SELECT cosine_distance('{0:1}/2'::sparsevec, '{0:1}/3'); +SELECT cosine_distance('{1:2}/2'::sparsevec, '{1:1}/3'); diff --git a/test/sql/sparsevec_input.sql b/test/sql/sparsevec_input.sql index 1fdfd88..757afc0 100644 --- a/test/sql/sparsevec_input.sql +++ b/test/sql/sparsevec_input.sql @@ -1,19 +1,19 @@ -SELECT '{0:1.5,2:3.5}/5'::sparsevec; -SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector; -SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector(5); -SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector(4); +SELECT '{1:1.5,3:3.5}/5'::sparsevec; +SELECT '{1:1.5,3:3.5}/5'::sparsevec::vector; +SELECT '{1:1.5,3:3.5}/5'::sparsevec::vector(5); +SELECT '{1:1.5,3:3.5}/5'::sparsevec::vector(4); SELECT '[0,1.5,0,3.5,0]'::vector::sparsevec; -SELECT '{0:0,1:1,2:0}/3'::sparsevec; +SELECT '{1:0,2:1,3:0}/3'::sparsevec; -SELECT '{1:1,0:1}/2'::sparsevec; +SELECT '{2:1,1:1}/2'::sparsevec; SELECT '{}/5'::sparsevec; SELECT '{}/-1'::sparsevec; SELECT '{}/100001'::sparsevec; SELECT '{}/16001'::sparsevec::vector; -SELECT '{-1:1}/1'::sparsevec; -SELECT '{1:1}/1'::sparsevec; +SELECT '{0:1}/1'::sparsevec; +SELECT '{2:1}/1'::sparsevec; SELECT '{}/1'::sparsevec(2);