Changed indices to start at 1 for sparse vectors to match SQL

This commit is contained in:
Andrew Kane
2024-04-06 14:02:07 -07:00
parent aec853dc68
commit 4450029bdc
13 changed files with 83 additions and 81 deletions

View File

@@ -501,19 +501,21 @@ SELECT * FROM (
Use the `sparsevec` type to store sparse vectors
```sql
CREATE TABLE items (id bigserial PRIMARY KEY, embedding sparsevec(10));
CREATE TABLE items (id bigserial PRIMARY KEY, embedding sparsevec(5));
```
Insert vectors
```sql
INSERT INTO items (embedding) VALUES ('{0:1,1:2,2:3}/10'), ('{0:4,1:5,2:6}/10');
INSERT INTO items (embedding) VALUES ('{1:1,3:2,5:3}/5'), ('{1:4,3:5,5:6}/5');
```
Note: Indices start at 1, like SQL arrays, so valid indices run from 1 to the number of dimensions
Get the nearest neighbors by L2 distance
```sql
SELECT * FROM items ORDER BY embedding <-> '{0:3,1:1,2:2}/10' LIMIT 5;
SELECT * FROM items ORDER BY embedding <-> '{1:3,3:1,5:2}/5' LIMIT 5;
```
## Hybrid Search

View File

@@ -89,15 +89,15 @@ CheckIndex(int32 *indices, int i, int dim)
{
int32 index = indices[i];
if (index < 0)
if (index < 1)
ereport(ERROR,
(errcode(ERRCODE_DATA_EXCEPTION),
errmsg("index must not be negative")));
errmsg("index must be greater than zero")));
if (index >= dim)
if (index > dim)
ereport(ERROR,
(errcode(ERRCODE_DATA_EXCEPTION),
errmsg("index must be less than dimensions")));
errmsg("index must be less than or equal to dimensions")));
if (i > 0)
{
@@ -245,7 +245,7 @@ sparsevec_in(PG_FUNCTION_ARGS)
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type sparsevec: \"%s\"", lit)));
if (errno == ERANGE || index < 0 || index > INT_MAX)
if (errno == ERANGE || index < 1 || index > INT_MAX)
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("index \"%ld\" is out of range for type sparsevec", index)));

View File

@@ -1244,7 +1244,7 @@ sparsevec_to_vector(PG_FUNCTION_ARGS)
result = InitVector(dim);
for (int i = 0; i < svec->nnz; i++)
result->x[svec->indices[i]] = values[i];
result->x[svec->indices[i] - 1] = values[i];
PG_RETURN_POINTER(result);
}

View File

@@ -1,14 +1,14 @@
SET enable_seqscan = off;
CREATE TABLE t (val sparsevec(3));
INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL);
INSERT INTO t (val) VALUES ('{}/3'), ('{1:1,2:2,3:3}/3'), ('{1:1,2:1,3:1}/3'), (NULL);
CREATE INDEX ON t USING hnsw (val sparsevec_cosine_ops);
INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3');
SELECT * FROM t ORDER BY val <=> '{0:3,1:3,2:3}/3';
INSERT INTO t (val) VALUES ('{1:1,2:2,3:4}/3');
SELECT * FROM t ORDER BY val <=> '{1:3,2:3,3:3}/3';
val
-----------------
{0:1,1:1,2:1}/3
{0:1,1:2,2:3}/3
{0:1,1:2,2:4}/3
{1:1,2:1,3:1}/3
{1:1,2:2,3:3}/3
{1:1,2:2,3:4}/3
(3 rows)
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '{}/3') t2;

View File

@@ -1,14 +1,14 @@
SET enable_seqscan = off;
CREATE TABLE t (val sparsevec(3));
INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL);
INSERT INTO t (val) VALUES ('{}/3'), ('{1:1,2:2,3:3}/3'), ('{1:1,2:1,3:1}/3'), (NULL);
CREATE INDEX ON t USING hnsw (val sparsevec_ip_ops);
INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3');
SELECT * FROM t ORDER BY val <#> '{0:3,1:3,2:3}/3';
INSERT INTO t (val) VALUES ('{1:1,2:2,3:4}/3');
SELECT * FROM t ORDER BY val <#> '{1:3,2:3,3:3}/3';
val
-----------------
{0:1,1:2,2:4}/3
{0:1,1:2,2:3}/3
{0:1,1:1,2:1}/3
{1:1,2:2,3:4}/3
{1:1,2:2,3:3}/3
{1:1,2:1,3:1}/3
{}/3
(4 rows)

View File

@@ -1,14 +1,14 @@
SET enable_seqscan = off;
CREATE TABLE t (val sparsevec(3));
INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL);
INSERT INTO t (val) VALUES ('{}/3'), ('{1:1,2:2,3:3}/3'), ('{1:1,2:1,3:1}/3'), (NULL);
CREATE INDEX ON t USING hnsw (val sparsevec_l2_ops);
INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3');
SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}/3';
INSERT INTO t (val) VALUES ('{1:1,2:2,3:4}/3');
SELECT * FROM t ORDER BY val <-> '{1:3,2:3,3:3}/3';
val
-----------------
{0:1,1:2,2:3}/3
{0:1,1:2,2:4}/3
{0:1,1:1,2:1}/3
{1:1,2:2,3:3}/3
{1:1,2:2,3:4}/3
{1:1,2:1,3:1}/3
{}/3
(4 rows)
@@ -25,7 +25,7 @@ SELECT COUNT(*) FROM t;
(1 row)
TRUNCATE t;
SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}/3';
SELECT * FROM t ORDER BY val <-> '{1:3,2:3,3:3}/3';
val
-----
(0 rows)

View File

@@ -1,52 +1,52 @@
SELECT l2_distance('{}/2'::sparsevec, '{0:3,1:4}/2');
SELECT l2_distance('{}/2'::sparsevec, '{1:3,2:4}/2');
l2_distance
-------------
5
(1 row)
SELECT l2_distance('{}/2'::sparsevec, '{1:1}/2');
SELECT l2_distance('{}/2'::sparsevec, '{2:1}/2');
l2_distance
-------------
1
(1 row)
SELECT '{}/2'::sparsevec <-> '{0:3,1:4}/2';
SELECT '{}/2'::sparsevec <-> '{1:3,2:4}/2';
?column?
----------
5
(1 row)
SELECT inner_product('{0:1,1:2}/2'::sparsevec, '{0:2,1:4}/2');
SELECT inner_product('{1:1,2:2}/2'::sparsevec, '{1:2,2:4}/2');
inner_product
---------------
10
(1 row)
SELECT sparsevec_negative_inner_product('{0:1,1:2}/2', '{0:2,1:4}/2');
SELECT sparsevec_negative_inner_product('{1:1,2:2}/2', '{1:2,2:4}/2');
sparsevec_negative_inner_product
----------------------------------
-10
(1 row)
SELECT cosine_distance('{0:1,1:2}/2'::sparsevec, '{0:2,1:4}/2');
SELECT cosine_distance('{1:1,2:2}/2'::sparsevec, '{1:2,2:4}/2');
cosine_distance
-----------------
0
(1 row)
SELECT cosine_distance('{0:1,1:2}/2'::sparsevec, '{}/2');
SELECT cosine_distance('{1:1,2:2}/2'::sparsevec, '{}/2');
cosine_distance
-----------------
NaN
(1 row)
SELECT cosine_distance('{0:1,1:1}/2'::sparsevec, '{0:-1,1:-1}/2');
SELECT cosine_distance('{1:1,2:1}/2'::sparsevec, '{1:-1,2:-1}/2');
cosine_distance
-----------------
2
(1 row)
SELECT cosine_distance('{0:1}/2'::sparsevec, '{1:2}/2');
SELECT cosine_distance('{1:2}/2'::sparsevec, '{2:2}/2');
cosine_distance
-----------------
1
@@ -58,5 +58,5 @@ SELECT cosine_distance('{}/1'::sparsevec, '{}/1');
NaN
(1 row)
SELECT cosine_distance('{0:1}/2'::sparsevec, '{0:1}/3');
SELECT cosine_distance('{1:2}/2'::sparsevec, '{1:1}/3');
ERROR: different sparsevec dimensions 2 and 3

View File

@@ -1,22 +1,22 @@
SELECT '{0:1.5,2:3.5}/5'::sparsevec;
SELECT '{1:1.5,3:3.5}/5'::sparsevec;
sparsevec
-----------------
{0:1.5,2:3.5}/5
{1:1.5,3:3.5}/5
(1 row)
SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector;
SELECT '{1:1.5,3:3.5}/5'::sparsevec::vector;
vector
-----------------
[1.5,0,3.5,0,0]
(1 row)
SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector(5);
SELECT '{1:1.5,3:3.5}/5'::sparsevec::vector(5);
vector
-----------------
[1.5,0,3.5,0,0]
(1 row)
SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector(4);
SELECT '{1:1.5,3:3.5}/5'::sparsevec::vector(4);
ERROR: expected 4 dimensions, not 5
SELECT '[0,1.5,0,3.5,0]'::vector::sparsevec;
sparsevec
@@ -24,15 +24,15 @@ SELECT '[0,1.5,0,3.5,0]'::vector::sparsevec;
{1:1.5,3:3.5}/5
(1 row)
SELECT '{0:0,1:1,2:0}/3'::sparsevec;
SELECT '{1:0,2:1,3:0}/3'::sparsevec;
sparsevec
-----------
{1:1}/3
{2:1}/3
(1 row)
SELECT '{1:1,0:1}/2'::sparsevec;
SELECT '{2:1,1:1}/2'::sparsevec;
ERROR: indexes must be in ascending order
LINE 1: SELECT '{1:1,0:1}/2'::sparsevec;
LINE 1: SELECT '{2:1,1:1}/2'::sparsevec;
^
SELECT '{}/5'::sparsevec;
sparsevec
@@ -50,13 +50,13 @@ LINE 1: SELECT '{}/100001'::sparsevec;
^
SELECT '{}/16001'::sparsevec::vector;
ERROR: vector cannot have more than 16000 dimensions
SELECT '{-1:1}/1'::sparsevec;
ERROR: index "-1" is out of range for type sparsevec
LINE 1: SELECT '{-1:1}/1'::sparsevec;
SELECT '{0:1}/1'::sparsevec;
ERROR: index "0" is out of range for type sparsevec
LINE 1: SELECT '{0:1}/1'::sparsevec;
^
SELECT '{1:1}/1'::sparsevec;
ERROR: index must be less than dimensions
LINE 1: SELECT '{1:1}/1'::sparsevec;
SELECT '{2:1}/1'::sparsevec;
ERROR: index must be less than or equal to dimensions
LINE 1: SELECT '{2:1}/1'::sparsevec;
^
SELECT '{}/1'::sparsevec(2);
ERROR: expected 2 dimensions, not 1

View File

@@ -1,12 +1,12 @@
SET enable_seqscan = off;
CREATE TABLE t (val sparsevec(3));
INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL);
INSERT INTO t (val) VALUES ('{}/3'), ('{1:1,2:2,3:3}/3'), ('{1:1,2:1,3:1}/3'), (NULL);
CREATE INDEX ON t USING hnsw (val sparsevec_cosine_ops);
INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3');
INSERT INTO t (val) VALUES ('{1:1,2:2,3:4}/3');
SELECT * FROM t ORDER BY val <=> '{0:3,1:3,2:3}/3';
SELECT * FROM t ORDER BY val <=> '{1:3,2:3,3:3}/3';
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '{}/3') t2;
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::sparsevec)) t2;

View File

@@ -1,12 +1,12 @@
SET enable_seqscan = off;
CREATE TABLE t (val sparsevec(3));
INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL);
INSERT INTO t (val) VALUES ('{}/3'), ('{1:1,2:2,3:3}/3'), ('{1:1,2:1,3:1}/3'), (NULL);
CREATE INDEX ON t USING hnsw (val sparsevec_ip_ops);
INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3');
INSERT INTO t (val) VALUES ('{1:1,2:2,3:4}/3');
SELECT * FROM t ORDER BY val <#> '{0:3,1:3,2:3}/3';
SELECT * FROM t ORDER BY val <#> '{1:3,2:3,3:3}/3';
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::sparsevec)) t2;
DROP TABLE t;

View File

@@ -1,17 +1,17 @@
SET enable_seqscan = off;
CREATE TABLE t (val sparsevec(3));
INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL);
INSERT INTO t (val) VALUES ('{}/3'), ('{1:1,2:2,3:3}/3'), ('{1:1,2:1,3:1}/3'), (NULL);
CREATE INDEX ON t USING hnsw (val sparsevec_l2_ops);
INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3');
INSERT INTO t (val) VALUES ('{1:1,2:2,3:4}/3');
SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}/3';
SELECT * FROM t ORDER BY val <-> '{1:3,2:3,3:3}/3';
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <-> (SELECT NULL::sparsevec)) t2;
SELECT COUNT(*) FROM t;
TRUNCATE t;
SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}/3';
SELECT * FROM t ORDER BY val <-> '{1:3,2:3,3:3}/3';
DROP TABLE t;

View File

@@ -1,13 +1,13 @@
SELECT l2_distance('{}/2'::sparsevec, '{0:3,1:4}/2');
SELECT l2_distance('{}/2'::sparsevec, '{1:1}/2');
SELECT '{}/2'::sparsevec <-> '{0:3,1:4}/2';
SELECT l2_distance('{}/2'::sparsevec, '{1:3,2:4}/2');
SELECT l2_distance('{}/2'::sparsevec, '{2:1}/2');
SELECT '{}/2'::sparsevec <-> '{1:3,2:4}/2';
SELECT inner_product('{0:1,1:2}/2'::sparsevec, '{0:2,1:4}/2');
SELECT sparsevec_negative_inner_product('{0:1,1:2}/2', '{0:2,1:4}/2');
SELECT inner_product('{1:1,2:2}/2'::sparsevec, '{1:2,2:4}/2');
SELECT sparsevec_negative_inner_product('{1:1,2:2}/2', '{1:2,2:4}/2');
SELECT cosine_distance('{0:1,1:2}/2'::sparsevec, '{0:2,1:4}/2');
SELECT cosine_distance('{0:1,1:2}/2'::sparsevec, '{}/2');
SELECT cosine_distance('{0:1,1:1}/2'::sparsevec, '{0:-1,1:-1}/2');
SELECT cosine_distance('{0:1}/2'::sparsevec, '{1:2}/2');
SELECT cosine_distance('{1:1,2:2}/2'::sparsevec, '{1:2,2:4}/2');
SELECT cosine_distance('{1:1,2:2}/2'::sparsevec, '{}/2');
SELECT cosine_distance('{1:1,2:1}/2'::sparsevec, '{1:-1,2:-1}/2');
SELECT cosine_distance('{1:2}/2'::sparsevec, '{2:2}/2');
SELECT cosine_distance('{}/1'::sparsevec, '{}/1');
SELECT cosine_distance('{0:1}/2'::sparsevec, '{0:1}/3');
SELECT cosine_distance('{1:2}/2'::sparsevec, '{1:1}/3');

View File

@@ -1,19 +1,19 @@
SELECT '{0:1.5,2:3.5}/5'::sparsevec;
SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector;
SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector(5);
SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector(4);
SELECT '{1:1.5,3:3.5}/5'::sparsevec;
SELECT '{1:1.5,3:3.5}/5'::sparsevec::vector;
SELECT '{1:1.5,3:3.5}/5'::sparsevec::vector(5);
SELECT '{1:1.5,3:3.5}/5'::sparsevec::vector(4);
SELECT '[0,1.5,0,3.5,0]'::vector::sparsevec;
SELECT '{0:0,1:1,2:0}/3'::sparsevec;
SELECT '{1:0,2:1,3:0}/3'::sparsevec;
SELECT '{1:1,0:1}/2'::sparsevec;
SELECT '{2:1,1:1}/2'::sparsevec;
SELECT '{}/5'::sparsevec;
SELECT '{}/-1'::sparsevec;
SELECT '{}/100001'::sparsevec;
SELECT '{}/16001'::sparsevec::vector;
SELECT '{-1:1}/1'::sparsevec;
SELECT '{1:1}/1'::sparsevec;
SELECT '{0:1}/1'::sparsevec;
SELECT '{2:1}/1'::sparsevec;
SELECT '{}/1'::sparsevec(2);