mirror of
https://github.com/pgvector/pgvector.git
synced 2026-06-30 01:31:15 +08:00
Changed indices to start at 1 for sparse vectors to match SQL
This commit is contained in:
@@ -501,19 +501,21 @@ SELECT * FROM (
|
||||
Use the `sparsevec` type to store sparse vectors
|
||||
|
||||
```sql
|
||||
CREATE TABLE items (id bigserial PRIMARY KEY, embedding sparsevec(10));
|
||||
CREATE TABLE items (id bigserial PRIMARY KEY, embedding sparsevec(5));
|
||||
```
|
||||
|
||||
Insert vectors
|
||||
|
||||
```sql
|
||||
INSERT INTO items (embedding) VALUES ('{0:1,1:2,2:3}/10'), ('{0:4,1:5,2:6}/10');
|
||||
INSERT INTO items (embedding) VALUES ('{1:1,3:2,5:3}/5'), ('{1:4,3:5,5:6}/5');
|
||||
```
|
||||
|
||||
Note: Indices start at 1, like SQL arrays, and the number after `/` is the number of dimensions
|
||||
|
||||
Get the nearest neighbors by L2 distance
|
||||
|
||||
```sql
|
||||
SELECT * FROM items ORDER BY embedding <-> '{0:3,1:1,2:2}/10' LIMIT 5;
|
||||
SELECT * FROM items ORDER BY embedding <-> '{1:3,3:1,5:2}/5' LIMIT 5;
|
||||
```
|
||||
|
||||
## Hybrid Search
|
||||
|
||||
@@ -89,15 +89,15 @@ CheckIndex(int32 *indices, int i, int dim)
|
||||
{
|
||||
int32 index = indices[i];
|
||||
|
||||
if (index < 0)
|
||||
if (index < 1)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||
errmsg("index must not be negative")));
|
||||
errmsg("index must be greater than zero")));
|
||||
|
||||
if (index >= dim)
|
||||
if (index > dim)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||
errmsg("index must be less than dimensions")));
|
||||
errmsg("index must be less than or equal to dimensions")));
|
||||
|
||||
if (i > 0)
|
||||
{
|
||||
@@ -245,7 +245,7 @@ sparsevec_in(PG_FUNCTION_ARGS)
|
||||
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
||||
errmsg("invalid input syntax for type sparsevec: \"%s\"", lit)));
|
||||
|
||||
if (errno == ERANGE || index < 0 || index > INT_MAX)
|
||||
if (errno == ERANGE || index < 1 || index > INT_MAX)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
|
||||
errmsg("index \"%ld\" is out of range for type sparsevec", index)));
|
||||
|
||||
@@ -1244,7 +1244,7 @@ sparsevec_to_vector(PG_FUNCTION_ARGS)
|
||||
|
||||
result = InitVector(dim);
|
||||
for (int i = 0; i < svec->nnz; i++)
|
||||
result->x[svec->indices[i]] = values[i];
|
||||
result->x[svec->indices[i] - 1] = values[i];
|
||||
|
||||
PG_RETURN_POINTER(result);
|
||||
}
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
SET enable_seqscan = off;
|
||||
CREATE TABLE t (val sparsevec(3));
|
||||
INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL);
|
||||
INSERT INTO t (val) VALUES ('{}/3'), ('{1:1,2:2,3:3}/3'), ('{1:1,2:1,3:1}/3'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val sparsevec_cosine_ops);
|
||||
INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3');
|
||||
SELECT * FROM t ORDER BY val <=> '{0:3,1:3,2:3}/3';
|
||||
INSERT INTO t (val) VALUES ('{1:1,2:2,3:4}/3');
|
||||
SELECT * FROM t ORDER BY val <=> '{1:3,2:3,3:3}/3';
|
||||
val
|
||||
-----------------
|
||||
{0:1,1:1,2:1}/3
|
||||
{0:1,1:2,2:3}/3
|
||||
{0:1,1:2,2:4}/3
|
||||
{1:1,2:1,3:1}/3
|
||||
{1:1,2:2,3:3}/3
|
||||
{1:1,2:2,3:4}/3
|
||||
(3 rows)
|
||||
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '{}/3') t2;
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
SET enable_seqscan = off;
|
||||
CREATE TABLE t (val sparsevec(3));
|
||||
INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL);
|
||||
INSERT INTO t (val) VALUES ('{}/3'), ('{1:1,2:2,3:3}/3'), ('{1:1,2:1,3:1}/3'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val sparsevec_ip_ops);
|
||||
INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3');
|
||||
SELECT * FROM t ORDER BY val <#> '{0:3,1:3,2:3}/3';
|
||||
INSERT INTO t (val) VALUES ('{1:1,2:2,3:4}/3');
|
||||
SELECT * FROM t ORDER BY val <#> '{1:3,2:3,3:3}/3';
|
||||
val
|
||||
-----------------
|
||||
{0:1,1:2,2:4}/3
|
||||
{0:1,1:2,2:3}/3
|
||||
{0:1,1:1,2:1}/3
|
||||
{1:1,2:2,3:4}/3
|
||||
{1:1,2:2,3:3}/3
|
||||
{1:1,2:1,3:1}/3
|
||||
{}/3
|
||||
(4 rows)
|
||||
|
||||
|
||||
@@ -1,14 +1,14 @@
|
||||
SET enable_seqscan = off;
|
||||
CREATE TABLE t (val sparsevec(3));
|
||||
INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL);
|
||||
INSERT INTO t (val) VALUES ('{}/3'), ('{1:1,2:2,3:3}/3'), ('{1:1,2:1,3:1}/3'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val sparsevec_l2_ops);
|
||||
INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3');
|
||||
SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}/3';
|
||||
INSERT INTO t (val) VALUES ('{1:1,2:2,3:4}/3');
|
||||
SELECT * FROM t ORDER BY val <-> '{1:3,2:3,3:3}/3';
|
||||
val
|
||||
-----------------
|
||||
{0:1,1:2,2:3}/3
|
||||
{0:1,1:2,2:4}/3
|
||||
{0:1,1:1,2:1}/3
|
||||
{1:1,2:2,3:3}/3
|
||||
{1:1,2:2,3:4}/3
|
||||
{1:1,2:1,3:1}/3
|
||||
{}/3
|
||||
(4 rows)
|
||||
|
||||
@@ -25,7 +25,7 @@ SELECT COUNT(*) FROM t;
|
||||
(1 row)
|
||||
|
||||
TRUNCATE t;
|
||||
SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}/3';
|
||||
SELECT * FROM t ORDER BY val <-> '{1:3,2:3,3:3}/3';
|
||||
val
|
||||
-----
|
||||
(0 rows)
|
||||
|
||||
@@ -1,52 +1,52 @@
|
||||
SELECT l2_distance('{}/2'::sparsevec, '{0:3,1:4}/2');
|
||||
SELECT l2_distance('{}/2'::sparsevec, '{1:3,2:4}/2');
|
||||
l2_distance
|
||||
-------------
|
||||
5
|
||||
(1 row)
|
||||
|
||||
SELECT l2_distance('{}/2'::sparsevec, '{1:1}/2');
|
||||
SELECT l2_distance('{}/2'::sparsevec, '{2:1}/2');
|
||||
l2_distance
|
||||
-------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT '{}/2'::sparsevec <-> '{0:3,1:4}/2';
|
||||
SELECT '{}/2'::sparsevec <-> '{1:3,2:4}/2';
|
||||
?column?
|
||||
----------
|
||||
5
|
||||
(1 row)
|
||||
|
||||
SELECT inner_product('{0:1,1:2}/2'::sparsevec, '{0:2,1:4}/2');
|
||||
SELECT inner_product('{1:1,2:2}/2'::sparsevec, '{1:2,2:4}/2');
|
||||
inner_product
|
||||
---------------
|
||||
10
|
||||
(1 row)
|
||||
|
||||
SELECT sparsevec_negative_inner_product('{0:1,1:2}/2', '{0:2,1:4}/2');
|
||||
SELECT sparsevec_negative_inner_product('{1:1,2:2}/2', '{1:2,2:4}/2');
|
||||
sparsevec_negative_inner_product
|
||||
----------------------------------
|
||||
-10
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('{0:1,1:2}/2'::sparsevec, '{0:2,1:4}/2');
|
||||
SELECT cosine_distance('{1:1,2:2}/2'::sparsevec, '{1:2,2:4}/2');
|
||||
cosine_distance
|
||||
-----------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('{0:1,1:2}/2'::sparsevec, '{}/2');
|
||||
SELECT cosine_distance('{1:1,2:2}/2'::sparsevec, '{}/2');
|
||||
cosine_distance
|
||||
-----------------
|
||||
NaN
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('{0:1,1:1}/2'::sparsevec, '{0:-1,1:-1}/2');
|
||||
SELECT cosine_distance('{1:1,2:1}/2'::sparsevec, '{1:-1,2:-1}/2');
|
||||
cosine_distance
|
||||
-----------------
|
||||
2
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('{0:1}/2'::sparsevec, '{1:2}/2');
|
||||
SELECT cosine_distance('{1:2}/2'::sparsevec, '{2:2}/2');
|
||||
cosine_distance
|
||||
-----------------
|
||||
1
|
||||
@@ -58,5 +58,5 @@ SELECT cosine_distance('{}/1'::sparsevec, '{}/1');
|
||||
NaN
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('{0:1}/2'::sparsevec, '{0:1}/3');
|
||||
SELECT cosine_distance('{1:2}/2'::sparsevec, '{1:1}/3');
|
||||
ERROR: different sparsevec dimensions 2 and 3
|
||||
|
||||
@@ -1,22 +1,22 @@
|
||||
SELECT '{0:1.5,2:3.5}/5'::sparsevec;
|
||||
SELECT '{1:1.5,3:3.5}/5'::sparsevec;
|
||||
sparsevec
|
||||
-----------------
|
||||
{0:1.5,2:3.5}/5
|
||||
{1:1.5,3:3.5}/5
|
||||
(1 row)
|
||||
|
||||
SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector;
|
||||
SELECT '{1:1.5,3:3.5}/5'::sparsevec::vector;
|
||||
vector
|
||||
-----------------
|
||||
[1.5,0,3.5,0,0]
|
||||
(1 row)
|
||||
|
||||
SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector(5);
|
||||
SELECT '{1:1.5,3:3.5}/5'::sparsevec::vector(5);
|
||||
vector
|
||||
-----------------
|
||||
[1.5,0,3.5,0,0]
|
||||
(1 row)
|
||||
|
||||
SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector(4);
|
||||
SELECT '{1:1.5,3:3.5}/5'::sparsevec::vector(4);
|
||||
ERROR: expected 4 dimensions, not 5
|
||||
SELECT '[0,1.5,0,3.5,0]'::vector::sparsevec;
|
||||
sparsevec
|
||||
@@ -24,15 +24,15 @@ SELECT '[0,1.5,0,3.5,0]'::vector::sparsevec;
|
||||
{1:1.5,3:3.5}/5
|
||||
(1 row)
|
||||
|
||||
SELECT '{0:0,1:1,2:0}/3'::sparsevec;
|
||||
SELECT '{1:0,2:1,3:0}/3'::sparsevec;
|
||||
sparsevec
|
||||
-----------
|
||||
{1:1}/3
|
||||
{2:1}/3
|
||||
(1 row)
|
||||
|
||||
SELECT '{1:1,0:1}/2'::sparsevec;
|
||||
SELECT '{2:1,1:1}/2'::sparsevec;
|
||||
ERROR: indexes must be in ascending order
|
||||
LINE 1: SELECT '{1:1,0:1}/2'::sparsevec;
|
||||
LINE 1: SELECT '{2:1,1:1}/2'::sparsevec;
|
||||
^
|
||||
SELECT '{}/5'::sparsevec;
|
||||
sparsevec
|
||||
@@ -50,13 +50,13 @@ LINE 1: SELECT '{}/100001'::sparsevec;
|
||||
^
|
||||
SELECT '{}/16001'::sparsevec::vector;
|
||||
ERROR: vector cannot have more than 16000 dimensions
|
||||
SELECT '{-1:1}/1'::sparsevec;
|
||||
ERROR: index "-1" is out of range for type sparsevec
|
||||
LINE 1: SELECT '{-1:1}/1'::sparsevec;
|
||||
SELECT '{0:1}/1'::sparsevec;
|
||||
ERROR: index "0" is out of range for type sparsevec
|
||||
LINE 1: SELECT '{0:1}/1'::sparsevec;
|
||||
^
|
||||
SELECT '{1:1}/1'::sparsevec;
|
||||
ERROR: index must be less than dimensions
|
||||
LINE 1: SELECT '{1:1}/1'::sparsevec;
|
||||
SELECT '{2:1}/1'::sparsevec;
|
||||
ERROR: index must be less than or equal to dimensions
|
||||
LINE 1: SELECT '{2:1}/1'::sparsevec;
|
||||
^
|
||||
SELECT '{}/1'::sparsevec(2);
|
||||
ERROR: expected 2 dimensions, not 1
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
SET enable_seqscan = off;
|
||||
|
||||
CREATE TABLE t (val sparsevec(3));
|
||||
INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL);
|
||||
INSERT INTO t (val) VALUES ('{}/3'), ('{1:1,2:2,3:3}/3'), ('{1:1,2:1,3:1}/3'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val sparsevec_cosine_ops);
|
||||
|
||||
INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3');
|
||||
INSERT INTO t (val) VALUES ('{1:1,2:2,3:4}/3');
|
||||
|
||||
SELECT * FROM t ORDER BY val <=> '{0:3,1:3,2:3}/3';
|
||||
SELECT * FROM t ORDER BY val <=> '{1:3,2:3,3:3}/3';
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '{}/3') t2;
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::sparsevec)) t2;
|
||||
|
||||
|
||||
@@ -1,12 +1,12 @@
|
||||
SET enable_seqscan = off;
|
||||
|
||||
CREATE TABLE t (val sparsevec(3));
|
||||
INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL);
|
||||
INSERT INTO t (val) VALUES ('{}/3'), ('{1:1,2:2,3:3}/3'), ('{1:1,2:1,3:1}/3'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val sparsevec_ip_ops);
|
||||
|
||||
INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3');
|
||||
INSERT INTO t (val) VALUES ('{1:1,2:2,3:4}/3');
|
||||
|
||||
SELECT * FROM t ORDER BY val <#> '{0:3,1:3,2:3}/3';
|
||||
SELECT * FROM t ORDER BY val <#> '{1:3,2:3,3:3}/3';
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::sparsevec)) t2;
|
||||
|
||||
DROP TABLE t;
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
SET enable_seqscan = off;
|
||||
|
||||
CREATE TABLE t (val sparsevec(3));
|
||||
INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL);
|
||||
INSERT INTO t (val) VALUES ('{}/3'), ('{1:1,2:2,3:3}/3'), ('{1:1,2:1,3:1}/3'), (NULL);
|
||||
CREATE INDEX ON t USING hnsw (val sparsevec_l2_ops);
|
||||
|
||||
INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3');
|
||||
INSERT INTO t (val) VALUES ('{1:1,2:2,3:4}/3');
|
||||
|
||||
SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}/3';
|
||||
SELECT * FROM t ORDER BY val <-> '{1:3,2:3,3:3}/3';
|
||||
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <-> (SELECT NULL::sparsevec)) t2;
|
||||
SELECT COUNT(*) FROM t;
|
||||
|
||||
TRUNCATE t;
|
||||
SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}/3';
|
||||
SELECT * FROM t ORDER BY val <-> '{1:3,2:3,3:3}/3';
|
||||
|
||||
DROP TABLE t;
|
||||
|
||||
|
||||
@@ -1,13 +1,13 @@
|
||||
SELECT l2_distance('{}/2'::sparsevec, '{0:3,1:4}/2');
|
||||
SELECT l2_distance('{}/2'::sparsevec, '{1:1}/2');
|
||||
SELECT '{}/2'::sparsevec <-> '{0:3,1:4}/2';
|
||||
SELECT l2_distance('{}/2'::sparsevec, '{1:3,2:4}/2');
|
||||
SELECT l2_distance('{}/2'::sparsevec, '{2:1}/2');
|
||||
SELECT '{}/2'::sparsevec <-> '{1:3,2:4}/2';
|
||||
|
||||
SELECT inner_product('{0:1,1:2}/2'::sparsevec, '{0:2,1:4}/2');
|
||||
SELECT sparsevec_negative_inner_product('{0:1,1:2}/2', '{0:2,1:4}/2');
|
||||
SELECT inner_product('{1:1,2:2}/2'::sparsevec, '{1:2,2:4}/2');
|
||||
SELECT sparsevec_negative_inner_product('{1:1,2:2}/2', '{1:2,2:4}/2');
|
||||
|
||||
SELECT cosine_distance('{0:1,1:2}/2'::sparsevec, '{0:2,1:4}/2');
|
||||
SELECT cosine_distance('{0:1,1:2}/2'::sparsevec, '{}/2');
|
||||
SELECT cosine_distance('{0:1,1:1}/2'::sparsevec, '{0:-1,1:-1}/2');
|
||||
SELECT cosine_distance('{0:1}/2'::sparsevec, '{1:2}/2');
|
||||
SELECT cosine_distance('{1:1,2:2}/2'::sparsevec, '{1:2,2:4}/2');
|
||||
SELECT cosine_distance('{1:1,2:2}/2'::sparsevec, '{}/2');
|
||||
SELECT cosine_distance('{1:1,2:1}/2'::sparsevec, '{1:-1,2:-1}/2');
|
||||
SELECT cosine_distance('{1:2}/2'::sparsevec, '{2:2}/2');
|
||||
SELECT cosine_distance('{}/1'::sparsevec, '{}/1');
|
||||
SELECT cosine_distance('{0:1}/2'::sparsevec, '{0:1}/3');
|
||||
SELECT cosine_distance('{1:2}/2'::sparsevec, '{1:1}/3');
|
||||
|
||||
@@ -1,19 +1,19 @@
|
||||
SELECT '{0:1.5,2:3.5}/5'::sparsevec;
|
||||
SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector;
|
||||
SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector(5);
|
||||
SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector(4);
|
||||
SELECT '{1:1.5,3:3.5}/5'::sparsevec;
|
||||
SELECT '{1:1.5,3:3.5}/5'::sparsevec::vector;
|
||||
SELECT '{1:1.5,3:3.5}/5'::sparsevec::vector(5);
|
||||
SELECT '{1:1.5,3:3.5}/5'::sparsevec::vector(4);
|
||||
SELECT '[0,1.5,0,3.5,0]'::vector::sparsevec;
|
||||
|
||||
SELECT '{0:0,1:1,2:0}/3'::sparsevec;
|
||||
SELECT '{1:0,2:1,3:0}/3'::sparsevec;
|
||||
|
||||
SELECT '{1:1,0:1}/2'::sparsevec;
|
||||
SELECT '{2:1,1:1}/2'::sparsevec;
|
||||
|
||||
SELECT '{}/5'::sparsevec;
|
||||
SELECT '{}/-1'::sparsevec;
|
||||
SELECT '{}/100001'::sparsevec;
|
||||
SELECT '{}/16001'::sparsevec::vector;
|
||||
|
||||
SELECT '{-1:1}/1'::sparsevec;
|
||||
SELECT '{1:1}/1'::sparsevec;
|
||||
SELECT '{0:1}/1'::sparsevec;
|
||||
SELECT '{2:1}/1'::sparsevec;
|
||||
|
||||
SELECT '{}/1'::sparsevec(2);
|
||||
|
||||
Reference in New Issue
Block a user