mirror of
https://github.com/pgvector/pgvector.git
synced 2026-06-06 05:51:21 +08:00
Added sparsevec type
This commit is contained in:
@@ -1,6 +1,7 @@
|
|||||||
## 0.7.0 (unreleased)
|
## 0.7.0 (unreleased)
|
||||||
|
|
||||||
- Added `halfvec` type
|
- Added `halfvec` type
|
||||||
|
- Added `sparsevec` type
|
||||||
- Added support for bit vectors to HNSW
|
- Added support for bit vectors to HNSW
|
||||||
- Added `hamming_distance` function
|
- Added `hamming_distance` function
|
||||||
- Added `jaccard_distance` function
|
- Added `jaccard_distance` function
|
||||||
|
|||||||
4
Makefile
4
Makefile
@@ -3,8 +3,8 @@ EXTVERSION = 0.6.2
|
|||||||
|
|
||||||
MODULE_big = vector
|
MODULE_big = vector
|
||||||
DATA = $(wildcard sql/*--*.sql)
|
DATA = $(wildcard sql/*--*.sql)
|
||||||
OBJS = src/bitvector.o src/halfvec.o src/hnsw.o src/hnswbuild.o src/hnswinsert.o src/hnswscan.o src/hnswutils.o src/hnswvacuum.o src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/vector.o
|
OBJS = src/bitvector.o src/halfvec.o src/hnsw.o src/hnswbuild.o src/hnswinsert.o src/hnswscan.o src/hnswutils.o src/hnswvacuum.o src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/sparsevec.o src/vector.o
|
||||||
HEADERS = src/halfvec.h src/vector.h
|
HEADERS = src/halfvec.h src/sparsevec.h src/vector.h
|
||||||
|
|
||||||
TESTS = $(wildcard test/sql/*.sql)
|
TESTS = $(wildcard test/sql/*.sql)
|
||||||
REGRESS = $(patsubst test/sql/%.sql,%,$(TESTS))
|
REGRESS = $(patsubst test/sql/%.sql,%,$(TESTS))
|
||||||
|
|||||||
@@ -1,10 +1,10 @@
|
|||||||
EXTENSION = vector
|
EXTENSION = vector
|
||||||
EXTVERSION = 0.6.2
|
EXTVERSION = 0.6.2
|
||||||
|
|
||||||
OBJS = src\bitvector.obj src\halfvec.obj src\hnsw.obj src\hnswbuild.obj src\hnswinsert.obj src\hnswscan.obj src\hnswutils.obj src\hnswvacuum.obj src\ivfbuild.obj src\ivfflat.obj src\ivfinsert.obj src\ivfkmeans.obj src\ivfscan.obj src\ivfutils.obj src\ivfvacuum.obj src\vector.obj
|
OBJS = src\bitvector.obj src\halfvec.obj src\hnsw.obj src\hnswbuild.obj src\hnswinsert.obj src\hnswscan.obj src\hnswutils.obj src\hnswvacuum.obj src\ivfbuild.obj src\ivfflat.obj src\ivfinsert.obj src\ivfkmeans.obj src\ivfscan.obj src\ivfutils.obj src\ivfvacuum.obj src\sparsevec.obj src\vector.obj
|
||||||
HEADERS = src\halfvec.h src\vector.h
|
HEADERS = src\halfvec.h src\sparsevec.h src\vector.h
|
||||||
|
|
||||||
REGRESS = bit_functions btree cast copy halfvec_functions halfvec_input hnsw_bit_hamming hnsw_bit_jaccard hnsw_halfvec_cosine hnsw_halfvec_ip hnsw_halfvec_l2 hnsw_options hnsw_unlogged hnsw_vector_cosine hnsw_vector_ip hnsw_vector_l2 ivfflat_options ivfflat_unlogged ivfflat_vector_cosine ivfflat_vector_ip ivfflat_vector_l2 vector_functions vector_input
|
REGRESS = bit_functions btree cast copy halfvec_functions halfvec_input hnsw_bit_hamming hnsw_bit_jaccard hnsw_halfvec_cosine hnsw_halfvec_ip hnsw_halfvec_l2 hnsw_options hnsw_sparsevec_cosine hnsw_sparsevec_ip hnsw_sparsevec_l2 hnsw_unlogged hnsw_vector_cosine hnsw_vector_ip hnsw_vector_l2 ivfflat_options ivfflat_unlogged ivfflat_vector_cosine ivfflat_vector_ip ivfflat_vector_l2 sparsevec_functions sparsevec_input vector_functions vector_input
|
||||||
REGRESS_OPTS = --inputdir=test --load-extension=$(EXTENSION)
|
REGRESS_OPTS = --inputdir=test --load-extension=$(EXTENSION)
|
||||||
|
|
||||||
# For /arch flags
|
# For /arch flags
|
||||||
|
|||||||
22
README.md
22
README.md
@@ -714,6 +714,7 @@ Also, note that `NULL` vectors are not indexed (as well as zero vectors for cosi
|
|||||||
- [Vector](#vector-type)
|
- [Vector](#vector-type)
|
||||||
- [Halfvec](#halfvec-type)
|
- [Halfvec](#halfvec-type)
|
||||||
- [Bit](#bit-type)
|
- [Bit](#bit-type)
|
||||||
|
- [Sparsevec](#sparsevec-type)
|
||||||
|
|
||||||
### Vector Type
|
### Vector Type
|
||||||
|
|
||||||
@@ -789,6 +790,27 @@ Function | Description | Added
|
|||||||
hamming_distance(bit, bit) → double precision | Hamming distance | unreleased
|
hamming_distance(bit, bit) → double precision | Hamming distance | unreleased
|
||||||
jaccard_distance(bit, bit) → double precision | Jaccard distance | unreleased
|
jaccard_distance(bit, bit) → double precision | Jaccard distance | unreleased
|
||||||
|
|
||||||
|
### Sparsevec Type
|
||||||
|
|
||||||
|
Each sparse vector takes `8 * nnz + 16` bytes of storage. Each element is a single-precision floating-point number, and all elements must be finite (no `NaN`, `Infinity` or `-Infinity`).
|
||||||
|
|
||||||
|
### Sparsevec Operators
|
||||||
|
|
||||||
|
Operator | Description | Added
|
||||||
|
--- | --- | ---
|
||||||
|
<-> | Euclidean distance | unreleased
|
||||||
|
<#> | negative inner product | unreleased
|
||||||
|
<=> | cosine distance | unreleased
|
||||||
|
|
||||||
|
### Sparsevec Functions
|
||||||
|
|
||||||
|
Function | Description | Added
|
||||||
|
--- | --- | ---
|
||||||
|
cosine_distance(sparsevec, sparsevec) → double precision | cosine distance | unreleased
|
||||||
|
inner_product(sparsevec, sparsevec) → double precision | inner product | unreleased
|
||||||
|
l2_distance(sparsevec, sparsevec) → double precision | Euclidean distance | unreleased
|
||||||
|
l1_distance(sparsevec, sparsevec) → double precision | taxicab distance | unreleased
|
||||||
|
|
||||||
## Installation Notes - Linux and Mac
|
## Installation Notes - Linux and Mac
|
||||||
|
|
||||||
### Postgres Location
|
### Postgres Location
|
||||||
|
|||||||
@@ -158,3 +158,96 @@ CREATE CAST (halfvec AS vector)
|
|||||||
|
|
||||||
CREATE CAST (vector AS halfvec)
|
CREATE CAST (vector AS halfvec)
|
||||||
WITH FUNCTION vector_to_halfvec(vector, integer, boolean) AS IMPLICIT;
|
WITH FUNCTION vector_to_halfvec(vector, integer, boolean) AS IMPLICIT;
|
||||||
|
|
||||||
|
-- Shell type: lets the I/O functions below mention sparsevec before the
-- full definition exists.
CREATE TYPE sparsevec;

-- Text and binary I/O plus typmod handling for the type.
CREATE FUNCTION sparsevec_in(cstring, oid, integer)
    RETURNS sparsevec
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';

CREATE FUNCTION sparsevec_out(sparsevec)
    RETURNS cstring
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';

CREATE FUNCTION sparsevec_typmod_in(cstring[])
    RETURNS integer
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';

CREATE FUNCTION sparsevec_recv(internal, oid, integer)
    RETURNS sparsevec
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';

CREATE FUNCTION sparsevec_send(sparsevec)
    RETURNS bytea
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';

CREATE TYPE sparsevec (
    INPUT     = sparsevec_in,
    OUTPUT    = sparsevec_out,
    TYPMOD_IN = sparsevec_typmod_in,
    RECEIVE   = sparsevec_recv,
    SEND      = sparsevec_send,
    STORAGE   = external
);

-- Distance functions; the SQL names are overloads bound to sparsevec-specific
-- C symbols.
CREATE FUNCTION l2_distance(sparsevec, sparsevec)
    RETURNS float8
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME', 'sparsevec_l2_distance';

CREATE FUNCTION inner_product(sparsevec, sparsevec)
    RETURNS float8
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME', 'sparsevec_inner_product';

CREATE FUNCTION cosine_distance(sparsevec, sparsevec)
    RETURNS float8
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME', 'sparsevec_cosine_distance';

CREATE FUNCTION sparsevec_norm(sparsevec)
    RETURNS float8
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';

-- Functions referenced by the <#> operator and by the hnsw operator classes
-- at the end of this section.
CREATE FUNCTION sparsevec_l2_squared_distance(sparsevec, sparsevec)
    RETURNS float8
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';

CREATE FUNCTION sparsevec_negative_inner_product(sparsevec, sparsevec)
    RETURNS float8
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';

-- Conversion functions backing the casts below.
CREATE FUNCTION sparsevec(sparsevec, integer, boolean)
    RETURNS sparsevec
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';

CREATE FUNCTION vector_to_sparsevec(vector, integer, boolean)
    RETURNS sparsevec
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';

CREATE FUNCTION sparsevec_to_vector(sparsevec, integer, boolean)
    RETURNS vector
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';

-- Implicit casts: sparsevec to itself (typmod coercion) and to/from vector.
CREATE CAST (sparsevec AS sparsevec)
    WITH FUNCTION sparsevec(sparsevec, integer, boolean)
    AS IMPLICIT;

CREATE CAST (sparsevec AS vector)
    WITH FUNCTION sparsevec_to_vector(sparsevec, integer, boolean)
    AS IMPLICIT;

CREATE CAST (vector AS sparsevec)
    WITH FUNCTION vector_to_sparsevec(vector, integer, boolean)
    AS IMPLICIT;

-- Distance operators; each is declared as its own commutator.
CREATE OPERATOR <-> (
    LEFTARG = sparsevec,
    RIGHTARG = sparsevec,
    PROCEDURE = l2_distance,
    COMMUTATOR = '<->'
);

CREATE OPERATOR <#> (
    LEFTARG = sparsevec,
    RIGHTARG = sparsevec,
    PROCEDURE = sparsevec_negative_inner_product,
    COMMUTATOR = '<#>'
);

CREATE OPERATOR <=> (
    LEFTARG = sparsevec,
    RIGHTARG = sparsevec,
    PROCEDURE = cosine_distance,
    COMMUTATOR = '<=>'
);

-- hnsw operator classes: support function 1 is the distance function, and
-- the cosine class additionally registers sparsevec_norm as support function 2.
CREATE OPERATOR CLASS sparsevec_l2_ops
    FOR TYPE sparsevec USING hnsw AS
        OPERATOR 1 <-> (sparsevec, sparsevec) FOR ORDER BY float_ops,
        FUNCTION 1 sparsevec_l2_squared_distance(sparsevec, sparsevec);

CREATE OPERATOR CLASS sparsevec_ip_ops
    FOR TYPE sparsevec USING hnsw AS
        OPERATOR 1 <#> (sparsevec, sparsevec) FOR ORDER BY float_ops,
        FUNCTION 1 sparsevec_negative_inner_product(sparsevec, sparsevec);

CREATE OPERATOR CLASS sparsevec_cosine_ops
    FOR TYPE sparsevec USING hnsw AS
        OPERATOR 1 <=> (sparsevec, sparsevec) FOR ORDER BY float_ops,
        FUNCTION 1 sparsevec_negative_inner_product(sparsevec, sparsevec),
        FUNCTION 2 sparsevec_norm(sparsevec);
|
||||||
|
|||||||
107
sql/vector.sql
107
sql/vector.sql
@@ -463,3 +463,110 @@ CREATE CAST (halfvec AS vector)
|
|||||||
|
|
||||||
CREATE CAST (vector AS halfvec)
|
CREATE CAST (vector AS halfvec)
|
||||||
WITH FUNCTION vector_to_halfvec(vector, integer, boolean) AS IMPLICIT;
|
WITH FUNCTION vector_to_halfvec(vector, integer, boolean) AS IMPLICIT;
|
||||||
|
|
||||||
|
-- sparsevec type

-- Shell type: lets the I/O functions below mention sparsevec before the
-- full definition exists.
CREATE TYPE sparsevec;

CREATE FUNCTION sparsevec_in(cstring, oid, integer)
    RETURNS sparsevec
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';

CREATE FUNCTION sparsevec_out(sparsevec)
    RETURNS cstring
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';

CREATE FUNCTION sparsevec_typmod_in(cstring[])
    RETURNS integer
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';

CREATE FUNCTION sparsevec_recv(internal, oid, integer)
    RETURNS sparsevec
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';

CREATE FUNCTION sparsevec_send(sparsevec)
    RETURNS bytea
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';

CREATE TYPE sparsevec (
    INPUT     = sparsevec_in,
    OUTPUT    = sparsevec_out,
    TYPMOD_IN = sparsevec_typmod_in,
    RECEIVE   = sparsevec_recv,
    SEND      = sparsevec_send,
    STORAGE   = external
);

-- sparsevec functions

-- The SQL names are overloads bound to sparsevec-specific C symbols.
CREATE FUNCTION l2_distance(sparsevec, sparsevec)
    RETURNS float8
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME', 'sparsevec_l2_distance';

CREATE FUNCTION inner_product(sparsevec, sparsevec)
    RETURNS float8
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME', 'sparsevec_inner_product';

CREATE FUNCTION cosine_distance(sparsevec, sparsevec)
    RETURNS float8
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME', 'sparsevec_cosine_distance';

CREATE FUNCTION sparsevec_norm(sparsevec)
    RETURNS float8
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';

-- sparsevec private functions

CREATE FUNCTION sparsevec_l2_squared_distance(sparsevec, sparsevec)
    RETURNS float8
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';

CREATE FUNCTION sparsevec_negative_inner_product(sparsevec, sparsevec)
    RETURNS float8
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';

-- sparsevec cast functions

CREATE FUNCTION sparsevec(sparsevec, integer, boolean)
    RETURNS sparsevec
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';

CREATE FUNCTION vector_to_sparsevec(vector, integer, boolean)
    RETURNS sparsevec
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';

CREATE FUNCTION sparsevec_to_vector(sparsevec, integer, boolean)
    RETURNS vector
    LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE
    AS 'MODULE_PATHNAME';

-- sparsevec casts

-- All three casts are implicit: sparsevec to itself (typmod coercion) and
-- to/from vector.
CREATE CAST (sparsevec AS sparsevec)
    WITH FUNCTION sparsevec(sparsevec, integer, boolean)
    AS IMPLICIT;

CREATE CAST (sparsevec AS vector)
    WITH FUNCTION sparsevec_to_vector(sparsevec, integer, boolean)
    AS IMPLICIT;

CREATE CAST (vector AS sparsevec)
    WITH FUNCTION vector_to_sparsevec(vector, integer, boolean)
    AS IMPLICIT;

-- sparsevec operators

-- Each distance operator is declared as its own commutator.
CREATE OPERATOR <-> (
    LEFTARG = sparsevec,
    RIGHTARG = sparsevec,
    PROCEDURE = l2_distance,
    COMMUTATOR = '<->'
);

CREATE OPERATOR <#> (
    LEFTARG = sparsevec,
    RIGHTARG = sparsevec,
    PROCEDURE = sparsevec_negative_inner_product,
    COMMUTATOR = '<#>'
);

CREATE OPERATOR <=> (
    LEFTARG = sparsevec,
    RIGHTARG = sparsevec,
    PROCEDURE = cosine_distance,
    COMMUTATOR = '<=>'
);

-- sparsevec opclasses

-- Support function 1 is the distance function; the cosine class additionally
-- registers sparsevec_norm as support function 2.
CREATE OPERATOR CLASS sparsevec_l2_ops
    FOR TYPE sparsevec USING hnsw AS
        OPERATOR 1 <-> (sparsevec, sparsevec) FOR ORDER BY float_ops,
        FUNCTION 1 sparsevec_l2_squared_distance(sparsevec, sparsevec);

CREATE OPERATOR CLASS sparsevec_ip_ops
    FOR TYPE sparsevec USING hnsw AS
        OPERATOR 1 <#> (sparsevec, sparsevec) FOR ORDER BY float_ops,
        FUNCTION 1 sparsevec_negative_inner_product(sparsevec, sparsevec);

CREATE OPERATOR CLASS sparsevec_cosine_ops
    FOR TYPE sparsevec USING hnsw AS
        OPERATOR 1 <=> (sparsevec, sparsevec) FOR ORDER BY float_ops,
        FUNCTION 1 sparsevec_negative_inner_product(sparsevec, sparsevec),
        FUNCTION 2 sparsevec_norm(sparsevec);
|
||||||
|
|||||||
@@ -17,6 +17,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define HNSW_MAX_DIM 2000
|
#define HNSW_MAX_DIM 2000
|
||||||
|
#define HNSW_MAX_NNZ 1000
|
||||||
|
|
||||||
/* Support functions */
|
/* Support functions */
|
||||||
#define HNSW_DISTANCE_PROC 1
|
#define HNSW_DISTANCE_PROC 1
|
||||||
@@ -59,7 +60,8 @@ typedef enum HnswType
|
|||||||
{
|
{
|
||||||
HNSW_TYPE_VECTOR,
|
HNSW_TYPE_VECTOR,
|
||||||
HNSW_TYPE_HALFVEC,
|
HNSW_TYPE_HALFVEC,
|
||||||
HNSW_TYPE_BIT
|
HNSW_TYPE_BIT,
|
||||||
|
HNSW_TYPE_SPARSEVEC
|
||||||
} HnswType;
|
} HnswType;
|
||||||
|
|
||||||
/* Build phases */
|
/* Build phases */
|
||||||
@@ -376,6 +378,7 @@ int HnswGetEfConstruction(Relation index);
|
|||||||
FmgrInfo *HnswOptionalProcInfo(Relation index, uint16 procnum);
|
FmgrInfo *HnswOptionalProcInfo(Relation index, uint16 procnum);
|
||||||
HnswType HnswGetType(Relation index);
|
HnswType HnswGetType(Relation index);
|
||||||
bool HnswNormValue(FmgrInfo *procinfo, Oid collation, Datum *value, HnswType type);
|
bool HnswNormValue(FmgrInfo *procinfo, Oid collation, Datum *value, HnswType type);
|
||||||
|
void HnswCheckValue(Datum value, HnswType type);
|
||||||
Buffer HnswNewBuffer(Relation index, ForkNumber forkNum);
|
Buffer HnswNewBuffer(Relation index, ForkNumber forkNum);
|
||||||
void HnswInitPage(Buffer buf, Page page);
|
void HnswInitPage(Buffer buf, Page page);
|
||||||
void HnswInit(void);
|
void HnswInit(void);
|
||||||
|
|||||||
@@ -487,6 +487,9 @@ InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heaptid, Hn
|
|||||||
/* Detoast once for all calls */
|
/* Detoast once for all calls */
|
||||||
Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0]));
|
Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0]));
|
||||||
|
|
||||||
|
/* Check value */
|
||||||
|
HnswCheckValue(value, buildstate->type);
|
||||||
|
|
||||||
/* Normalize if needed */
|
/* Normalize if needed */
|
||||||
if (buildstate->normprocinfo != NULL)
|
if (buildstate->normprocinfo != NULL)
|
||||||
{
|
{
|
||||||
@@ -678,6 +681,8 @@ GetMaxDimensions(HnswType type)
|
|||||||
maxDimensions *= 2;
|
maxDimensions *= 2;
|
||||||
else if (type == HNSW_TYPE_BIT)
|
else if (type == HNSW_TYPE_BIT)
|
||||||
maxDimensions *= 32;
|
maxDimensions *= 32;
|
||||||
|
else if (type == HNSW_TYPE_SPARSEVEC)
|
||||||
|
maxDimensions = INT_MAX;
|
||||||
|
|
||||||
return maxDimensions;
|
return maxDimensions;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -614,15 +614,19 @@ HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_ti
|
|||||||
Datum value;
|
Datum value;
|
||||||
FmgrInfo *normprocinfo;
|
FmgrInfo *normprocinfo;
|
||||||
Oid collation = index->rd_indcollation[0];
|
Oid collation = index->rd_indcollation[0];
|
||||||
|
HnswType type = HnswGetType(index);
|
||||||
|
|
||||||
/* Detoast once for all calls */
|
/* Detoast once for all calls */
|
||||||
value = PointerGetDatum(PG_DETOAST_DATUM(values[0]));
|
value = PointerGetDatum(PG_DETOAST_DATUM(values[0]));
|
||||||
|
|
||||||
|
/* Check value */
|
||||||
|
HnswCheckValue(value, type);
|
||||||
|
|
||||||
/* Normalize if needed */
|
/* Normalize if needed */
|
||||||
normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC);
|
normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC);
|
||||||
if (normprocinfo != NULL)
|
if (normprocinfo != NULL)
|
||||||
{
|
{
|
||||||
if (!HnswNormValue(normprocinfo, collation, &value, HnswGetType(index)))
|
if (!HnswNormValue(normprocinfo, collation, &value, type))
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -8,6 +8,7 @@
|
|||||||
#include "halfvec.h"
|
#include "halfvec.h"
|
||||||
#include "hnsw.h"
|
#include "hnsw.h"
|
||||||
#include "lib/pairingheap.h"
|
#include "lib/pairingheap.h"
|
||||||
|
#include "sparsevec.h"
|
||||||
#include "storage/bufmgr.h"
|
#include "storage/bufmgr.h"
|
||||||
#include "utils/datum.h"
|
#include "utils/datum.h"
|
||||||
#include "utils/memdebug.h"
|
#include "utils/memdebug.h"
|
||||||
@@ -176,6 +177,8 @@ HnswGetType(Relation index)
|
|||||||
result = HNSW_TYPE_VECTOR;
|
result = HNSW_TYPE_VECTOR;
|
||||||
else if (strcmp(NameStr(type->typname), "halfvec") == 0)
|
else if (strcmp(NameStr(type->typname), "halfvec") == 0)
|
||||||
result = HNSW_TYPE_HALFVEC;
|
result = HNSW_TYPE_HALFVEC;
|
||||||
|
else if (strcmp(NameStr(type->typname), "sparsevec") == 0)
|
||||||
|
result = HNSW_TYPE_SPARSEVEC;
|
||||||
else
|
else
|
||||||
elog(ERROR, "Unsupported type");
|
elog(ERROR, "Unsupported type");
|
||||||
|
|
||||||
@@ -223,6 +226,21 @@ HnswNormValue(FmgrInfo *procinfo, Oid collation, Datum *value, HnswType type)
|
|||||||
|
|
||||||
*value = PointerGetDatum(result);
|
*value = PointerGetDatum(result);
|
||||||
}
|
}
|
||||||
|
else if (type == HNSW_TYPE_SPARSEVEC)
|
||||||
|
{
|
||||||
|
SparseVector *v = DatumGetSparseVector(*value);
|
||||||
|
SparseVector *result = InitSparseVector(v->dim, v->nnz);
|
||||||
|
float *vx = SPARSEVEC_VALUES(v);
|
||||||
|
float *rx = SPARSEVEC_VALUES(result);
|
||||||
|
|
||||||
|
for (int i = 0; i < v->nnz; i++)
|
||||||
|
{
|
||||||
|
result->indices[i] = v->indices[i];
|
||||||
|
rx[i] = vx[i] / norm;
|
||||||
|
}
|
||||||
|
|
||||||
|
*value = PointerGetDatum(result);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
elog(ERROR, "Unsupported type");
|
elog(ERROR, "Unsupported type");
|
||||||
|
|
||||||
@@ -232,6 +250,21 @@ HnswNormValue(FmgrInfo *procinfo, Oid collation, Datum *value, HnswType type)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check if a value can be indexed
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
HnswCheckValue(Datum value, HnswType type)
|
||||||
|
{
|
||||||
|
if (type == HNSW_TYPE_SPARSEVEC)
|
||||||
|
{
|
||||||
|
SparseVector *vec = DatumGetSparseVector(value);
|
||||||
|
|
||||||
|
if (vec->nnz > HNSW_MAX_NNZ)
|
||||||
|
elog(ERROR, "sparsevec cannot have more than %d non-zero elements for hnsw index", HNSW_MAX_NNZ);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* New buffer
|
* New buffer
|
||||||
*/
|
*/
|
||||||
|
|||||||
778
src/sparsevec.c
Normal file
778
src/sparsevec.c
Normal file
@@ -0,0 +1,778 @@
|
|||||||
|
#include "postgres.h"
|
||||||
|
|
||||||
|
#include <limits.h>
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
#include "fmgr.h"
|
||||||
|
#include "libpq/pqformat.h"
|
||||||
|
#include "sparsevec.h"
|
||||||
|
#include "utils/array.h"
|
||||||
|
#include "utils/builtins.h"
|
||||||
|
#include "vector.h"
|
||||||
|
|
||||||
|
#if PG_VERSION_NUM >= 120000
|
||||||
|
#include "common/shortest_dec.h"
|
||||||
|
#include "utils/float.h"
|
||||||
|
#else
|
||||||
|
#include <float.h>
|
||||||
|
#include "utils/builtins.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Ensure same dimensions
|
||||||
|
*/
|
||||||
|
static inline void
|
||||||
|
CheckDims(SparseVector * a, SparseVector * b)
|
||||||
|
{
|
||||||
|
if (a->dim != b->dim)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||||
|
errmsg("different sparsevec dimensions %d and %d", a->dim, b->dim)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Ensure expected dimensions
|
||||||
|
*/
|
||||||
|
static inline void
|
||||||
|
CheckExpectedDim(int32 typmod, int dim)
|
||||||
|
{
|
||||||
|
if (typmod != -1 && typmod != dim)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||||
|
errmsg("expected %d dimensions, not %d", typmod, dim)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Ensure valid dimensions
|
||||||
|
*/
|
||||||
|
static inline void
|
||||||
|
CheckDim(int dim)
|
||||||
|
{
|
||||||
|
if (dim < 1)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||||
|
errmsg("sparsevec must have at least 1 dimension")));
|
||||||
|
|
||||||
|
if (dim > SPARSEVEC_MAX_DIM)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
||||||
|
errmsg("sparsevec cannot have more than %d dimensions", SPARSEVEC_MAX_DIM)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Ensure valid nnz
|
||||||
|
*/
|
||||||
|
static inline void
|
||||||
|
CheckNnz(int nnz, int dim)
|
||||||
|
{
|
||||||
|
if (nnz < 0)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||||
|
errmsg("sparsevec must have at least one element")));
|
||||||
|
|
||||||
|
if (nnz > dim)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
||||||
|
errmsg("sparsevec cannot have more elements than dimensions")));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Ensure valid index
|
||||||
|
*/
|
||||||
|
static inline void
|
||||||
|
CheckIndex(int32 *indices, int i, int dim)
|
||||||
|
{
|
||||||
|
int32 index = indices[i];
|
||||||
|
|
||||||
|
if (index < 0)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||||
|
errmsg("index must not be negative")));
|
||||||
|
|
||||||
|
if (index >= dim)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||||
|
errmsg("index must be less than dimensions")));
|
||||||
|
|
||||||
|
if (i > 0)
|
||||||
|
{
|
||||||
|
if (index < indices[i - 1])
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||||
|
errmsg("indexes must be in ascending order")));
|
||||||
|
|
||||||
|
if (index == indices[i - 1])
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||||
|
errmsg("indexes must not contain duplicates")));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Ensure finite element
|
||||||
|
*/
|
||||||
|
static inline void
|
||||||
|
CheckElement(float value)
|
||||||
|
{
|
||||||
|
if (isnan(value))
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||||
|
errmsg("NaN not allowed in sparsevec")));
|
||||||
|
|
||||||
|
if (isinf(value))
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||||
|
errmsg("infinite value not allowed in sparsevec")));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allocate and initialize a new sparse vector
|
||||||
|
*/
|
||||||
|
SparseVector *
|
||||||
|
InitSparseVector(int dim, int nnz)
|
||||||
|
{
|
||||||
|
SparseVector *result;
|
||||||
|
int size;
|
||||||
|
|
||||||
|
size = SPARSEVEC_SIZE(nnz);
|
||||||
|
result = (SparseVector *) palloc0(size);
|
||||||
|
SET_VARSIZE(result, size);
|
||||||
|
result->dim = dim;
|
||||||
|
result->nnz = nnz;
|
||||||
|
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Local whitespace test (array_isspace() is static, so it cannot be reused).
 * Returns true for space, tab, newline, carriage return, vertical tab and
 * form feed; false for everything else.
 */
static inline bool
sparsevec_isspace(char ch)
{
	switch (ch)
	{
		case ' ':
		case '\t':
		case '\n':
		case '\r':
		case '\v':
		case '\f':
			return true;
		default:
			return false;
	}
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Convert textual representation to internal representation
|
||||||
|
*/
|
||||||
|
PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_in);
|
||||||
|
Datum
|
||||||
|
sparsevec_in(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
char *lit = PG_GETARG_CSTRING(0);
|
||||||
|
int32 typmod = PG_GETARG_INT32(2);
|
||||||
|
int dim;
|
||||||
|
char *pt;
|
||||||
|
char *stringEnd;
|
||||||
|
SparseVector *result;
|
||||||
|
float *rvalues;
|
||||||
|
char *litcopy = pstrdup(lit);
|
||||||
|
char *str = litcopy;
|
||||||
|
int32 *indices;
|
||||||
|
float *values;
|
||||||
|
int maxNnz;
|
||||||
|
int nnz = 0;
|
||||||
|
|
||||||
|
maxNnz = 1;
|
||||||
|
pt = str;
|
||||||
|
while (*pt != '\0')
|
||||||
|
{
|
||||||
|
if (*pt == ',')
|
||||||
|
maxNnz++;
|
||||||
|
|
||||||
|
pt++;
|
||||||
|
}
|
||||||
|
|
||||||
|
indices = palloc(maxNnz * sizeof(int32));
|
||||||
|
values = palloc(maxNnz * sizeof(float));
|
||||||
|
|
||||||
|
while (sparsevec_isspace(*str))
|
||||||
|
str++;
|
||||||
|
|
||||||
|
if (*str != '{')
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
||||||
|
errmsg("malformed sparsevec literal: \"%s\"", lit),
|
||||||
|
errdetail("Vector contents must start with \"{\".")));
|
||||||
|
|
||||||
|
str++;
|
||||||
|
pt = strtok(str, ",");
|
||||||
|
stringEnd = pt;
|
||||||
|
|
||||||
|
while (pt != NULL && *stringEnd != '}')
|
||||||
|
{
|
||||||
|
long index;
|
||||||
|
float value;
|
||||||
|
|
||||||
|
/* TODO Better error */
|
||||||
|
if (nnz == maxNnz)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
||||||
|
errmsg("ran out of buffer: \"%s\"", lit)));
|
||||||
|
|
||||||
|
while (sparsevec_isspace(*pt))
|
||||||
|
pt++;
|
||||||
|
|
||||||
|
/* Check for empty string like float4in */
|
||||||
|
if (*pt == '\0')
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
||||||
|
errmsg("invalid input syntax for type sparsevec: \"%s\"", lit)));
|
||||||
|
|
||||||
|
/* Use similar logic as int2vectorin */
|
||||||
|
errno = 0;
|
||||||
|
index = strtol(pt, &stringEnd, 10);
|
||||||
|
|
||||||
|
if (stringEnd == pt)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
||||||
|
errmsg("invalid input syntax for type sparsevec: \"%s\"", lit)));
|
||||||
|
|
||||||
|
if (errno == ERANGE || index < 0 || index > INT_MAX)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
|
||||||
|
errmsg("index \"%ld\" is out of range for type sparsevec", index)));
|
||||||
|
|
||||||
|
if (stringEnd == pt)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
||||||
|
errmsg("invalid input syntax for type sparsevec: \"%s\"", lit)));
|
||||||
|
|
||||||
|
while (sparsevec_isspace(*stringEnd))
|
||||||
|
stringEnd++;
|
||||||
|
|
||||||
|
if (*stringEnd != ':')
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
||||||
|
errmsg("invalid input syntax for type sparsevec: \"%s\"", lit)));
|
||||||
|
|
||||||
|
stringEnd++;
|
||||||
|
|
||||||
|
while (sparsevec_isspace(*stringEnd))
|
||||||
|
stringEnd++;
|
||||||
|
|
||||||
|
errno = 0;
|
||||||
|
pt = stringEnd;
|
||||||
|
value = strtof(pt, &stringEnd);
|
||||||
|
|
||||||
|
if (stringEnd == pt)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
||||||
|
errmsg("invalid input syntax for type sparsevec: \"%s\"", lit)));
|
||||||
|
|
||||||
|
/* Check for range error like float4in */
|
||||||
|
if (errno == ERANGE && (value == 0 || isinf(value)))
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
|
||||||
|
errmsg("\"%s\" is out of range for type sparsevec", pt)));
|
||||||
|
|
||||||
|
/* TODO Decide whether to store zero values */
|
||||||
|
if (value != 0)
|
||||||
|
{
|
||||||
|
indices[nnz] = index;
|
||||||
|
values[nnz] = value;
|
||||||
|
nnz++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (*stringEnd != '\0' && *stringEnd != '}')
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
||||||
|
errmsg("invalid input syntax for type sparsevec: \"%s\"", lit)));
|
||||||
|
|
||||||
|
pt = strtok(NULL, ",");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (stringEnd == NULL || *stringEnd != '}')
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
||||||
|
errmsg("malformed sparsevec literal: \"%s\"", lit),
|
||||||
|
errdetail("Unexpected end of input.")));
|
||||||
|
|
||||||
|
stringEnd++;
|
||||||
|
|
||||||
|
if (*stringEnd != '/')
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
||||||
|
errmsg("malformed sparsevec literal: \"%s\"", lit),
|
||||||
|
errdetail("Unexpected end of input.")));
|
||||||
|
|
||||||
|
stringEnd++;
|
||||||
|
|
||||||
|
/* Use similar logic as int2vectorin */
|
||||||
|
errno = 0;
|
||||||
|
pt = stringEnd;
|
||||||
|
dim = strtol(pt, &stringEnd, 10);
|
||||||
|
|
||||||
|
if (stringEnd == pt)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
||||||
|
errmsg("invalid input syntax for type sparsevec: \"%s\"", lit)));
|
||||||
|
|
||||||
|
/* Only whitespace is allowed after the closing brace */
|
||||||
|
while (sparsevec_isspace(*stringEnd))
|
||||||
|
stringEnd++;
|
||||||
|
|
||||||
|
if (*stringEnd != '\0')
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
||||||
|
errmsg("malformed sparsevec literal: \"%s\"", lit),
|
||||||
|
errdetail("Junk after closing.")));
|
||||||
|
|
||||||
|
pfree(litcopy);
|
||||||
|
|
||||||
|
CheckDim(dim);
|
||||||
|
CheckExpectedDim(typmod, dim);
|
||||||
|
|
||||||
|
result = InitSparseVector(dim, nnz);
|
||||||
|
rvalues = SPARSEVEC_VALUES(result);
|
||||||
|
for (int i = 0; i < nnz; i++)
|
||||||
|
{
|
||||||
|
result->indices[i] = indices[i];
|
||||||
|
rvalues[i] = values[i];
|
||||||
|
|
||||||
|
CheckIndex(result->indices, i, dim);
|
||||||
|
CheckElement(rvalues[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
PG_RETURN_POINTER(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Helpers for building the text representation in place. Each one writes at
 * ptr and advances ptr past what was written; none of them writes a
 * terminating NUL that the caller can rely on, so the caller terminates the
 * string itself.
 */
#define AppendChar(ptr, c) (*(ptr)++ = (c))
#define AppendFloat(ptr, f) ((ptr) += float_to_shortest_decimal_bufn((f), (ptr)))

#if PG_VERSION_NUM >= 140000
/* Here the return value of pg_ltoa is used as the number of bytes written */
#define AppendInt(ptr, i) ((ptr) += pg_ltoa((i), (ptr)))
#else
/*
 * Here the return value of pg_ltoa is not used, so scan forward to the NUL
 * it wrote. Arguments are parenthesized so that expression arguments expand
 * safely, matching the variant above. Note this variant is a statement, not
 * an expression.
 */
#define AppendInt(ptr, i) \
	do { \
		pg_ltoa((i), (ptr)); \
		while (*(ptr) != '\0') \
			(ptr)++; \
	} while (0)
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Convert internal representation to textual representation
|
||||||
|
*/
|
||||||
|
PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_out);
|
||||||
|
Datum
|
||||||
|
sparsevec_out(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
SparseVector *sparsevec = PG_GETARG_SPARSEVEC_P(0);
|
||||||
|
float *values = SPARSEVEC_VALUES(sparsevec);
|
||||||
|
char *buf;
|
||||||
|
char *ptr;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Need:
|
||||||
|
*
|
||||||
|
* nnz * 10 bytes for index (positive integer)
|
||||||
|
*
|
||||||
|
* nnz bytes for :
|
||||||
|
*
|
||||||
|
* nnz * (FLOAT_SHORTEST_DECIMAL_LEN - 1) bytes for
|
||||||
|
* float_to_shortest_decimal_bufn
|
||||||
|
*
|
||||||
|
* nnz - 1 bytes for ,
|
||||||
|
*
|
||||||
|
* 10 bytes for dimensions
|
||||||
|
*
|
||||||
|
* 4 bytes for {, }, /, and \0
|
||||||
|
*/
|
||||||
|
buf = (char *) palloc((11 + FLOAT_SHORTEST_DECIMAL_LEN) * sparsevec->nnz + 13);
|
||||||
|
ptr = buf;
|
||||||
|
|
||||||
|
AppendChar(ptr, '{');
|
||||||
|
|
||||||
|
for (int i = 0; i < sparsevec->nnz; i++)
|
||||||
|
{
|
||||||
|
if (i > 0)
|
||||||
|
AppendChar(ptr, ',');
|
||||||
|
|
||||||
|
AppendInt(ptr, sparsevec->indices[i]);
|
||||||
|
AppendChar(ptr, ':');
|
||||||
|
AppendFloat(ptr, values[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
AppendChar(ptr, '}');
|
||||||
|
AppendChar(ptr, '/');
|
||||||
|
AppendInt(ptr, sparsevec->dim);
|
||||||
|
*ptr = '\0';
|
||||||
|
|
||||||
|
PG_FREE_IF_COPY(sparsevec, 0);
|
||||||
|
PG_RETURN_CSTRING(buf);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Convert type modifier
|
||||||
|
*/
|
||||||
|
PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_typmod_in);
|
||||||
|
Datum
|
||||||
|
sparsevec_typmod_in(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
|
||||||
|
int32 *tl;
|
||||||
|
int n;
|
||||||
|
|
||||||
|
tl = ArrayGetIntegerTypmods(ta, &n);
|
||||||
|
|
||||||
|
if (n != 1)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||||
|
errmsg("invalid type modifier")));
|
||||||
|
|
||||||
|
if (*tl < 1)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||||
|
errmsg("dimensions for type sparsevec must be at least 1")));
|
||||||
|
|
||||||
|
if (*tl > SPARSEVEC_MAX_DIM)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||||
|
errmsg("dimensions for type sparsevec cannot exceed %d", SPARSEVEC_MAX_DIM)));
|
||||||
|
|
||||||
|
PG_RETURN_INT32(*tl);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Convert external binary representation to internal representation
|
||||||
|
*/
|
||||||
|
PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_recv);
|
||||||
|
Datum
|
||||||
|
sparsevec_recv(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
|
||||||
|
int32 typmod = PG_GETARG_INT32(2);
|
||||||
|
SparseVector *result;
|
||||||
|
int32 dim;
|
||||||
|
int32 nnz;
|
||||||
|
int32 unused;
|
||||||
|
float *values;
|
||||||
|
|
||||||
|
dim = pq_getmsgint(buf, sizeof(int32));
|
||||||
|
nnz = pq_getmsgint(buf, sizeof(int32));
|
||||||
|
unused = pq_getmsgint(buf, sizeof(int32));
|
||||||
|
|
||||||
|
CheckDim(dim);
|
||||||
|
CheckNnz(nnz, dim);
|
||||||
|
CheckExpectedDim(typmod, dim);
|
||||||
|
|
||||||
|
if (unused != 0)
|
||||||
|
ereport(ERROR,
|
||||||
|
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||||
|
errmsg("expected unused to be 0, not %d", unused)));
|
||||||
|
|
||||||
|
result = InitSparseVector(dim, nnz);
|
||||||
|
values = SPARSEVEC_VALUES(result);
|
||||||
|
|
||||||
|
for (int i = 0; i < nnz; i++)
|
||||||
|
{
|
||||||
|
result->indices[i] = pq_getmsgint(buf, sizeof(int32));
|
||||||
|
CheckIndex(result->indices, i, dim);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int i = 0; i < nnz; i++)
|
||||||
|
{
|
||||||
|
values[i] = pq_getmsgfloat4(buf);
|
||||||
|
CheckElement(values[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
PG_RETURN_POINTER(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Convert internal representation to the external binary representation
|
||||||
|
*/
|
||||||
|
PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_send);
|
||||||
|
Datum
|
||||||
|
sparsevec_send(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
SparseVector *svec = PG_GETARG_SPARSEVEC_P(0);
|
||||||
|
float *values = SPARSEVEC_VALUES(svec);
|
||||||
|
StringInfoData buf;
|
||||||
|
|
||||||
|
pq_begintypsend(&buf);
|
||||||
|
pq_sendint(&buf, svec->dim, sizeof(int32));
|
||||||
|
pq_sendint(&buf, svec->nnz, sizeof(int32));
|
||||||
|
pq_sendint(&buf, svec->unused, sizeof(int32));
|
||||||
|
for (int i = 0; i < svec->nnz; i++)
|
||||||
|
pq_sendint(&buf, svec->indices[i], sizeof(int32));
|
||||||
|
for (int i = 0; i < svec->nnz; i++)
|
||||||
|
pq_sendfloat4(&buf, values[i]);
|
||||||
|
|
||||||
|
PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Convert sparse vector to sparse vector
|
||||||
|
* This is needed to check the type modifier
|
||||||
|
*/
|
||||||
|
PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec);
|
||||||
|
Datum
|
||||||
|
sparsevec(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
SparseVector *svec = PG_GETARG_SPARSEVEC_P(0);
|
||||||
|
int32 typmod = PG_GETARG_INT32(1);
|
||||||
|
|
||||||
|
CheckExpectedDim(typmod, svec->dim);
|
||||||
|
|
||||||
|
PG_RETURN_POINTER(svec);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Convert dense vector to sparse vector
|
||||||
|
*/
|
||||||
|
PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_to_sparsevec);
|
||||||
|
Datum
|
||||||
|
vector_to_sparsevec(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
Vector *vec = PG_GETARG_VECTOR_P(0);
|
||||||
|
int32 typmod = PG_GETARG_INT32(1);
|
||||||
|
SparseVector *result;
|
||||||
|
int dim = vec->dim;
|
||||||
|
int nnz = 0;
|
||||||
|
float *values;
|
||||||
|
int j = 0;
|
||||||
|
|
||||||
|
CheckDim(dim);
|
||||||
|
CheckExpectedDim(typmod, dim);
|
||||||
|
|
||||||
|
for (int i = 0; i < dim; i++)
|
||||||
|
{
|
||||||
|
if (vec->x[i] != 0)
|
||||||
|
nnz++;
|
||||||
|
}
|
||||||
|
|
||||||
|
result = InitSparseVector(dim, nnz);
|
||||||
|
values = SPARSEVEC_VALUES(result);
|
||||||
|
for (int i = 0; i < dim; i++)
|
||||||
|
{
|
||||||
|
if (vec->x[i] != 0)
|
||||||
|
{
|
||||||
|
/* Safety check */
|
||||||
|
if (j == nnz)
|
||||||
|
elog(ERROR, "safety check failed");
|
||||||
|
|
||||||
|
result->indices[j] = i;
|
||||||
|
values[j] = vec->x[i];
|
||||||
|
j++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PG_RETURN_POINTER(result);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the L2 squared distance between sparse vectors
|
||||||
|
*/
|
||||||
|
static double
|
||||||
|
l2_distance_squared_internal(SparseVector * a, SparseVector * b)
|
||||||
|
{
|
||||||
|
float *ax = SPARSEVEC_VALUES(a);
|
||||||
|
float *bx = SPARSEVEC_VALUES(b);
|
||||||
|
double distance = 0.0;
|
||||||
|
int bpos = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < a->nnz; i++)
|
||||||
|
{
|
||||||
|
int ai = a->indices[i];
|
||||||
|
int bi = -1;
|
||||||
|
|
||||||
|
for (int j = bpos; j < b->nnz; j++)
|
||||||
|
{
|
||||||
|
bi = b->indices[j];
|
||||||
|
|
||||||
|
if (ai == bi)
|
||||||
|
{
|
||||||
|
double diff = ax[i] - bx[j];
|
||||||
|
|
||||||
|
distance += diff * diff;
|
||||||
|
}
|
||||||
|
else if (ai > bi)
|
||||||
|
distance += bx[j] * bx[j];
|
||||||
|
|
||||||
|
/* Update start for next iteration */
|
||||||
|
if (ai >= bi)
|
||||||
|
bpos = j + 1;
|
||||||
|
|
||||||
|
/* Found or passed it */
|
||||||
|
if (bi >= ai)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ai != bi)
|
||||||
|
distance += ax[i] * ax[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
for (int j = bpos; j < b->nnz; j++)
|
||||||
|
distance += bx[j] * bx[j];
|
||||||
|
|
||||||
|
return distance;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the L2 distance between sparse vectors
|
||||||
|
*/
|
||||||
|
PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_l2_distance);
|
||||||
|
Datum
|
||||||
|
sparsevec_l2_distance(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
SparseVector *a = PG_GETARG_SPARSEVEC_P(0);
|
||||||
|
SparseVector *b = PG_GETARG_SPARSEVEC_P(1);
|
||||||
|
|
||||||
|
CheckDims(a, b);
|
||||||
|
|
||||||
|
PG_RETURN_FLOAT8(sqrt(l2_distance_squared_internal(a, b)));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the L2 squared distance between sparse vectors
|
||||||
|
* This saves a sqrt calculation
|
||||||
|
*/
|
||||||
|
PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_l2_squared_distance);
|
||||||
|
Datum
|
||||||
|
sparsevec_l2_squared_distance(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
SparseVector *a = PG_GETARG_SPARSEVEC_P(0);
|
||||||
|
SparseVector *b = PG_GETARG_SPARSEVEC_P(1);
|
||||||
|
|
||||||
|
CheckDims(a, b);
|
||||||
|
|
||||||
|
PG_RETURN_FLOAT8(l2_distance_squared_internal(a, b));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the inner product of two sparse vectors
|
||||||
|
*/
|
||||||
|
static double
|
||||||
|
inner_product_internal(SparseVector * a, SparseVector * b)
|
||||||
|
{
|
||||||
|
float *ax = SPARSEVEC_VALUES(a);
|
||||||
|
float *bx = SPARSEVEC_VALUES(b);
|
||||||
|
double distance = 0.0;
|
||||||
|
int bpos = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < a->nnz; i++)
|
||||||
|
{
|
||||||
|
int ai = a->indices[i];
|
||||||
|
|
||||||
|
for (int j = bpos; j < b->nnz; j++)
|
||||||
|
{
|
||||||
|
int bi = b->indices[j];
|
||||||
|
|
||||||
|
/* Only update when the same index */
|
||||||
|
if (ai == bi)
|
||||||
|
distance += ax[i] * bx[j];
|
||||||
|
|
||||||
|
/* Update start for next iteration */
|
||||||
|
if (ai >= bi)
|
||||||
|
bpos = j + 1;
|
||||||
|
|
||||||
|
/* Found or passed it */
|
||||||
|
if (bi >= ai)
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return distance;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the inner product of two sparse vectors
|
||||||
|
*/
|
||||||
|
PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_inner_product);
|
||||||
|
Datum
|
||||||
|
sparsevec_inner_product(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
SparseVector *a = PG_GETARG_SPARSEVEC_P(0);
|
||||||
|
SparseVector *b = PG_GETARG_SPARSEVEC_P(1);
|
||||||
|
|
||||||
|
CheckDims(a, b);
|
||||||
|
|
||||||
|
PG_RETURN_FLOAT8(inner_product_internal(a, b));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the negative inner product of two sparse vectors
|
||||||
|
*/
|
||||||
|
PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_negative_inner_product);
|
||||||
|
Datum
|
||||||
|
sparsevec_negative_inner_product(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
SparseVector *a = PG_GETARG_SPARSEVEC_P(0);
|
||||||
|
SparseVector *b = PG_GETARG_SPARSEVEC_P(1);
|
||||||
|
|
||||||
|
CheckDims(a, b);
|
||||||
|
|
||||||
|
PG_RETURN_FLOAT8(-inner_product_internal(a, b));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the cosine distance between two sparse vectors
|
||||||
|
*/
|
||||||
|
PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_cosine_distance);
|
||||||
|
Datum
|
||||||
|
sparsevec_cosine_distance(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
SparseVector *a = PG_GETARG_SPARSEVEC_P(0);
|
||||||
|
SparseVector *b = PG_GETARG_SPARSEVEC_P(1);
|
||||||
|
float *ax = SPARSEVEC_VALUES(a);
|
||||||
|
float *bx = SPARSEVEC_VALUES(b);
|
||||||
|
float norma = 0.0;
|
||||||
|
float normb = 0.0;
|
||||||
|
double similarity;
|
||||||
|
|
||||||
|
CheckDims(a, b);
|
||||||
|
|
||||||
|
similarity = inner_product_internal(a, b);
|
||||||
|
|
||||||
|
/* Auto-vectorized */
|
||||||
|
for (int i = 0; i < a->nnz; i++)
|
||||||
|
norma += ax[i] * ax[i];
|
||||||
|
|
||||||
|
/* Auto-vectorized */
|
||||||
|
for (int i = 0; i < b->nnz; i++)
|
||||||
|
normb += bx[i] * bx[i];
|
||||||
|
|
||||||
|
/* Use sqrt(a * b) over sqrt(a) * sqrt(b) */
|
||||||
|
similarity /= sqrt((double) norma * (double) normb);
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
/* /fp:fast may not propagate NaN */
|
||||||
|
if (isnan(similarity))
|
||||||
|
PG_RETURN_FLOAT8(NAN);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Keep in range */
|
||||||
|
if (similarity > 1)
|
||||||
|
similarity = 1.0;
|
||||||
|
else if (similarity < -1)
|
||||||
|
similarity = -1.0;
|
||||||
|
|
||||||
|
PG_RETURN_FLOAT8(1.0 - similarity);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get the L2 norm of a sparse vector
|
||||||
|
*/
|
||||||
|
PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_norm);
|
||||||
|
Datum
|
||||||
|
sparsevec_norm(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
SparseVector *a = PG_GETARG_SPARSEVEC_P(0);
|
||||||
|
float *ax = SPARSEVEC_VALUES(a);
|
||||||
|
double norm = 0.0;
|
||||||
|
|
||||||
|
/* Auto-vectorized */
|
||||||
|
for (int i = 0; i < a->nnz; i++)
|
||||||
|
norm += (double) ax[i] * (double) ax[i];
|
||||||
|
|
||||||
|
PG_RETURN_FLOAT8(sqrt(norm));
|
||||||
|
}
|
||||||
24
src/sparsevec.h
Normal file
24
src/sparsevec.h
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
#ifndef SPARSEVEC_H
|
||||||
|
#define SPARSEVEC_H
|
||||||
|
|
||||||
|
#define SPARSEVEC_MAX_DIM 100000
|
||||||
|
|
||||||
|
/* Ensure values are aligned */
|
||||||
|
#define SPARSEVEC_SIZE(_nnz) (offsetof(SparseVector, indices) + MAXALIGN((_nnz) * sizeof(int32)) + (_nnz * sizeof(float)))
|
||||||
|
#define SPARSEVEC_VALUES(x) ((float *) (((char *) (x)) + offsetof(SparseVector, indices) + MAXALIGN((x)->nnz * sizeof(int32))))
|
||||||
|
#define DatumGetSparseVector(x) ((SparseVector *) PG_DETOAST_DATUM(x))
|
||||||
|
#define PG_GETARG_SPARSEVEC_P(x) DatumGetSparseVector(PG_GETARG_DATUM(x))
|
||||||
|
#define PG_RETURN_SPARSEVEC_P(x) PG_RETURN_POINTER(x)
|
||||||
|
|
||||||
|
typedef struct SparseVector
|
||||||
|
{
|
||||||
|
int32 vl_len_; /* varlena header (do not touch directly!) */
|
||||||
|
int32 dim; /* number of dimensions */
|
||||||
|
int32 nnz;
|
||||||
|
int32 unused;
|
||||||
|
int32 indices[FLEXIBLE_ARRAY_MEMBER];
|
||||||
|
} SparseVector;
|
||||||
|
|
||||||
|
SparseVector *InitSparseVector(int dim, int nnz);
|
||||||
|
|
||||||
|
#endif
|
||||||
24
src/vector.c
24
src/vector.c
@@ -12,6 +12,7 @@
|
|||||||
#include "lib/stringinfo.h"
|
#include "lib/stringinfo.h"
|
||||||
#include "libpq/pqformat.h"
|
#include "libpq/pqformat.h"
|
||||||
#include "port.h" /* for strtof() */
|
#include "port.h" /* for strtof() */
|
||||||
|
#include "sparsevec.h"
|
||||||
#include "utils/array.h"
|
#include "utils/array.h"
|
||||||
#include "utils/builtins.h"
|
#include "utils/builtins.h"
|
||||||
#include "utils/float.h"
|
#include "utils/float.h"
|
||||||
@@ -1214,3 +1215,26 @@ vector_avg(PG_FUNCTION_ARGS)
|
|||||||
|
|
||||||
PG_RETURN_POINTER(result);
|
PG_RETURN_POINTER(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Convert sparse vector to dense vector
|
||||||
|
*/
|
||||||
|
PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_to_vector);
|
||||||
|
Datum
|
||||||
|
sparsevec_to_vector(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
SparseVector *svec = PG_GETARG_SPARSEVEC_P(0);
|
||||||
|
int32 typmod = PG_GETARG_INT32(1);
|
||||||
|
Vector *result;
|
||||||
|
int dim = svec->dim;
|
||||||
|
float *values = SPARSEVEC_VALUES(svec);
|
||||||
|
|
||||||
|
CheckDim(dim);
|
||||||
|
CheckExpectedDim(typmod, dim);
|
||||||
|
|
||||||
|
result = InitVector(dim);
|
||||||
|
for (int i = 0; i < svec->nnz; i++)
|
||||||
|
result->x[svec->indices[i]] = values[i];
|
||||||
|
|
||||||
|
PG_RETURN_POINTER(result);
|
||||||
|
}
|
||||||
|
|||||||
26
test/expected/hnsw_sparsevec_cosine.out
Normal file
26
test/expected/hnsw_sparsevec_cosine.out
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
SET enable_seqscan = off;
|
||||||
|
CREATE TABLE t (val sparsevec(3));
|
||||||
|
INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL);
|
||||||
|
CREATE INDEX ON t USING hnsw (val sparsevec_cosine_ops);
|
||||||
|
INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3');
|
||||||
|
SELECT * FROM t ORDER BY val <=> '{0:3,1:3,2:3}/3';
|
||||||
|
val
|
||||||
|
-----------------
|
||||||
|
{0:1,1:1,2:1}/3
|
||||||
|
{0:1,1:2,2:3}/3
|
||||||
|
{0:1,1:2,2:4}/3
|
||||||
|
(3 rows)
|
||||||
|
|
||||||
|
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '{}/3') t2;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
3
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::sparsevec)) t2;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
3
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
DROP TABLE t;
|
||||||
21
test/expected/hnsw_sparsevec_ip.out
Normal file
21
test/expected/hnsw_sparsevec_ip.out
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
SET enable_seqscan = off;
|
||||||
|
CREATE TABLE t (val sparsevec(3));
|
||||||
|
INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL);
|
||||||
|
CREATE INDEX ON t USING hnsw (val sparsevec_ip_ops);
|
||||||
|
INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3');
|
||||||
|
SELECT * FROM t ORDER BY val <#> '{0:3,1:3,2:3}/3';
|
||||||
|
val
|
||||||
|
-----------------
|
||||||
|
{0:1,1:2,2:4}/3
|
||||||
|
{0:1,1:2,2:3}/3
|
||||||
|
{0:1,1:1,2:1}/3
|
||||||
|
{}/3
|
||||||
|
(4 rows)
|
||||||
|
|
||||||
|
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::sparsevec)) t2;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
4
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
DROP TABLE t;
|
||||||
43
test/expected/hnsw_sparsevec_l2.out
Normal file
43
test/expected/hnsw_sparsevec_l2.out
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
SET enable_seqscan = off;
|
||||||
|
CREATE TABLE t (val sparsevec(3));
|
||||||
|
INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL);
|
||||||
|
CREATE INDEX ON t USING hnsw (val sparsevec_l2_ops);
|
||||||
|
INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3');
|
||||||
|
SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}/3';
|
||||||
|
val
|
||||||
|
-----------------
|
||||||
|
{0:1,1:2,2:3}/3
|
||||||
|
{0:1,1:2,2:4}/3
|
||||||
|
{0:1,1:1,2:1}/3
|
||||||
|
{}/3
|
||||||
|
(4 rows)
|
||||||
|
|
||||||
|
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <-> (SELECT NULL::sparsevec)) t2;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
4
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT COUNT(*) FROM t;
|
||||||
|
count
|
||||||
|
-------
|
||||||
|
5
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
TRUNCATE t;
|
||||||
|
SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}/3';
|
||||||
|
val
|
||||||
|
-----
|
||||||
|
(0 rows)
|
||||||
|
|
||||||
|
DROP TABLE t;
|
||||||
|
-- TODO move
|
||||||
|
CREATE TABLE t (val sparsevec(1001));
|
||||||
|
INSERT INTO t (val) VALUES (array_fill(1, ARRAY[1001])::vector::sparsevec);
|
||||||
|
CREATE INDEX ON t USING hnsw (val sparsevec_l2_ops);
|
||||||
|
ERROR: sparsevec cannot have more than 1000 non-zero elements for hnsw index
|
||||||
|
TRUNCATE t;
|
||||||
|
CREATE INDEX ON t USING hnsw (val sparsevec_l2_ops);
|
||||||
|
INSERT INTO t (val) VALUES (array_fill(1, ARRAY[1001])::vector::sparsevec);
|
||||||
|
ERROR: sparsevec cannot have more than 1000 non-zero elements for hnsw index
|
||||||
|
DROP TABLE t;
|
||||||
62
test/expected/sparsevec_functions.out
Normal file
62
test/expected/sparsevec_functions.out
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
SELECT l2_distance('{}/2'::sparsevec, '{0:3,1:4}/2');
|
||||||
|
l2_distance
|
||||||
|
-------------
|
||||||
|
5
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT l2_distance('{}/2'::sparsevec, '{1:1}/2');
|
||||||
|
l2_distance
|
||||||
|
-------------
|
||||||
|
1
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT '{}/2'::sparsevec <-> '{0:3,1:4}/2';
|
||||||
|
?column?
|
||||||
|
----------
|
||||||
|
5
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT inner_product('{0:1,1:2}/2'::sparsevec, '{0:2,1:4}/2');
|
||||||
|
inner_product
|
||||||
|
---------------
|
||||||
|
10
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT sparsevec_negative_inner_product('{0:1,1:2}/2', '{0:2,1:4}/2');
|
||||||
|
sparsevec_negative_inner_product
|
||||||
|
----------------------------------
|
||||||
|
-10
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT cosine_distance('{0:1,1:2}/2'::sparsevec, '{0:2,1:4}/2');
|
||||||
|
cosine_distance
|
||||||
|
-----------------
|
||||||
|
0
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT cosine_distance('{0:1,1:2}/2'::sparsevec, '{}/2');
|
||||||
|
cosine_distance
|
||||||
|
-----------------
|
||||||
|
NaN
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT cosine_distance('{0:1,1:1}/2'::sparsevec, '{0:-1,1:-1}/2');
|
||||||
|
cosine_distance
|
||||||
|
-----------------
|
||||||
|
2
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT cosine_distance('{0:1}/2'::sparsevec, '{1:2}/2');
|
||||||
|
cosine_distance
|
||||||
|
-----------------
|
||||||
|
1
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT cosine_distance('{}/1'::sparsevec, '{}/1');
|
||||||
|
cosine_distance
|
||||||
|
-----------------
|
||||||
|
NaN
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT cosine_distance('{0:1}/2'::sparsevec, '{0:1}/3');
|
||||||
|
ERROR: different sparsevec dimensions 2 and 3
|
||||||
62
test/expected/sparsevec_input.out
Normal file
62
test/expected/sparsevec_input.out
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
SELECT '{0:1.5,2:3.5}/5'::sparsevec;
|
||||||
|
sparsevec
|
||||||
|
-----------------
|
||||||
|
{0:1.5,2:3.5}/5
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector;
|
||||||
|
vector
|
||||||
|
-----------------
|
||||||
|
[1.5,0,3.5,0,0]
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector(5);
|
||||||
|
vector
|
||||||
|
-----------------
|
||||||
|
[1.5,0,3.5,0,0]
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector(4);
|
||||||
|
ERROR: expected 4 dimensions, not 5
|
||||||
|
SELECT '[0,1.5,0,3.5,0]'::vector::sparsevec;
|
||||||
|
sparsevec
|
||||||
|
-----------------
|
||||||
|
{1:1.5,3:3.5}/5
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT '{0:0,1:1,2:0}/3'::sparsevec;
|
||||||
|
sparsevec
|
||||||
|
-----------
|
||||||
|
{1:1}/3
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT '{1:1,0:1}/2'::sparsevec;
|
||||||
|
ERROR: indexes must be in ascending order
|
||||||
|
LINE 1: SELECT '{1:1,0:1}/2'::sparsevec;
|
||||||
|
^
|
||||||
|
SELECT '{}/5'::sparsevec;
|
||||||
|
sparsevec
|
||||||
|
-----------
|
||||||
|
{}/5
|
||||||
|
(1 row)
|
||||||
|
|
||||||
|
SELECT '{}/-1'::sparsevec;
|
||||||
|
ERROR: sparsevec must have at least 1 dimension
|
||||||
|
LINE 1: SELECT '{}/-1'::sparsevec;
|
||||||
|
^
|
||||||
|
SELECT '{}/100001'::sparsevec;
|
||||||
|
ERROR: sparsevec cannot have more than 100000 dimensions
|
||||||
|
LINE 1: SELECT '{}/100001'::sparsevec;
|
||||||
|
^
|
||||||
|
SELECT '{}/16001'::sparsevec::vector;
|
||||||
|
ERROR: vector cannot have more than 16000 dimensions
|
||||||
|
SELECT '{-1:1}/1'::sparsevec;
|
||||||
|
ERROR: index "-1" is out of range for type sparsevec
|
||||||
|
LINE 1: SELECT '{-1:1}/1'::sparsevec;
|
||||||
|
^
|
||||||
|
SELECT '{1:1}/1'::sparsevec;
|
||||||
|
ERROR: index must be less than dimensions
|
||||||
|
LINE 1: SELECT '{1:1}/1'::sparsevec;
|
||||||
|
^
|
||||||
|
SELECT '{}/1'::sparsevec(2);
|
||||||
|
ERROR: expected 2 dimensions, not 1
|
||||||
13
test/sql/hnsw_sparsevec_cosine.sql
Normal file
13
test/sql/hnsw_sparsevec_cosine.sql
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
SET enable_seqscan = off;
|
||||||
|
|
||||||
|
CREATE TABLE t (val sparsevec(3));
|
||||||
|
INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL);
|
||||||
|
CREATE INDEX ON t USING hnsw (val sparsevec_cosine_ops);
|
||||||
|
|
||||||
|
INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3');
|
||||||
|
|
||||||
|
SELECT * FROM t ORDER BY val <=> '{0:3,1:3,2:3}/3';
|
||||||
|
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '{}/3') t2;
|
||||||
|
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::sparsevec)) t2;
|
||||||
|
|
||||||
|
DROP TABLE t;
|
||||||
12
test/sql/hnsw_sparsevec_ip.sql
Normal file
12
test/sql/hnsw_sparsevec_ip.sql
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
SET enable_seqscan = off;
|
||||||
|
|
||||||
|
CREATE TABLE t (val sparsevec(3));
|
||||||
|
INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL);
|
||||||
|
CREATE INDEX ON t USING hnsw (val sparsevec_ip_ops);
|
||||||
|
|
||||||
|
INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3');
|
||||||
|
|
||||||
|
SELECT * FROM t ORDER BY val <#> '{0:3,1:3,2:3}/3';
|
||||||
|
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::sparsevec)) t2;
|
||||||
|
|
||||||
|
DROP TABLE t;
|
||||||
25
test/sql/hnsw_sparsevec_l2.sql
Normal file
25
test/sql/hnsw_sparsevec_l2.sql
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
SET enable_seqscan = off;
|
||||||
|
|
||||||
|
CREATE TABLE t (val sparsevec(3));
|
||||||
|
INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL);
|
||||||
|
CREATE INDEX ON t USING hnsw (val sparsevec_l2_ops);
|
||||||
|
|
||||||
|
INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3');
|
||||||
|
|
||||||
|
SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}/3';
|
||||||
|
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <-> (SELECT NULL::sparsevec)) t2;
|
||||||
|
SELECT COUNT(*) FROM t;
|
||||||
|
|
||||||
|
TRUNCATE t;
|
||||||
|
SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}/3';
|
||||||
|
|
||||||
|
DROP TABLE t;
|
||||||
|
|
||||||
|
-- TODO move
|
||||||
|
CREATE TABLE t (val sparsevec(1001));
|
||||||
|
INSERT INTO t (val) VALUES (array_fill(1, ARRAY[1001])::vector::sparsevec);
|
||||||
|
CREATE INDEX ON t USING hnsw (val sparsevec_l2_ops);
|
||||||
|
TRUNCATE t;
|
||||||
|
CREATE INDEX ON t USING hnsw (val sparsevec_l2_ops);
|
||||||
|
INSERT INTO t (val) VALUES (array_fill(1, ARRAY[1001])::vector::sparsevec);
|
||||||
|
DROP TABLE t;
|
||||||
13
test/sql/sparsevec_functions.sql
Normal file
13
test/sql/sparsevec_functions.sql
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
SELECT l2_distance('{}/2'::sparsevec, '{0:3,1:4}/2');
|
||||||
|
SELECT l2_distance('{}/2'::sparsevec, '{1:1}/2');
|
||||||
|
SELECT '{}/2'::sparsevec <-> '{0:3,1:4}/2';
|
||||||
|
|
||||||
|
SELECT inner_product('{0:1,1:2}/2'::sparsevec, '{0:2,1:4}/2');
|
||||||
|
SELECT sparsevec_negative_inner_product('{0:1,1:2}/2', '{0:2,1:4}/2');
|
||||||
|
|
||||||
|
SELECT cosine_distance('{0:1,1:2}/2'::sparsevec, '{0:2,1:4}/2');
|
||||||
|
SELECT cosine_distance('{0:1,1:2}/2'::sparsevec, '{}/2');
|
||||||
|
SELECT cosine_distance('{0:1,1:1}/2'::sparsevec, '{0:-1,1:-1}/2');
|
||||||
|
SELECT cosine_distance('{0:1}/2'::sparsevec, '{1:2}/2');
|
||||||
|
SELECT cosine_distance('{}/1'::sparsevec, '{}/1');
|
||||||
|
SELECT cosine_distance('{0:1}/2'::sparsevec, '{0:1}/3');
|
||||||
19
test/sql/sparsevec_input.sql
Normal file
19
test/sql/sparsevec_input.sql
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
SELECT '{0:1.5,2:3.5}/5'::sparsevec;
|
||||||
|
SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector;
|
||||||
|
SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector(5);
|
||||||
|
SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector(4);
|
||||||
|
SELECT '[0,1.5,0,3.5,0]'::vector::sparsevec;
|
||||||
|
|
||||||
|
SELECT '{0:0,1:1,2:0}/3'::sparsevec;
|
||||||
|
|
||||||
|
SELECT '{1:1,0:1}/2'::sparsevec;
|
||||||
|
|
||||||
|
SELECT '{}/5'::sparsevec;
|
||||||
|
SELECT '{}/-1'::sparsevec;
|
||||||
|
SELECT '{}/100001'::sparsevec;
|
||||||
|
SELECT '{}/16001'::sparsevec::vector;
|
||||||
|
|
||||||
|
SELECT '{-1:1}/1'::sparsevec;
|
||||||
|
SELECT '{1:1}/1'::sparsevec;
|
||||||
|
|
||||||
|
SELECT '{}/1'::sparsevec(2);
|
||||||
Reference in New Issue
Block a user