mirror of
https://github.com/pgvector/pgvector.git
synced 2026-07-01 10:11:20 +08:00
Added support for sparse vectors
This commit is contained in:
@@ -1,3 +1,7 @@
|
||||
## 0.6.0 (unreleased)
|
||||
|
||||
- Added support for sparse vectors
|
||||
|
||||
## 0.5.1 (2023-10-10)
|
||||
|
||||
- Improved performance of HNSW index builds
|
||||
|
||||
4
Makefile
4
Makefile
@@ -3,8 +3,8 @@ EXTVERSION = 0.5.1
|
||||
|
||||
MODULE_big = vector
|
||||
DATA = $(wildcard sql/*--*.sql)
|
||||
OBJS = src/hnsw.o src/hnswbuild.o src/hnswinsert.o src/hnswscan.o src/hnswutils.o src/hnswvacuum.o src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/vector.o
|
||||
HEADERS = src/vector.h
|
||||
OBJS = src/hnsw.o src/hnswbuild.o src/hnswinsert.o src/hnswscan.o src/hnswutils.o src/hnswvacuum.o src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/svector.o src/vector.o
|
||||
HEADERS = src/svector.h src/vector.h
|
||||
|
||||
TESTS = $(wildcard test/sql/*.sql)
|
||||
REGRESS = $(patsubst test/sql/%.sql,%,$(TESTS))
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
EXTENSION = vector
|
||||
EXTVERSION = 0.5.1
|
||||
|
||||
OBJS = src\hnsw.obj src\hnswbuild.obj src\hnswinsert.obj src\hnswscan.obj src\hnswutils.obj src\hnswvacuum.obj src\ivfbuild.obj src\ivfflat.obj src\ivfinsert.obj src\ivfkmeans.obj src\ivfscan.obj src\ivfutils.obj src\ivfvacuum.obj src\vector.obj
|
||||
HEADERS = src\vector.h
|
||||
OBJS = src\hnsw.obj src\hnswbuild.obj src\hnswinsert.obj src\hnswscan.obj src\hnswutils.obj src\hnswvacuum.obj src\ivfbuild.obj src\ivfflat.obj src\ivfinsert.obj src\ivfkmeans.obj src\ivfscan.obj src\ivfutils.obj src\ivfvacuum.obj src\svector.obj src\vector.obj
|
||||
HEADERS = src\svector.h src\vector.h
|
||||
|
||||
REGRESS = btree cast copy functions input ivfflat_cosine ivfflat_ip ivfflat_l2 ivfflat_options ivfflat_unlogged
|
||||
REGRESS_OPTS = --inputdir=test --load-extension=$(EXTENSION)
|
||||
|
||||
20
README.md
20
README.md
@@ -369,6 +369,26 @@ To speed up queries with an IVFFlat index, increase the number of inverted lists
|
||||
CREATE INDEX ON items USING ivfflat (embedding vector_l2_ops) WITH (lists = 1000);
|
||||
```
|
||||
|
||||
## Sparse Vectors
|
||||
|
||||
Create a sparse vector column with 10 dimensions
|
||||
|
||||
```sql
|
||||
CREATE TABLE items (id bigserial PRIMARY KEY, embedding svector(10));
|
||||
```
|
||||
|
||||
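Insert vectors, written as comma-separated `(index,value)` pairs (indices are zero-based) followed by the total number of dimensions between pipes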
|
||||
|
||||
```sql
|
||||
INSERT INTO items (embedding) VALUES ('(0,1),(1,2),(2,3)|10|'), ('(0,4),(1,5),(4,6)|10|');
|
||||
```
|
||||
|
||||
Get the nearest neighbors by L2 distance
|
||||
|
||||
```sql
|
||||
SELECT * FROM items ORDER BY embedding <-> '(0,3),(1,1),(2,2)|10|' LIMIT 5;
|
||||
```
|
||||
|
||||
## Languages
|
||||
|
||||
Use pgvector from any language with a Postgres client. You can even generate and store vectors in one language and query them in another.
|
||||
|
||||
79
sql/vector--0.5.1--0.6.0.sql
Normal file
79
sql/vector--0.5.1--0.6.0.sql
Normal file
@@ -0,0 +1,79 @@
|
||||
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "ALTER EXTENSION vector UPDATE TO '0.6.0'" to load this file. \quit
|
||||
|
||||
CREATE TYPE svector;
|
||||
|
||||
CREATE FUNCTION svector_in(cstring, oid, integer) RETURNS svector
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION svector_out(svector) RETURNS cstring
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION svector_typmod_in(cstring[]) RETURNS integer
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION svector_recv(internal, oid, integer) RETURNS svector
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION svector_send(svector) RETURNS bytea
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE TYPE svector (
|
||||
INPUT = svector_in,
|
||||
OUTPUT = svector_out,
|
||||
TYPMOD_IN = svector_typmod_in,
|
||||
RECEIVE = svector_recv,
|
||||
SEND = svector_send,
|
||||
STORAGE = external
|
||||
);
|
||||
|
||||
CREATE FUNCTION l2_distance(svector, svector) RETURNS float8
|
||||
AS 'MODULE_PATHNAME', 'svector_l2_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION inner_product(svector, svector) RETURNS float8
|
||||
AS 'MODULE_PATHNAME', 'svector_inner_product' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION cosine_distance(svector, svector) RETURNS float8
|
||||
AS 'MODULE_PATHNAME', 'svector_cosine_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION jaccard_distance(svector, svector) RETURNS float8
|
||||
AS 'MODULE_PATHNAME', 'svector_jaccard_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION svector_l2_squared_distance(svector, svector) RETURNS float8
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION svector_negative_inner_product(svector, svector) RETURNS float8
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION svector(svector, integer, boolean) RETURNS svector
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION vector_to_svector(vector, integer, boolean) RETURNS svector
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION svector_to_vector(svector, integer, boolean) RETURNS vector
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE CAST (svector AS svector)
|
||||
WITH FUNCTION svector(svector, integer, boolean) AS IMPLICIT;
|
||||
|
||||
CREATE CAST (svector AS vector)
|
||||
WITH FUNCTION svector_to_vector(svector, integer, boolean) AS IMPLICIT;
|
||||
|
||||
CREATE CAST (vector AS svector)
|
||||
WITH FUNCTION vector_to_svector(vector, integer, boolean) AS IMPLICIT;
|
||||
|
||||
CREATE OPERATOR <-> (
|
||||
LEFTARG = svector, RIGHTARG = svector, PROCEDURE = l2_distance,
|
||||
COMMUTATOR = '<->'
|
||||
);
|
||||
|
||||
CREATE OPERATOR <#> (
|
||||
LEFTARG = svector, RIGHTARG = svector, PROCEDURE = svector_negative_inner_product,
|
||||
COMMUTATOR = '<#>'
|
||||
);
|
||||
|
||||
CREATE OPERATOR <=> (
|
||||
LEFTARG = svector, RIGHTARG = svector, PROCEDURE = cosine_distance,
|
||||
COMMUTATOR = '<=>'
|
||||
);
|
||||
@@ -290,3 +290,92 @@ CREATE OPERATOR CLASS vector_cosine_ops
|
||||
OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops,
|
||||
FUNCTION 1 vector_negative_inner_product(vector, vector),
|
||||
FUNCTION 2 vector_norm(vector);
|
||||
|
||||
--- svector type
|
||||
|
||||
CREATE TYPE svector;
|
||||
|
||||
CREATE FUNCTION svector_in(cstring, oid, integer) RETURNS svector
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION svector_out(svector) RETURNS cstring
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION svector_typmod_in(cstring[]) RETURNS integer
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION svector_recv(internal, oid, integer) RETURNS svector
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION svector_send(svector) RETURNS bytea
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE TYPE svector (
|
||||
INPUT = svector_in,
|
||||
OUTPUT = svector_out,
|
||||
TYPMOD_IN = svector_typmod_in,
|
||||
RECEIVE = svector_recv,
|
||||
SEND = svector_send,
|
||||
STORAGE = external
|
||||
);
|
||||
|
||||
-- svector functions
|
||||
|
||||
CREATE FUNCTION l2_distance(svector, svector) RETURNS float8
|
||||
AS 'MODULE_PATHNAME', 'svector_l2_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION inner_product(svector, svector) RETURNS float8
|
||||
AS 'MODULE_PATHNAME', 'svector_inner_product' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION cosine_distance(svector, svector) RETURNS float8
|
||||
AS 'MODULE_PATHNAME', 'svector_cosine_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION jaccard_distance(svector, svector) RETURNS float8
|
||||
AS 'MODULE_PATHNAME', 'svector_jaccard_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
-- svector private functions
|
||||
|
||||
CREATE FUNCTION svector_l2_squared_distance(svector, svector) RETURNS float8
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION svector_negative_inner_product(svector, svector) RETURNS float8
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
-- svector cast functions
|
||||
|
||||
CREATE FUNCTION svector(svector, integer, boolean) RETURNS svector
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION vector_to_svector(vector, integer, boolean) RETURNS svector
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION svector_to_vector(svector, integer, boolean) RETURNS vector
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
-- svector casts
|
||||
|
||||
CREATE CAST (svector AS svector)
|
||||
WITH FUNCTION svector(svector, integer, boolean) AS IMPLICIT;
|
||||
|
||||
CREATE CAST (svector AS vector)
|
||||
WITH FUNCTION svector_to_vector(svector, integer, boolean) AS IMPLICIT;
|
||||
|
||||
CREATE CAST (vector AS svector)
|
||||
WITH FUNCTION vector_to_svector(vector, integer, boolean) AS IMPLICIT;
|
||||
|
||||
-- svector operators
|
||||
|
||||
CREATE OPERATOR <-> (
|
||||
LEFTARG = svector, RIGHTARG = svector, PROCEDURE = l2_distance,
|
||||
COMMUTATOR = '<->'
|
||||
);
|
||||
|
||||
CREATE OPERATOR <#> (
|
||||
LEFTARG = svector, RIGHTARG = svector, PROCEDURE = svector_negative_inner_product,
|
||||
COMMUTATOR = '<#>'
|
||||
);
|
||||
|
||||
CREATE OPERATOR <=> (
|
||||
LEFTARG = svector, RIGHTARG = svector, PROCEDURE = cosine_distance,
|
||||
COMMUTATOR = '<=>'
|
||||
);
|
||||
|
||||
704
src/svector.c
Normal file
704
src/svector.c
Normal file
@@ -0,0 +1,704 @@
|
||||
#include "postgres.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include "fmgr.h"
|
||||
#include "libpq/pqformat.h"
|
||||
#include "svector.h"
|
||||
#include "utils/array.h"
|
||||
#include "vector.h"
|
||||
|
||||
#if PG_VERSION_NUM >= 120000
|
||||
#include "common/shortest_dec.h"
|
||||
#include "utils/float.h"
|
||||
#else
|
||||
#include <float.h>
|
||||
#include "utils/builtins.h"
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Ensure same dimensions
|
||||
*/
|
||||
static inline void
|
||||
CheckDims(SVector * a, SVector * b)
|
||||
{
|
||||
if (a->dim != b->dim)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||
errmsg("different svector dimensions %d and %d", a->dim, b->dim)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure expected dimensions
|
||||
*/
|
||||
static inline void
|
||||
CheckExpectedDim(int32 typmod, int dim)
|
||||
{
|
||||
if (typmod != -1 && typmod != dim)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||
errmsg("expected %d dimensions, not %d", typmod, dim)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure valid dimensions
|
||||
*/
|
||||
static inline void
|
||||
CheckDim(int dim)
|
||||
{
|
||||
if (dim < 1)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||
errmsg("svector must have at least 1 dimension")));
|
||||
|
||||
if (dim > SVECTOR_MAX_DIM)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
||||
errmsg("svector cannot have more than %d dimensions", SVECTOR_MAX_DIM)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure valid nnz
|
||||
*/
|
||||
static inline void
|
||||
CheckNnz(int nnz, int dim)
|
||||
{
|
||||
if (nnz < 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||
errmsg("svector must have at least one element")));
|
||||
|
||||
if (nnz > dim)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
|
||||
errmsg("svector cannot have more elements than dimensions")));
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure valid index
|
||||
*/
|
||||
static inline void
|
||||
CheckIndex(int32 *indices, int i, int dim)
|
||||
{
|
||||
int32 index = indices[i];
|
||||
|
||||
if (index < 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||
errmsg("index must not be negative")));
|
||||
|
||||
if (index >= dim)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||
errmsg("index must be less than dimensions")));
|
||||
|
||||
if (i > 0)
|
||||
{
|
||||
if (index < indices[i - 1])
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||
errmsg("indexes must be in ascending order")));
|
||||
|
||||
if (index == indices[i - 1])
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||
errmsg("indexes must not contain duplicates")));
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure finite element
|
||||
*/
|
||||
static inline void
|
||||
CheckElement(float value)
|
||||
{
|
||||
if (isnan(value))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||
errmsg("NaN not allowed in svector")));
|
||||
|
||||
if (isinf(value))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||
errmsg("infinite value not allowed in svector")));
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate and initialize a new sparse vector
|
||||
*/
|
||||
SVector *
|
||||
InitSVector(int dim, int nnz)
|
||||
{
|
||||
SVector *result;
|
||||
int size;
|
||||
|
||||
size = SVECTOR_SIZE(nnz);
|
||||
result = (SVector *) palloc0(size);
|
||||
SET_VARSIZE(result, size);
|
||||
result->dim = dim;
|
||||
result->nnz = nnz;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert textual representation to internal representation
|
||||
*/
|
||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(svector_in);
|
||||
Datum
|
||||
svector_in(PG_FUNCTION_ARGS)
|
||||
{
|
||||
char *str = PG_GETARG_CSTRING(0);
|
||||
int32 typmod = PG_GETARG_INT32(2);
|
||||
int dim;
|
||||
char *pt;
|
||||
SVector *result;
|
||||
float *rvalues;
|
||||
char *lit = pstrdup(str);
|
||||
int n;
|
||||
int32 *indices;
|
||||
float *values;
|
||||
int index;
|
||||
float value;
|
||||
int maxNnz;
|
||||
int nnz = 0;
|
||||
|
||||
/* TODO Improve code and checks after deciding on format */
|
||||
|
||||
maxNnz = 1;
|
||||
pt = str;
|
||||
while (*pt != '\0')
|
||||
{
|
||||
if (*pt == ',')
|
||||
maxNnz++;
|
||||
|
||||
pt++;
|
||||
}
|
||||
maxNnz /= 2;
|
||||
|
||||
indices = palloc(maxNnz * sizeof(int32));
|
||||
values = palloc(maxNnz * sizeof(float));
|
||||
|
||||
while (sscanf(str, "(%d,%f)%n", &index, &value, &n) == 2)
|
||||
{
|
||||
/* TODO Better error */
|
||||
if (nnz == maxNnz)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
||||
errmsg("ran out of buffer: \"%s\"", lit)));
|
||||
|
||||
indices[nnz] = index;
|
||||
values[nnz] = value;
|
||||
nnz++;
|
||||
|
||||
str += n;
|
||||
|
||||
if (*str == ',')
|
||||
str++;
|
||||
else if (*str == '|')
|
||||
break;
|
||||
else
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
||||
errmsg("malformed svector literal: \"%s\"", lit)));
|
||||
}
|
||||
|
||||
if (sscanf(str, "|%d|%n", &dim, &n) != 1)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
||||
errmsg("malformed svector literal: \"%s\"", lit)));
|
||||
|
||||
str += n;
|
||||
|
||||
if (*str != '\0')
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
|
||||
errmsg("malformed svector literal: \"%s\"", lit),
|
||||
errdetail("Junk after closing pipe.")));
|
||||
|
||||
pfree(lit);
|
||||
|
||||
CheckDim(dim);
|
||||
CheckExpectedDim(typmod, dim);
|
||||
|
||||
result = InitSVector(dim, nnz);
|
||||
rvalues = SVECTOR_VALUES(result);
|
||||
for (int i = 0; i < nnz; i++)
|
||||
{
|
||||
result->indices[i] = indices[i];
|
||||
rvalues[i] = values[i];
|
||||
|
||||
CheckIndex(result->indices, i, dim);
|
||||
CheckElement(rvalues[i]);
|
||||
}
|
||||
|
||||
PG_RETURN_POINTER(result);
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert internal representation to textual representation
|
||||
*/
|
||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(svector_out);
|
||||
Datum
|
||||
svector_out(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SVector *svector = PG_GETARG_SVECTOR_P(0);
|
||||
float *values = SVECTOR_VALUES(svector);
|
||||
char *buf;
|
||||
char *ptr;
|
||||
int n;
|
||||
|
||||
/* TODO Improve code after deciding on format */
|
||||
|
||||
#if PG_VERSION_NUM < 120000
|
||||
int ndig = FLT_DIG + extra_float_digits;
|
||||
|
||||
if (ndig < 1)
|
||||
ndig = 1;
|
||||
|
||||
#define FLOAT_SHORTEST_DECIMAL_LEN (ndig + 10)
|
||||
#endif
|
||||
|
||||
/* TODO Move */
|
||||
#define APPEND_CHAR(ptr, ch) (*(ptr)++ = (ch))
|
||||
|
||||
/* TODO Improve */
|
||||
buf = (char *) palloc((FLOAT_SHORTEST_DECIMAL_LEN + 20) * svector->nnz + 20);
|
||||
ptr = buf;
|
||||
|
||||
for (int i = 0; i < svector->nnz; i++)
|
||||
{
|
||||
if (i > 0)
|
||||
APPEND_CHAR(ptr, ',');
|
||||
|
||||
n = sprintf(ptr, "(%d,", svector->indices[i]);
|
||||
ptr += n;
|
||||
|
||||
#if PG_VERSION_NUM >= 120000
|
||||
n = float_to_shortest_decimal_bufn(values[i], ptr);
|
||||
#else
|
||||
n = sprintf(ptr, "%.*g", ndig, values[i]);
|
||||
#endif
|
||||
ptr += n;
|
||||
|
||||
APPEND_CHAR(ptr, ')');
|
||||
}
|
||||
|
||||
n = sprintf(ptr, "|%d|", svector->dim);
|
||||
ptr += n;
|
||||
|
||||
APPEND_CHAR(ptr, '\0');
|
||||
|
||||
PG_FREE_IF_COPY(svector, 0);
|
||||
PG_RETURN_CSTRING(buf);
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert type modifier
|
||||
*/
|
||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(svector_typmod_in);
|
||||
Datum
|
||||
svector_typmod_in(PG_FUNCTION_ARGS)
|
||||
{
|
||||
ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0);
|
||||
int32 *tl;
|
||||
int n;
|
||||
|
||||
tl = ArrayGetIntegerTypmods(ta, &n);
|
||||
|
||||
if (n != 1)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("invalid type modifier")));
|
||||
|
||||
if (*tl < 1)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("dimensions for type svector must be at least 1")));
|
||||
|
||||
if (*tl > SVECTOR_MAX_DIM)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
|
||||
errmsg("dimensions for type svector cannot exceed %d", SVECTOR_MAX_DIM)));
|
||||
|
||||
PG_RETURN_INT32(*tl);
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert external binary representation to internal representation
|
||||
*/
|
||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(svector_recv);
|
||||
Datum
|
||||
svector_recv(PG_FUNCTION_ARGS)
|
||||
{
|
||||
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
|
||||
int32 typmod = PG_GETARG_INT32(2);
|
||||
SVector *result;
|
||||
int32 dim;
|
||||
int32 nnz;
|
||||
int32 unused;
|
||||
float *values;
|
||||
|
||||
dim = pq_getmsgint(buf, sizeof(int32));
|
||||
nnz = pq_getmsgint(buf, sizeof(int32));
|
||||
unused = pq_getmsgint(buf, sizeof(int32));
|
||||
|
||||
CheckDim(dim);
|
||||
CheckNnz(nnz, dim);
|
||||
CheckExpectedDim(typmod, dim);
|
||||
|
||||
if (unused != 0)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||
errmsg("expected unused to be 0, not %d", unused)));
|
||||
|
||||
result = InitSVector(dim, nnz);
|
||||
values = SVECTOR_VALUES(result);
|
||||
|
||||
for (int i = 0; i < nnz; i++)
|
||||
{
|
||||
result->indices[i] = pq_getmsgint(buf, sizeof(int32));
|
||||
CheckIndex(result->indices, i, dim);
|
||||
}
|
||||
|
||||
for (int i = 0; i < nnz; i++)
|
||||
{
|
||||
values[i] = pq_getmsgfloat4(buf);
|
||||
CheckElement(values[i]);
|
||||
}
|
||||
|
||||
PG_RETURN_POINTER(result);
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert internal representation to the external binary representation
|
||||
*/
|
||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(svector_send);
|
||||
Datum
|
||||
svector_send(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SVector *svec = PG_GETARG_SVECTOR_P(0);
|
||||
float *values = SVECTOR_VALUES(svec);
|
||||
StringInfoData buf;
|
||||
|
||||
pq_begintypsend(&buf);
|
||||
pq_sendint(&buf, svec->dim, sizeof(int32));
|
||||
pq_sendint(&buf, svec->nnz, sizeof(int32));
|
||||
pq_sendint(&buf, svec->unused, sizeof(int32));
|
||||
for (int i = 0; i < svec->nnz; i++)
|
||||
pq_sendint(&buf, svec->indices[i], sizeof(int32));
|
||||
for (int i = 0; i < svec->nnz; i++)
|
||||
pq_sendfloat4(&buf, values[i]);
|
||||
|
||||
PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert sparse vector to sparse vector
|
||||
* This is needed to check the type modifier
|
||||
*/
|
||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(svector);
|
||||
Datum
|
||||
svector(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SVector *svec = PG_GETARG_SVECTOR_P(0);
|
||||
int32 typmod = PG_GETARG_INT32(1);
|
||||
|
||||
CheckExpectedDim(typmod, svec->dim);
|
||||
|
||||
PG_RETURN_POINTER(svec);
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert dense vector to sparse vector
|
||||
*/
|
||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_to_svector);
|
||||
Datum
|
||||
vector_to_svector(PG_FUNCTION_ARGS)
|
||||
{
|
||||
Vector *vec = PG_GETARG_VECTOR_P(0);
|
||||
int32 typmod = PG_GETARG_INT32(1);
|
||||
SVector *result;
|
||||
int dim = vec->dim;
|
||||
int nnz = 0;
|
||||
float *values;
|
||||
int j = 0;
|
||||
|
||||
CheckDim(dim);
|
||||
CheckExpectedDim(typmod, dim);
|
||||
|
||||
for (int i = 0; i < dim; i++)
|
||||
{
|
||||
if (vec->x[i] != 0)
|
||||
nnz++;
|
||||
}
|
||||
|
||||
result = InitSVector(dim, nnz);
|
||||
values = SVECTOR_VALUES(result);
|
||||
for (int i = 0; i < dim; i++)
|
||||
{
|
||||
if (vec->x[i] != 0)
|
||||
{
|
||||
/* Safety check */
|
||||
if (j == nnz)
|
||||
elog(ERROR, "safety check failed");
|
||||
|
||||
result->indices[j] = i;
|
||||
values[j] = vec->x[i];
|
||||
j++;
|
||||
}
|
||||
}
|
||||
|
||||
PG_RETURN_POINTER(result);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the L2 squared distance between sparse vectors
|
||||
*/
|
||||
static double
|
||||
l2_distance_squared_internal(SVector * a, SVector * b)
|
||||
{
|
||||
float *ax = SVECTOR_VALUES(a);
|
||||
float *bx = SVECTOR_VALUES(b);
|
||||
double distance = 0.0;
|
||||
int bpos = 0;
|
||||
|
||||
for (int i = 0; i < a->nnz; i++)
|
||||
{
|
||||
int ai = a->indices[i];
|
||||
int bi = -1;
|
||||
|
||||
for (int j = bpos; j < b->nnz; j++)
|
||||
{
|
||||
bi = b->indices[j];
|
||||
|
||||
if (ai == bi)
|
||||
{
|
||||
double diff = ax[i] - bx[j];
|
||||
|
||||
distance += diff * diff;
|
||||
}
|
||||
else if (ai > bi)
|
||||
distance += bx[j] * bx[j];
|
||||
|
||||
/* Update start for next iteration */
|
||||
if (ai >= bi)
|
||||
bpos = j + 1;
|
||||
|
||||
/* Found or passed it */
|
||||
if (bi >= ai)
|
||||
break;
|
||||
}
|
||||
|
||||
if (ai != bi)
|
||||
distance += ax[i] * ax[i];
|
||||
}
|
||||
|
||||
for (int j = bpos; j < b->nnz; j++)
|
||||
distance += bx[j] * bx[j];
|
||||
|
||||
return distance;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the L2 distance between sparse vectors
|
||||
*/
|
||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(svector_l2_distance);
|
||||
Datum
|
||||
svector_l2_distance(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SVector *a = PG_GETARG_SVECTOR_P(0);
|
||||
SVector *b = PG_GETARG_SVECTOR_P(1);
|
||||
|
||||
CheckDims(a, b);
|
||||
|
||||
PG_RETURN_FLOAT8(sqrt(l2_distance_squared_internal(a, b)));
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the L2 squared distance between sparse vectors
|
||||
* This saves a sqrt calculation
|
||||
*/
|
||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(svector_l2_squared_distance);
|
||||
Datum
|
||||
svector_l2_squared_distance(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SVector *a = PG_GETARG_SVECTOR_P(0);
|
||||
SVector *b = PG_GETARG_SVECTOR_P(1);
|
||||
|
||||
CheckDims(a, b);
|
||||
|
||||
PG_RETURN_FLOAT8(l2_distance_squared_internal(a, b));
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the inner product of two sparse vectors
|
||||
*/
|
||||
static double
|
||||
inner_product_internal(SVector * a, SVector * b)
|
||||
{
|
||||
float *ax = SVECTOR_VALUES(a);
|
||||
float *bx = SVECTOR_VALUES(b);
|
||||
double distance = 0.0;
|
||||
int bpos = 0;
|
||||
|
||||
for (int i = 0; i < a->nnz; i++)
|
||||
{
|
||||
int ai = a->indices[i];
|
||||
|
||||
for (int j = bpos; j < b->nnz; j++)
|
||||
{
|
||||
int bi = b->indices[j];
|
||||
|
||||
/* Only update when the same index */
|
||||
if (ai == bi)
|
||||
distance += ax[i] * bx[j];
|
||||
|
||||
/* Update start for next iteration */
|
||||
if (ai >= bi)
|
||||
bpos = j + 1;
|
||||
|
||||
/* Found or passed it */
|
||||
if (bi >= ai)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return distance;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the inner product of two sparse vectors
|
||||
*/
|
||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(svector_inner_product);
|
||||
Datum
|
||||
svector_inner_product(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SVector *a = PG_GETARG_SVECTOR_P(0);
|
||||
SVector *b = PG_GETARG_SVECTOR_P(1);
|
||||
|
||||
CheckDims(a, b);
|
||||
|
||||
PG_RETURN_FLOAT8(inner_product_internal(a, b));
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the negative inner product of two sparse vectors
|
||||
*/
|
||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(svector_negative_inner_product);
|
||||
Datum
|
||||
svector_negative_inner_product(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SVector *a = PG_GETARG_SVECTOR_P(0);
|
||||
SVector *b = PG_GETARG_SVECTOR_P(1);
|
||||
|
||||
CheckDims(a, b);
|
||||
|
||||
PG_RETURN_FLOAT8(-inner_product_internal(a, b));
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the cosine distance between two sparse vectors
|
||||
*/
|
||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(svector_cosine_distance);
|
||||
Datum
|
||||
svector_cosine_distance(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SVector *a = PG_GETARG_SVECTOR_P(0);
|
||||
SVector *b = PG_GETARG_SVECTOR_P(1);
|
||||
float *ax = SVECTOR_VALUES(a);
|
||||
float *bx = SVECTOR_VALUES(b);
|
||||
float norma = 0.0;
|
||||
float normb = 0.0;
|
||||
double similarity;
|
||||
|
||||
CheckDims(a, b);
|
||||
|
||||
similarity = inner_product_internal(a, b);
|
||||
|
||||
/* Auto-vectorized */
|
||||
for (int i = 0; i < a->nnz; i++)
|
||||
norma += ax[i] * ax[i];
|
||||
|
||||
/* Auto-vectorized */
|
||||
for (int i = 0; i < b->nnz; i++)
|
||||
normb += bx[i] * bx[i];
|
||||
|
||||
/* Use sqrt(a * b) over sqrt(a) * sqrt(b) */
|
||||
similarity /= sqrt((double) norma * (double) normb);
|
||||
|
||||
#ifdef _MSC_VER
|
||||
/* /fp:fast may not propagate NaN */
|
||||
if (isnan(similarity))
|
||||
PG_RETURN_FLOAT8(NAN);
|
||||
#endif
|
||||
|
||||
/* Keep in range */
|
||||
if (similarity > 1)
|
||||
similarity = 1.0;
|
||||
else if (similarity < -1)
|
||||
similarity = -1.0;
|
||||
|
||||
PG_RETURN_FLOAT8(1.0 - similarity);
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the weighted Jaccard distance between two sparse vectors
|
||||
*/
|
||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(svector_jaccard_distance);
|
||||
Datum
|
||||
svector_jaccard_distance(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SVector *a = PG_GETARG_SVECTOR_P(0);
|
||||
SVector *b = PG_GETARG_SVECTOR_P(1);
|
||||
float *ax = SVECTOR_VALUES(a);
|
||||
float *bx = SVECTOR_VALUES(b);
|
||||
double num = 0.0;
|
||||
double denom = 0.0;
|
||||
int bpos = 0;
|
||||
|
||||
CheckDims(a, b);
|
||||
|
||||
/*
|
||||
* Weighted Jaccard distance is not defined for vectors with negative
|
||||
* values. Could check and return NaN if minimal impact on performance.
|
||||
*/
|
||||
|
||||
for (int i = 0; i < a->nnz; i++)
|
||||
{
|
||||
int ai = a->indices[i];
|
||||
int bi = -1;
|
||||
|
||||
for (int j = bpos; j < b->nnz; j++)
|
||||
{
|
||||
bi = b->indices[j];
|
||||
|
||||
if (ai == bi)
|
||||
{
|
||||
num += ax[i] < bx[j] ? ax[i] : bx[j];
|
||||
denom += ax[i] > bx[j] ? ax[i] : bx[j];
|
||||
}
|
||||
else if (ai > bi)
|
||||
denom += bx[j];
|
||||
|
||||
/* Update start for next iteration */
|
||||
if (ai >= bi)
|
||||
bpos = j + 1;
|
||||
|
||||
/* Found or passed it */
|
||||
if (bi >= ai)
|
||||
break;
|
||||
}
|
||||
|
||||
if (ai != bi)
|
||||
denom += ax[i];
|
||||
}
|
||||
|
||||
for (int j = bpos; j < b->nnz; j++)
|
||||
denom += bx[j];
|
||||
|
||||
if (denom > 0)
|
||||
PG_RETURN_FLOAT8(1.0 - (num / denom));
|
||||
else
|
||||
PG_RETURN_FLOAT8(NAN);
|
||||
}
|
||||
23
src/svector.h
Normal file
23
src/svector.h
Normal file
@@ -0,0 +1,23 @@
|
||||
#ifndef SVECTOR_H
|
||||
#define SVECTOR_H
|
||||
|
||||
#define SVECTOR_MAX_DIM 100000
|
||||
|
||||
/* Total size in bytes: fixed header, then _nnz int32 indices, then _nnz float values */
#define SVECTOR_SIZE(_nnz) (offsetof(SVector, indices) + (_nnz) * sizeof(int32) + (_nnz) * sizeof(float))
|
||||
#define SVECTOR_VALUES(x) ((float *) (((char *) (x)) + offsetof(SVector, indices) + (x)->nnz * sizeof(int32)))
|
||||
#define DatumGetSVector(x) ((SVector *) PG_DETOAST_DATUM(x))
|
||||
#define PG_GETARG_SVECTOR_P(x) DatumGetSVector(PG_GETARG_DATUM(x))
|
||||
#define PG_RETURN_SVECTOR_P(x) PG_RETURN_POINTER(x)
|
||||
|
||||
/* Sparse vector: a fixed header, nnz int32 indexes, then nnz float values */
typedef struct SVector
|
||||
{
|
||||
int32 vl_len_; /* varlena header (do not touch directly!) */
|
||||
int32 dim; /* number of dimensions */
|
||||
int32 nnz; /* number of stored elements (length of indices and of the values array) */
|
||||
int32 unused; /* reserved; svector_recv rejects any value other than 0 */
|
||||
int32 indices[FLEXIBLE_ARRAY_MEMBER]; /* element indexes; the float values start right after the nnz-th index (see SVECTOR_VALUES) */
|
||||
} SVector;
|
||||
|
||||
SVector *InitSVector(int dim, int nnz);
|
||||
|
||||
#endif
|
||||
24
src/vector.c
24
src/vector.c
@@ -9,6 +9,7 @@
|
||||
#include "lib/stringinfo.h"
|
||||
#include "libpq/pqformat.h"
|
||||
#include "port.h" /* for strtof() */
|
||||
#include "svector.h"
|
||||
#include "utils/array.h"
|
||||
#include "utils/builtins.h"
|
||||
#include "utils/lsyscache.h"
|
||||
@@ -1151,3 +1152,26 @@ vector_avg(PG_FUNCTION_ARGS)
|
||||
|
||||
PG_RETURN_POINTER(result);
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert sparse vector to dense vector
|
||||
*/
|
||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(svector_to_vector);
|
||||
Datum
|
||||
svector_to_vector(PG_FUNCTION_ARGS)
|
||||
{
|
||||
SVector *svec = PG_GETARG_SVECTOR_P(0);
|
||||
int32 typmod = PG_GETARG_INT32(1);
|
||||
Vector *result;
|
||||
int dim = svec->dim;
|
||||
float *values = SVECTOR_VALUES(svec);
|
||||
|
||||
CheckDim(dim);
|
||||
CheckExpectedDim(typmod, dim);
|
||||
|
||||
result = InitVector(dim);
|
||||
for (int i = 0; i < svec->nnz; i++)
|
||||
result->x[svec->indices[i]] = values[i];
|
||||
|
||||
PG_RETURN_POINTER(result);
|
||||
}
|
||||
|
||||
@@ -54,85 +54,85 @@ SELECT vector_norm('[3e37,4e37]')::real;
|
||||
5e+37
|
||||
(1 row)
|
||||
|
||||
SELECT l2_distance('[0,0]', '[3,4]');
|
||||
SELECT l2_distance('[0,0]'::vector, '[3,4]');
|
||||
l2_distance
|
||||
-------------
|
||||
5
|
||||
(1 row)
|
||||
|
||||
SELECT l2_distance('[0,0]', '[0,1]');
|
||||
SELECT l2_distance('[0,0]'::vector, '[0,1]');
|
||||
l2_distance
|
||||
-------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT l2_distance('[1,2]', '[3]');
|
||||
SELECT l2_distance('[1,2]'::vector, '[3]');
|
||||
ERROR: different vector dimensions 2 and 1
|
||||
SELECT l2_distance('[3e38]', '[-3e38]');
|
||||
SELECT l2_distance('[3e38]'::vector, '[-3e38]');
|
||||
l2_distance
|
||||
-------------
|
||||
Infinity
|
||||
(1 row)
|
||||
|
||||
SELECT inner_product('[1,2]', '[3,4]');
|
||||
SELECT inner_product('[1,2]'::vector, '[3,4]');
|
||||
inner_product
|
||||
---------------
|
||||
11
|
||||
(1 row)
|
||||
|
||||
SELECT inner_product('[1,2]', '[3]');
|
||||
SELECT inner_product('[1,2]'::vector, '[3]');
|
||||
ERROR: different vector dimensions 2 and 1
|
||||
SELECT inner_product('[3e38]', '[3e38]');
|
||||
SELECT inner_product('[3e38]'::vector, '[3e38]');
|
||||
inner_product
|
||||
---------------
|
||||
Infinity
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,2]', '[2,4]');
|
||||
SELECT cosine_distance('[1,2]'::vector, '[2,4]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,2]', '[0,0]');
|
||||
SELECT cosine_distance('[1,2]'::vector, '[0,0]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
NaN
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,1]', '[1,1]');
|
||||
SELECT cosine_distance('[1,1]'::vector, '[1,1]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,0]', '[0,2]');
|
||||
SELECT cosine_distance('[1,0]'::vector, '[0,2]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,1]', '[-1,-1]');
|
||||
SELECT cosine_distance('[1,1]'::vector, '[-1,-1]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
2
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,2]', '[3]');
|
||||
SELECT cosine_distance('[1,2]'::vector, '[3]');
|
||||
ERROR: different vector dimensions 2 and 1
|
||||
SELECT cosine_distance('[1,1]', '[1.1,1.1]');
|
||||
SELECT cosine_distance('[1,1]'::vector, '[1.1,1.1]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,1]', '[-1.1,-1.1]');
|
||||
SELECT cosine_distance('[1,1]'::vector, '[-1.1,-1.1]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
2
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[3e38]', '[3e38]');
|
||||
SELECT cosine_distance('[3e38]'::vector, '[3e38]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
NaN
|
||||
|
||||
134
test/expected/svector.out
Normal file
134
test/expected/svector.out
Normal file
@@ -0,0 +1,134 @@
|
||||
SELECT '(0,1.5),(2,3.5)|5|'::svector;
|
||||
svector
|
||||
--------------------
|
||||
(0,1.5),(2,3.5)|5|
|
||||
(1 row)
|
||||
|
||||
SELECT '(0,1.5),(2,3.5)|5|'::svector::vector;
|
||||
vector
|
||||
-----------------
|
||||
[1.5,0,3.5,0,0]
|
||||
(1 row)
|
||||
|
||||
SELECT '(0,1.5),(2,3.5)|5|'::svector::vector(5);
|
||||
vector
|
||||
-----------------
|
||||
[1.5,0,3.5,0,0]
|
||||
(1 row)
|
||||
|
||||
SELECT '(0,1.5),(2,3.5)|5|'::svector::vector(4);
|
||||
ERROR: expected 4 dimensions, not 5
|
||||
SELECT '[0,1.5,0,3.5,0]'::vector::svector;
|
||||
svector
|
||||
--------------------
|
||||
(1,1.5),(3,3.5)|5|
|
||||
(1 row)
|
||||
|
||||
SELECT '|5|'::svector;
|
||||
svector
|
||||
---------
|
||||
|5|
|
||||
(1 row)
|
||||
|
||||
SELECT '|-1|'::svector;
|
||||
ERROR: svector must have at least 1 dimension
|
||||
LINE 1: SELECT '|-1|'::svector;
|
||||
^
|
||||
SELECT '|100001|'::svector;
|
||||
ERROR: svector cannot have more than 100000 dimensions
|
||||
LINE 1: SELECT '|100001|'::svector;
|
||||
^
|
||||
SELECT '|16001|'::svector::vector;
|
||||
ERROR: vector cannot have more than 16000 dimensions
|
||||
SELECT '(-1,1)|1|'::svector;
|
||||
ERROR: index must not be negative
|
||||
LINE 1: SELECT '(-1,1)|1|'::svector;
|
||||
^
|
||||
SELECT '(1,1)|1|'::svector;
|
||||
ERROR: index must be less than dimensions
|
||||
LINE 1: SELECT '(1,1)|1|'::svector;
|
||||
^
|
||||
SELECT '|1|'::svector(2);
|
||||
ERROR: expected 2 dimensions, not 1
|
||||
SELECT l2_distance('|2|'::svector, '(0,3),(1,4)|2|');
|
||||
l2_distance
|
||||
-------------
|
||||
5
|
||||
(1 row)
|
||||
|
||||
SELECT l2_distance('|2|'::svector, '(1,1)|2|');
|
||||
l2_distance
|
||||
-------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT '|2|'::svector <-> '(0,3),(1,4)|2|';
|
||||
?column?
|
||||
----------
|
||||
5
|
||||
(1 row)
|
||||
|
||||
SELECT inner_product('(0,1),(1,2)|2|'::svector, '(0,2),(1,4)|2|');
|
||||
inner_product
|
||||
---------------
|
||||
10
|
||||
(1 row)
|
||||
|
||||
SELECT svector_negative_inner_product('(0,1),(1,2)|2|', '(0,2),(1,4)|2|');
|
||||
svector_negative_inner_product
|
||||
--------------------------------
|
||||
-10
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('(0,1),(1,2)|2|'::svector, '(0,2),(1,4)|2|');
|
||||
cosine_distance
|
||||
-----------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('(0,1),(1,2)|2|'::svector, '|2|');
|
||||
cosine_distance
|
||||
-----------------
|
||||
NaN
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('(0,1),(1,1)|2|'::svector, '(0,-1),(1,-1)|2|');
|
||||
cosine_distance
|
||||
-----------------
|
||||
2
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('(0,1)|2|'::svector, '(1,2)|2|');
|
||||
cosine_distance
|
||||
-----------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('|1|'::svector, '|1|');
|
||||
cosine_distance
|
||||
-----------------
|
||||
NaN
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('(0,1)|2|'::svector, '(0,1)|3|');
|
||||
ERROR: different svector dimensions 2 and 3
|
||||
SELECT jaccard_distance('(0,1)|2|', '(0,1)|2|');
|
||||
jaccard_distance
|
||||
------------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT jaccard_distance('(0,1)|2|', '(1,1)|2|');
|
||||
jaccard_distance
|
||||
------------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT jaccard_distance('|1|', '|1|');
|
||||
jaccard_distance
|
||||
------------------
|
||||
NaN
|
||||
(1 row)
|
||||
|
||||
SELECT jaccard_distance('(0,1)|2|', '(0,1)|3|');
|
||||
ERROR: different svector dimensions 2 and 3
|
||||
@@ -13,24 +13,24 @@ SELECT vector_norm('[3,4]');
|
||||
SELECT vector_norm('[0,1]');
|
||||
SELECT vector_norm('[3e37,4e37]')::real;
|
||||
|
||||
SELECT l2_distance('[0,0]', '[3,4]');
|
||||
SELECT l2_distance('[0,0]', '[0,1]');
|
||||
SELECT l2_distance('[1,2]', '[3]');
|
||||
SELECT l2_distance('[3e38]', '[-3e38]');
|
||||
SELECT l2_distance('[0,0]'::vector, '[3,4]');
|
||||
SELECT l2_distance('[0,0]'::vector, '[0,1]');
|
||||
SELECT l2_distance('[1,2]'::vector, '[3]');
|
||||
SELECT l2_distance('[3e38]'::vector, '[-3e38]');
|
||||
|
||||
SELECT inner_product('[1,2]', '[3,4]');
|
||||
SELECT inner_product('[1,2]', '[3]');
|
||||
SELECT inner_product('[3e38]', '[3e38]');
|
||||
SELECT inner_product('[1,2]'::vector, '[3,4]');
|
||||
SELECT inner_product('[1,2]'::vector, '[3]');
|
||||
SELECT inner_product('[3e38]'::vector, '[3e38]');
|
||||
|
||||
SELECT cosine_distance('[1,2]', '[2,4]');
|
||||
SELECT cosine_distance('[1,2]', '[0,0]');
|
||||
SELECT cosine_distance('[1,1]', '[1,1]');
|
||||
SELECT cosine_distance('[1,0]', '[0,2]');
|
||||
SELECT cosine_distance('[1,1]', '[-1,-1]');
|
||||
SELECT cosine_distance('[1,2]', '[3]');
|
||||
SELECT cosine_distance('[1,1]', '[1.1,1.1]');
|
||||
SELECT cosine_distance('[1,1]', '[-1.1,-1.1]');
|
||||
SELECT cosine_distance('[3e38]', '[3e38]');
|
||||
SELECT cosine_distance('[1,2]'::vector, '[2,4]');
|
||||
SELECT cosine_distance('[1,2]'::vector, '[0,0]');
|
||||
SELECT cosine_distance('[1,1]'::vector, '[1,1]');
|
||||
SELECT cosine_distance('[1,0]'::vector, '[0,2]');
|
||||
SELECT cosine_distance('[1,1]'::vector, '[-1,-1]');
|
||||
SELECT cosine_distance('[1,2]'::vector, '[3]');
|
||||
SELECT cosine_distance('[1,1]'::vector, '[1.1,1.1]');
|
||||
SELECT cosine_distance('[1,1]'::vector, '[-1.1,-1.1]');
|
||||
SELECT cosine_distance('[3e38]'::vector, '[3e38]');
|
||||
|
||||
SELECT l1_distance('[0,0]', '[3,4]');
|
||||
SELECT l1_distance('[0,0]', '[0,1]');
|
||||
|
||||
34
test/sql/svector.sql
Normal file
34
test/sql/svector.sql
Normal file
@@ -0,0 +1,34 @@
|
||||
SELECT '(0,1.5),(2,3.5)|5|'::svector;
|
||||
SELECT '(0,1.5),(2,3.5)|5|'::svector::vector;
|
||||
SELECT '(0,1.5),(2,3.5)|5|'::svector::vector(5);
|
||||
SELECT '(0,1.5),(2,3.5)|5|'::svector::vector(4);
|
||||
SELECT '[0,1.5,0,3.5,0]'::vector::svector;
|
||||
|
||||
SELECT '|5|'::svector;
|
||||
SELECT '|-1|'::svector;
|
||||
SELECT '|100001|'::svector;
|
||||
SELECT '|16001|'::svector::vector;
|
||||
|
||||
SELECT '(-1,1)|1|'::svector;
|
||||
SELECT '(1,1)|1|'::svector;
|
||||
|
||||
SELECT '|1|'::svector(2);
|
||||
|
||||
SELECT l2_distance('|2|'::svector, '(0,3),(1,4)|2|');
|
||||
SELECT l2_distance('|2|'::svector, '(1,1)|2|');
|
||||
SELECT '|2|'::svector <-> '(0,3),(1,4)|2|';
|
||||
|
||||
SELECT inner_product('(0,1),(1,2)|2|'::svector, '(0,2),(1,4)|2|');
|
||||
SELECT svector_negative_inner_product('(0,1),(1,2)|2|', '(0,2),(1,4)|2|');
|
||||
|
||||
SELECT cosine_distance('(0,1),(1,2)|2|'::svector, '(0,2),(1,4)|2|');
|
||||
SELECT cosine_distance('(0,1),(1,2)|2|'::svector, '|2|');
|
||||
SELECT cosine_distance('(0,1),(1,1)|2|'::svector, '(0,-1),(1,-1)|2|');
|
||||
SELECT cosine_distance('(0,1)|2|'::svector, '(1,2)|2|');
|
||||
SELECT cosine_distance('|1|'::svector, '|1|');
|
||||
SELECT cosine_distance('(0,1)|2|'::svector, '(0,1)|3|');
|
||||
|
||||
SELECT jaccard_distance('(0,1)|2|', '(0,1)|2|');
|
||||
SELECT jaccard_distance('(0,1)|2|', '(1,1)|2|');
|
||||
SELECT jaccard_distance('|1|', '|1|');
|
||||
SELECT jaccard_distance('(0,1)|2|', '(0,1)|3|');
|
||||
Reference in New Issue
Block a user