From dde3a2aacdb1c2d161008f0ce1d41e9e1cb8d9c0 Mon Sep 17 00:00:00 2001 From: Andrew Kane Date: Wed, 3 Apr 2024 20:55:03 -0700 Subject: [PATCH] Removed dimensions from sparsevec --- sql/vector--0.6.2--0.7.0.sql | 4 - sql/vector.sql | 4 - src/hnswbuild.c | 19 ++- src/hnswutils.c | 2 +- src/sparsevec.c | 168 ++++-------------------- src/sparsevec.h | 6 +- src/vector.c | 12 +- test/expected/hnsw_sparsevec_cosine.out | 20 +-- test/expected/hnsw_sparsevec_ip.out | 20 +-- test/expected/hnsw_sparsevec_l2.out | 24 ++-- test/expected/sparsevec_functions.out | 22 ++-- test/expected/sparsevec_input.out | 78 +++++------ test/sql/hnsw_sparsevec_cosine.sql | 10 +- test/sql/hnsw_sparsevec_ip.sql | 8 +- test/sql/hnsw_sparsevec_l2.sql | 12 +- test/sql/sparsevec_functions.sql | 21 ++- test/sql/sparsevec_input.sql | 25 ++-- 17 files changed, 166 insertions(+), 289 deletions(-) diff --git a/sql/vector--0.6.2--0.7.0.sql b/sql/vector--0.6.2--0.7.0.sql index 3fdc8f1..1b48a9b 100644 --- a/sql/vector--0.6.2--0.7.0.sql +++ b/sql/vector--0.6.2--0.7.0.sql @@ -173,9 +173,6 @@ CREATE FUNCTION sparsevec_in(cstring, oid, integer) RETURNS sparsevec CREATE FUNCTION sparsevec_out(sparsevec) RETURNS cstring AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; -CREATE FUNCTION sparsevec_typmod_in(cstring[]) RETURNS integer - AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; - CREATE FUNCTION sparsevec_recv(internal, oid, integer) RETURNS sparsevec AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; @@ -185,7 +182,6 @@ CREATE FUNCTION sparsevec_send(sparsevec) RETURNS bytea CREATE TYPE sparsevec ( INPUT = sparsevec_in, OUTPUT = sparsevec_out, - TYPMOD_IN = sparsevec_typmod_in, RECEIVE = sparsevec_recv, SEND = sparsevec_send, STORAGE = external diff --git a/sql/vector.sql b/sql/vector.sql index c694b1c..bf668fd 100644 --- a/sql/vector.sql +++ b/sql/vector.sql @@ -480,9 +480,6 @@ CREATE FUNCTION sparsevec_in(cstring, oid, integer) RETURNS sparsevec CREATE FUNCTION sparsevec_out(sparsevec) RETURNS cstring AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; -CREATE FUNCTION sparsevec_typmod_in(cstring[]) RETURNS integer - AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; - CREATE FUNCTION sparsevec_recv(internal, oid, integer) RETURNS sparsevec AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; @@ -492,7 +489,6 @@ CREATE FUNCTION sparsevec_send(sparsevec) RETURNS bytea CREATE TYPE sparsevec ( INPUT = sparsevec_in, OUTPUT = sparsevec_out, - TYPMOD_IN = sparsevec_typmod_in, RECEIVE = sparsevec_recv, SEND = sparsevec_send, STORAGE = external diff --git a/src/hnswbuild.c b/src/hnswbuild.c index 2300127..2965f5d 100644 --- a/src/hnswbuild.c +++ b/src/hnswbuild.c @@ -681,8 +681,6 @@ GetMaxDimensions(HnswType type) maxDimensions *= 2; else if (type == HNSW_TYPE_BIT) maxDimensions *= 32; - else if (type == HNSW_TYPE_SPARSEVEC) - maxDimensions = INT_MAX; return maxDimensions; } @@ -693,8 +691,6 @@ GetMaxDimensions(HnswType type) static void InitBuildState(HnswBuildState * buildstate, Relation heap, Relation index, IndexInfo *indexInfo, ForkNumber forkNum) { - int maxDimensions; - buildstate->heap = heap; buildstate->index = index; buildstate->indexInfo = indexInfo; @@ -705,14 +701,17 @@ InitBuildState(HnswBuildState * buildstate, Relation heap, Relation index, Index buildstate->efConstruction = HnswGetEfConstruction(index); buildstate->dimensions = TupleDescAttr(index->rd_att, 0)->atttypmod; - maxDimensions = GetMaxDimensions(buildstate->type); - /* Require column to have dimensions to be indexed */ - if (buildstate->dimensions < 0) - elog(ERROR, "column does not have dimensions"); + if (buildstate->type != HNSW_TYPE_SPARSEVEC) + { + int maxDimensions = GetMaxDimensions(buildstate->type); - if (buildstate->dimensions > maxDimensions) - elog(ERROR, "column cannot have more than %d dimensions for hnsw index", maxDimensions); + if (buildstate->dimensions < 0) + elog(ERROR, "column does not have dimensions"); + + if (buildstate->dimensions > maxDimensions) + elog(ERROR, "column cannot have more than %d dimensions for hnsw index", maxDimensions); + } if (buildstate->efConstruction < 2 * buildstate->m) elog(ERROR, "ef_construction must be greater than or equal to 2 * m"); diff --git a/src/hnswutils.c b/src/hnswutils.c index e082808..960ebca 100644 --- a/src/hnswutils.c +++ b/src/hnswutils.c @@ -229,7 +229,7 @@ HnswNormValue(FmgrInfo *procinfo, Oid collation, Datum *value, HnswType type) else if (type == HNSW_TYPE_SPARSEVEC) { SparseVector *v = DatumGetSparseVector(*value); - SparseVector *result = InitSparseVector(v->dim, v->nnz); + SparseVector *result = InitSparseVector(v->nnz); float *vx = SPARSEVEC_VALUES(v); float *rx = SPARSEVEC_VALUES(result); diff --git a/src/sparsevec.c b/src/sparsevec.c index 22649cd..db858be 100644 --- a/src/sparsevec.c +++ b/src/sparsevec.c @@ -18,69 +18,28 @@ #include "utils/builtins.h" #endif -/* - * Ensure same dimensions - */ -static inline void -CheckDims(SparseVector * a, SparseVector * b) -{ - if (a->dim != b->dim) - ereport(ERROR, - (errcode(ERRCODE_DATA_EXCEPTION), - errmsg("different sparsevec dimensions %d and %d", a->dim, b->dim))); -} - -/* - * Ensure expected dimensions - */ -static inline void -CheckExpectedDim(int32 typmod, int dim) -{ - if (typmod != -1 && typmod != dim) - ereport(ERROR, - (errcode(ERRCODE_DATA_EXCEPTION), - errmsg("expected %d dimensions, not %d", typmod, dim))); -} - -/* - * Ensure valid dimensions - */ -static inline void -CheckDim(int dim) -{ - if (dim < 1) - ereport(ERROR, - (errcode(ERRCODE_DATA_EXCEPTION), - errmsg("sparsevec must have at least 1 dimension"))); - - if (dim > SPARSEVEC_MAX_DIM) - ereport(ERROR, - (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("sparsevec cannot have more than %d dimensions", SPARSEVEC_MAX_DIM))); -} - /* * Ensure valid nnz */ static inline void -CheckNnz(int nnz, int dim) +CheckNnz(int nnz) { if (nnz < 0) ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("sparsevec must have at least one element"))); - if (nnz > dim) + if (nnz > SPARSEVEC_MAX_NNZ) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), - errmsg("sparsevec cannot have more elements than dimensions"))); + errmsg("sparsevec cannot have more elements than non-zero elements"))); } /* * Ensure valid index */ static inline void -CheckIndex(int32 *indices, int i, int dim) +CheckIndex(int32 *indices, int i) { int32 index = indices[i]; @@ -89,11 +48,6 @@ CheckIndex(int32 *indices, int i, int dim) (errcode(ERRCODE_DATA_EXCEPTION), errmsg("index must not be negative"))); - if (index >= dim) - ereport(ERROR, - (errcode(ERRCODE_DATA_EXCEPTION), - errmsg("index must be less than dimensions"))); - if (i > 0) { if (index < indices[i - 1]) @@ -129,7 +83,7 @@ CheckElement(float value) * Allocate and initialize a new sparse vector */ SparseVector * -InitSparseVector(int dim, int nnz) +InitSparseVector(int nnz) { SparseVector *result; int size; @@ -137,7 +91,6 @@ InitSparseVector(int dim, int nnz) size = SPARSEVEC_SIZE(nnz); result = (SparseVector *) palloc0(size); SET_VARSIZE(result, size); - result->dim = dim; result->nnz = nnz; return result; @@ -167,8 +120,6 @@ Datum sparsevec_in(PG_FUNCTION_ARGS) { char *lit = PG_GETARG_CSTRING(0); - int32 typmod = PG_GETARG_INT32(2); - int dim; char *pt; char *stringEnd; SparseVector *result; @@ -190,6 +141,11 @@ sparsevec_in(PG_FUNCTION_ARGS) pt++; } + if (maxNnz > SPARSEVEC_MAX_NNZ) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("sparsevec cannot have more than %d non-zero elements", SPARSEVEC_MAX_NNZ))); + indices = palloc(maxNnz * sizeof(int32)); values = palloc(maxNnz * sizeof(float)); @@ -297,24 +253,6 @@ sparsevec_in(PG_FUNCTION_ARGS) stringEnd++; - if (*stringEnd != '/') - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("malformed sparsevec literal: \"%s\"", lit), - errdetail("Unexpected end of input."))); - - stringEnd++; - - /* Use similar logic as int2vectorin */ - errno = 0; - pt = stringEnd; - dim = strtol(pt, &stringEnd, 10); - - if (stringEnd == pt) - ereport(ERROR, - (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), - errmsg("invalid input syntax for type sparsevec: \"%s\"", lit))); - /* Only whitespace is allowed after the closing brace */ while (sparsevec_isspace(*stringEnd)) stringEnd++; @@ -323,21 +261,18 @@ sparsevec_in(PG_FUNCTION_ARGS) ereport(ERROR, (errcode(ERRCODE_INVALID_TEXT_REPRESENTATION), errmsg("malformed sparsevec literal: \"%s\"", lit), - errdetail("Junk after closing."))); + errdetail("Junk after closing right brace."))); pfree(litcopy); - CheckDim(dim); - CheckExpectedDim(typmod, dim); - - result = InitSparseVector(dim, nnz); + result = InitSparseVector(nnz); rvalues = SPARSEVEC_VALUES(result); for (int i = 0; i < nnz; i++) { result->indices[i] = indices[i]; rvalues[i] = values[i]; - CheckIndex(result->indices, i, dim); + CheckIndex(result->indices, i); CheckElement(rvalues[i]); } @@ -382,11 +317,9 @@ sparsevec_out(PG_FUNCTION_ARGS) * * nnz - 1 bytes for , * - * 10 bytes for dimensions - * - * 4 bytes for {, }, /, and \0 + * 3 bytes for {, }, and \0 */ - buf = (char *) palloc((11 + FLOAT_SHORTEST_DECIMAL_LEN) * sparsevec->nnz + 13); + buf = (char *) palloc((11 + FLOAT_SHORTEST_DECIMAL_LEN) * sparsevec->nnz + 2); ptr = buf; AppendChar(ptr, '{'); @@ -402,45 +335,12 @@ sparsevec_out(PG_FUNCTION_ARGS) } AppendChar(ptr, '}'); - AppendChar(ptr, '/'); - AppendInt(ptr, sparsevec->dim); *ptr = '\0'; PG_FREE_IF_COPY(sparsevec, 0); PG_RETURN_CSTRING(buf); } -/* - * Convert type modifier - */ -PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec_typmod_in); -Datum -sparsevec_typmod_in(PG_FUNCTION_ARGS) -{ - ArrayType *ta = PG_GETARG_ARRAYTYPE_P(0); - int32 *tl; - int n; - - tl = ArrayGetIntegerTypmods(ta, &n); - - if (n != 1) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("invalid type modifier"))); - - if (*tl < 1) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("dimensions for type sparsevec must be at least 1"))); - - if (*tl > SPARSEVEC_MAX_DIM) - ereport(ERROR, - (errcode(ERRCODE_INVALID_PARAMETER_VALUE), - errmsg("dimensions for type sparsevec cannot exceed %d", SPARSEVEC_MAX_DIM))); - - PG_RETURN_INT32(*tl); -} - /* * Convert external binary representation to internal representation */ @@ -449,33 +349,30 @@ Datum sparsevec_recv(PG_FUNCTION_ARGS) { StringInfo buf = (StringInfo) PG_GETARG_POINTER(0); - int32 typmod = PG_GETARG_INT32(2); SparseVector *result; - int32 dim; int32 nnz; int32 unused; + int32 unused2; float *values; - dim = pq_getmsgint(buf, sizeof(int32)); nnz = pq_getmsgint(buf, sizeof(int32)); unused = pq_getmsgint(buf, sizeof(int32)); + unused2 = pq_getmsgint(buf, sizeof(int32)); - CheckDim(dim); - CheckNnz(nnz, dim); - CheckExpectedDim(typmod, dim); + CheckNnz(nnz); - if (unused != 0) + if (unused != 0 || unused2 != 0) ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION), errmsg("expected unused to be 0, not %d", unused))); - result = InitSparseVector(dim, nnz); + result = InitSparseVector(nnz); values = SPARSEVEC_VALUES(result); for (int i = 0; i < nnz; i++) { result->indices[i] = pq_getmsgint(buf, sizeof(int32)); - CheckIndex(result->indices, i, dim); + CheckIndex(result->indices, i); } for (int i = 0; i < nnz; i++) @@ -499,9 +396,9 @@ sparsevec_send(PG_FUNCTION_ARGS) StringInfoData buf; pq_begintypsend(&buf); - pq_sendint(&buf, svec->dim, sizeof(int32)); pq_sendint(&buf, svec->nnz, sizeof(int32)); pq_sendint(&buf, svec->unused, sizeof(int32)); + pq_sendint(&buf, svec->unused2, sizeof(int32)); for (int i = 0; i < svec->nnz; i++) pq_sendint(&buf, svec->indices[i], sizeof(int32)); for (int i = 0; i < svec->nnz; i++) @@ -512,16 +409,12 @@ sparsevec_send(PG_FUNCTION_ARGS) /* * Convert sparse vector to sparse vector - * This is needed to check the type modifier */ PGDLLEXPORT PG_FUNCTION_INFO_V1(sparsevec); Datum sparsevec(PG_FUNCTION_ARGS) { SparseVector *svec = PG_GETARG_SPARSEVEC_P(0); - int32 typmod = PG_GETARG_INT32(1); - - CheckExpectedDim(typmod, svec->dim); PG_RETURN_POINTER(svec); } @@ -534,23 +427,20 @@ Datum vector_to_sparsevec(PG_FUNCTION_ARGS) { Vector *vec = PG_GETARG_VECTOR_P(0); - int32 typmod = PG_GETARG_INT32(1); SparseVector *result; int dim = vec->dim; int nnz = 0; float *values; int j = 0; - CheckDim(dim); - CheckExpectedDim(typmod, dim); - for (int i = 0; i < dim; i++) { if (vec->x[i] != 0) nnz++; } - result = InitSparseVector(dim, nnz); + CheckNnz(nnz); + result = InitSparseVector(nnz); values = SPARSEVEC_VALUES(result); for (int i = 0; i < dim; i++) { @@ -627,8 +517,6 @@ sparsevec_l2_distance(PG_FUNCTION_ARGS) SparseVector *a = PG_GETARG_SPARSEVEC_P(0); SparseVector *b = PG_GETARG_SPARSEVEC_P(1); - CheckDims(a, b); - PG_RETURN_FLOAT8(sqrt(l2_distance_squared_internal(a, b))); } @@ -643,8 +531,6 @@ sparsevec_l2_squared_distance(PG_FUNCTION_ARGS) SparseVector *a = PG_GETARG_SPARSEVEC_P(0); SparseVector *b = PG_GETARG_SPARSEVEC_P(1); - CheckDims(a, b); - PG_RETURN_FLOAT8(l2_distance_squared_internal(a, b)); } @@ -694,8 +580,6 @@ sparsevec_inner_product(PG_FUNCTION_ARGS) SparseVector *a = PG_GETARG_SPARSEVEC_P(0); SparseVector *b = PG_GETARG_SPARSEVEC_P(1); - CheckDims(a, b); - PG_RETURN_FLOAT8(inner_product_internal(a, b)); } @@ -709,8 +593,6 @@ sparsevec_negative_inner_product(PG_FUNCTION_ARGS) SparseVector *a = PG_GETARG_SPARSEVEC_P(0); SparseVector *b = PG_GETARG_SPARSEVEC_P(1); - CheckDims(a, b); - PG_RETURN_FLOAT8(-inner_product_internal(a, b)); } @@ -729,8 +611,6 @@ sparsevec_cosine_distance(PG_FUNCTION_ARGS) float normb = 0.0; double similarity; - CheckDims(a, b); - similarity = inner_product_internal(a, b); /* Auto-vectorized */ diff --git a/src/sparsevec.h b/src/sparsevec.h index 673c5b0..bcbe216 100644 --- a/src/sparsevec.h +++ b/src/sparsevec.h @@ -1,7 +1,7 @@ #ifndef SPARSEVEC_H #define SPARSEVEC_H -#define SPARSEVEC_MAX_DIM 100000 +#define SPARSEVEC_MAX_NNZ 100000 /* Ensure values are aligned */ #define SPARSEVEC_SIZE(_nnz) (offsetof(SparseVector, indices) + MAXALIGN((_nnz) * sizeof(int32)) + (_nnz * sizeof(float))) @@ -13,12 +13,12 @@ typedef struct SparseVector { int32 vl_len_; /* varlena header (do not touch directly!) */ - int32 dim; /* number of dimensions */ int32 nnz; int32 unused; + int32 unused2; int32 indices[FLEXIBLE_ARRAY_MEMBER]; } SparseVector; -SparseVector *InitSparseVector(int dim, int nnz); +SparseVector *InitSparseVector(int nnz); #endif diff --git a/src/vector.c b/src/vector.c index 1f5ec89..3098f74 100644 --- a/src/vector.c +++ b/src/vector.c @@ -1236,11 +1236,19 @@ sparsevec_to_vector(PG_FUNCTION_ARGS) SparseVector *svec = PG_GETARG_SPARSEVEC_P(0); int32 typmod = PG_GETARG_INT32(1); Vector *result; - int dim = svec->dim; + int dim; float *values = SPARSEVEC_VALUES(svec); + int maxIndex = svec->nnz == 0 ? -1 : svec->indices[svec->nnz - 1]; + + if (typmod == -1) + dim = maxIndex + 1; + else + dim = typmod; CheckDim(dim); - CheckExpectedDim(typmod, dim); + + if (dim < maxIndex + 1) + elog(ERROR, "Vector must have at least %d dimensions", maxIndex + 1); result = InitVector(dim); for (int i = 0; i < svec->nnz; i++) diff --git a/test/expected/hnsw_sparsevec_cosine.out b/test/expected/hnsw_sparsevec_cosine.out index 778415e..284abdc 100644 --- a/test/expected/hnsw_sparsevec_cosine.out +++ b/test/expected/hnsw_sparsevec_cosine.out @@ -1,17 +1,17 @@ SET enable_seqscan = off; -CREATE TABLE t (val sparsevec(3)); -INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL); +CREATE TABLE t (val sparsevec); +INSERT INTO t (val) VALUES ('{}'), ('{0:1,1:2,2:3}'), ('{0:1,1:1,2:1}'), (NULL); CREATE INDEX ON t USING hnsw (val sparsevec_cosine_ops); -INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3'); -SELECT * FROM t ORDER BY val <=> '{0:3,1:3,2:3}/3'; - val ------------------ - {0:1,1:1,2:1}/3 - {0:1,1:2,2:3}/3 - {0:1,1:2,2:4}/3 +INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}'); +SELECT * FROM t ORDER BY val <=> '{0:3,1:3,2:3}'; + val +--------------- + {0:1,1:1,2:1} + {0:1,1:2,2:3} + {0:1,1:2,2:4} (3 rows) -SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '{}/3') t2; +SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '{}') t2; count ------- 3 diff --git a/test/expected/hnsw_sparsevec_ip.out b/test/expected/hnsw_sparsevec_ip.out index 1c303f0..e8f2e71 100644 --- a/test/expected/hnsw_sparsevec_ip.out +++ b/test/expected/hnsw_sparsevec_ip.out @@ -1,15 +1,15 @@ SET enable_seqscan = off; -CREATE TABLE t (val sparsevec(3)); -INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL); +CREATE TABLE t (val sparsevec); +INSERT INTO t (val) VALUES ('{}'), ('{0:1,1:2,2:3}'), ('{0:1,1:1,2:1}'), (NULL); CREATE INDEX ON t USING hnsw (val sparsevec_ip_ops); -INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3'); -SELECT * FROM t ORDER BY val <#> '{0:3,1:3,2:3}/3'; - val ------------------ - {0:1,1:2,2:4}/3 - {0:1,1:2,2:3}/3 - {0:1,1:1,2:1}/3 - {}/3 +INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}'); +SELECT * FROM t ORDER BY val <#> '{0:3,1:3,2:3}'; + val +--------------- + {0:1,1:2,2:4} + {0:1,1:2,2:3} + {0:1,1:1,2:1} + {} (4 rows) SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::sparsevec)) t2; diff --git a/test/expected/hnsw_sparsevec_l2.out b/test/expected/hnsw_sparsevec_l2.out index adc5cfd..0828096 100644 --- a/test/expected/hnsw_sparsevec_l2.out +++ b/test/expected/hnsw_sparsevec_l2.out @@ -1,15 +1,15 @@ SET enable_seqscan = off; -CREATE TABLE t (val sparsevec(3)); -INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL); +CREATE TABLE t (val sparsevec); +INSERT INTO t (val) VALUES ('{}'), ('{0:1,1:2,2:3}'), ('{0:1,1:1,2:1}'), (NULL); CREATE INDEX ON t USING hnsw (val sparsevec_l2_ops); -INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3'); -SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}/3'; - val ------------------ - {0:1,1:2,2:3}/3 - {0:1,1:2,2:4}/3 - {0:1,1:1,2:1}/3 - {}/3 +INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}'); +SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}'; + val +--------------- + {0:1,1:2,2:3} + {0:1,1:2,2:4} + {0:1,1:1,2:1} + {} (4 rows) SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <-> (SELECT NULL::sparsevec)) t2; @@ -25,14 +25,14 @@ SELECT COUNT(*) FROM t; (1 row) TRUNCATE t; -SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}/3'; +SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}'; val ----- (0 rows) DROP TABLE t; -- TODO move -CREATE TABLE t (val sparsevec(1001)); +CREATE TABLE t (val sparsevec); INSERT INTO t (val) VALUES (array_fill(1, ARRAY[1001])::vector::sparsevec); CREATE INDEX ON t USING hnsw (val sparsevec_l2_ops); ERROR: sparsevec cannot have more than 1000 non-zero elements for hnsw index diff --git a/test/expected/sparsevec_functions.out b/test/expected/sparsevec_functions.out index 07117d8..596ca75 100644 --- a/test/expected/sparsevec_functions.out +++ b/test/expected/sparsevec_functions.out @@ -1,62 +1,60 @@ -SELECT l2_distance('{}/2'::sparsevec, '{0:3,1:4}/2'); +SELECT l2_distance('{}'::sparsevec, '{0:3,1:4}'); l2_distance ------------- 5 (1 row) -SELECT l2_distance('{}/2'::sparsevec, '{1:1}/2'); +SELECT l2_distance('{}'::sparsevec, '{1:1}'); l2_distance ------------- 1 (1 row) -SELECT '{}/2'::sparsevec <-> '{0:3,1:4}/2'; +SELECT '{}'::sparsevec <-> '{0:3,1:4}'; ?column? ---------- 5 (1 row) -SELECT inner_product('{0:1,1:2}/2'::sparsevec, '{0:2,1:4}/2'); +SELECT inner_product('{0:1,1:2}'::sparsevec, '{0:2,1:4}'); inner_product --------------- 10 (1 row) -SELECT sparsevec_negative_inner_product('{0:1,1:2}/2', '{0:2,1:4}/2'); +SELECT sparsevec_negative_inner_product('{0:1,1:2}', '{0:2,1:4}'); sparsevec_negative_inner_product ---------------------------------- -10 (1 row) -SELECT cosine_distance('{0:1,1:2}/2'::sparsevec, '{0:2,1:4}/2'); +SELECT cosine_distance('{0:1,1:2}'::sparsevec, '{0:2,1:4}'); cosine_distance ----------------- 0 (1 row) -SELECT cosine_distance('{0:1,1:2}/2'::sparsevec, '{}/2'); +SELECT cosine_distance('{0:1,1:2}'::sparsevec, '{}'); cosine_distance ----------------- NaN (1 row) -SELECT cosine_distance('{0:1,1:1}/2'::sparsevec, '{0:-1,1:-1}/2'); +SELECT cosine_distance('{0:1,1:1}'::sparsevec, '{0:-1,1:-1}'); cosine_distance ----------------- 2 (1 row) -SELECT cosine_distance('{0:1}/2'::sparsevec, '{1:2}/2'); +SELECT cosine_distance('{0:1}'::sparsevec, '{1:2}'); cosine_distance ----------------- 1 (1 row) -SELECT cosine_distance('{}/1'::sparsevec, '{}/1'); +SELECT cosine_distance('{}'::sparsevec, '{}'); cosine_distance ----------------- NaN (1 row) -SELECT cosine_distance('{0:1}/2'::sparsevec, '{0:1}/3'); -ERROR: different sparsevec dimensions 2 and 3 diff --git a/test/expected/sparsevec_input.out b/test/expected/sparsevec_input.out index bd2faf5..65c4434 100644 --- a/test/expected/sparsevec_input.out +++ b/test/expected/sparsevec_input.out @@ -1,62 +1,64 @@ -SELECT '{0:1.5,2:3.5}/5'::sparsevec; - sparsevec ------------------ - {0:1.5,2:3.5}/5 +SELECT '{0:1.5,2:3.5}'::sparsevec; + sparsevec +--------------- + {0:1.5,2:3.5} (1 row) -SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector; +SELECT '{0:1.5,2:3.5}'::sparsevec::vector; + vector +------------- + [1.5,0,3.5] +(1 row) + +SELECT '{0:1.5,2:3.5}'::sparsevec::vector(5); vector ----------------- [1.5,0,3.5,0,0] (1 row) -SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector(5); - vector ------------------ - [1.5,0,3.5,0,0] +SELECT '{0:1.5,2:3.5}'::sparsevec::vector(4); + vector +--------------- + [1.5,0,3.5,0] (1 row) -SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector(4); -ERROR: expected 4 dimensions, not 5 +SELECT '{0:1.5,2:3.5}'::sparsevec::vector(2); +ERROR: Vector must have at least 3 dimensions SELECT '[0,1.5,0,3.5,0]'::vector::sparsevec; - sparsevec ------------------ - {1:1.5,3:3.5}/5 + sparsevec +--------------- + {1:1.5,3:3.5} (1 row) -SELECT '{0:0,1:1,2:0}/3'::sparsevec; +SELECT '{0:0,1:1,2:0}'::sparsevec; sparsevec ----------- - {1:1}/3 + {1:1} (1 row) -SELECT '{1:1,0:1}/2'::sparsevec; +SELECT '{1:1,0:1}'::sparsevec; ERROR: indexes must be in ascending order -LINE 1: SELECT '{1:1,0:1}/2'::sparsevec; +LINE 1: SELECT '{1:1,0:1}'::sparsevec; ^ -SELECT '{}/5'::sparsevec; +SELECT '{}'::sparsevec; sparsevec ----------- - {}/5 + {} (1 row) -SELECT '{}/-1'::sparsevec; -ERROR: sparsevec must have at least 1 dimension -LINE 1: SELECT '{}/-1'::sparsevec; - ^ -SELECT '{}/100001'::sparsevec; -ERROR: sparsevec cannot have more than 100000 dimensions -LINE 1: SELECT '{}/100001'::sparsevec; - ^ -SELECT '{}/16001'::sparsevec::vector; -ERROR: vector cannot have more than 16000 dimensions -SELECT '{-1:1}/1'::sparsevec; +SELECT '{}'::sparsevec::vector; +ERROR: vector must have at least 1 dimension +SELECT '{-1:1}'::sparsevec; ERROR: index "-1" is out of range for type sparsevec -LINE 1: SELECT '{-1:1}/1'::sparsevec; +LINE 1: SELECT '{-1:1}'::sparsevec; ^ -SELECT '{1:1}/1'::sparsevec; -ERROR: index must be less than dimensions -LINE 1: SELECT '{1:1}/1'::sparsevec; - ^ -SELECT '{}/1'::sparsevec(2); -ERROR: expected 2 dimensions, not 1 +SELECT '{1:1}'::sparsevec; + sparsevec +----------- + {1:1} +(1 row) + +SELECT '{}'::sparsevec(2); +ERROR: type modifier is not allowed for type "sparsevec" +LINE 1: SELECT '{}'::sparsevec(2); + ^ diff --git a/test/sql/hnsw_sparsevec_cosine.sql b/test/sql/hnsw_sparsevec_cosine.sql index 685423c..2fb6fb9 100644 --- a/test/sql/hnsw_sparsevec_cosine.sql +++ b/test/sql/hnsw_sparsevec_cosine.sql @@ -1,13 +1,13 @@ SET enable_seqscan = off; -CREATE TABLE t (val sparsevec(3)); -INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL); +CREATE TABLE t (val sparsevec); +INSERT INTO t (val) VALUES ('{}'), ('{0:1,1:2,2:3}'), ('{0:1,1:1,2:1}'), (NULL); CREATE INDEX ON t USING hnsw (val sparsevec_cosine_ops); -INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3'); +INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}'); -SELECT * FROM t ORDER BY val <=> '{0:3,1:3,2:3}/3'; -SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '{}/3') t2; +SELECT * FROM t ORDER BY val <=> '{0:3,1:3,2:3}'; +SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '{}') t2; SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::sparsevec)) t2; DROP TABLE t; diff --git a/test/sql/hnsw_sparsevec_ip.sql b/test/sql/hnsw_sparsevec_ip.sql index 1888d9c..f0da5e9 100644 --- a/test/sql/hnsw_sparsevec_ip.sql +++ b/test/sql/hnsw_sparsevec_ip.sql @@ -1,12 +1,12 @@ SET enable_seqscan = off; -CREATE TABLE t (val sparsevec(3)); -INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL); +CREATE TABLE t (val sparsevec); +INSERT INTO t (val) VALUES ('{}'), ('{0:1,1:2,2:3}'), ('{0:1,1:1,2:1}'), (NULL); CREATE INDEX ON t USING hnsw (val sparsevec_ip_ops); -INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3'); +INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}'); -SELECT * FROM t ORDER BY val <#> '{0:3,1:3,2:3}/3'; +SELECT * FROM t ORDER BY val <#> '{0:3,1:3,2:3}'; SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::sparsevec)) t2; DROP TABLE t; diff --git a/test/sql/hnsw_sparsevec_l2.sql b/test/sql/hnsw_sparsevec_l2.sql index b472607..3ad998f 100644 --- a/test/sql/hnsw_sparsevec_l2.sql +++ b/test/sql/hnsw_sparsevec_l2.sql @@ -1,22 +1,22 @@ SET enable_seqscan = off; -CREATE TABLE t (val sparsevec(3)); -INSERT INTO t (val) VALUES ('{}/3'), ('{0:1,1:2,2:3}/3'), ('{0:1,1:1,2:1}/3'), (NULL); +CREATE TABLE t (val sparsevec); +INSERT INTO t (val) VALUES ('{}'), ('{0:1,1:2,2:3}'), ('{0:1,1:1,2:1}'), (NULL); CREATE INDEX ON t USING hnsw (val sparsevec_l2_ops); -INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}/3'); +INSERT INTO t (val) VALUES ('{0:1,1:2,2:4}'); -SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}/3'; +SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}'; SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <-> (SELECT NULL::sparsevec)) t2; SELECT COUNT(*) FROM t; TRUNCATE t; -SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}/3'; +SELECT * FROM t ORDER BY val <-> '{0:3,1:3,2:3}'; DROP TABLE t; -- TODO move -CREATE TABLE t (val sparsevec(1001)); +CREATE TABLE t (val sparsevec); INSERT INTO t (val) VALUES (array_fill(1, ARRAY[1001])::vector::sparsevec); CREATE INDEX ON t USING hnsw (val sparsevec_l2_ops); TRUNCATE t; diff --git a/test/sql/sparsevec_functions.sql b/test/sql/sparsevec_functions.sql index 86f7990..a027d6c 100644 --- a/test/sql/sparsevec_functions.sql +++ b/test/sql/sparsevec_functions.sql @@ -1,13 +1,12 @@ -SELECT l2_distance('{}/2'::sparsevec, '{0:3,1:4}/2'); -SELECT l2_distance('{}/2'::sparsevec, '{1:1}/2'); -SELECT '{}/2'::sparsevec <-> '{0:3,1:4}/2'; +SELECT l2_distance('{}'::sparsevec, '{0:3,1:4}'); +SELECT l2_distance('{}'::sparsevec, '{1:1}'); +SELECT '{}'::sparsevec <-> '{0:3,1:4}'; -SELECT inner_product('{0:1,1:2}/2'::sparsevec, '{0:2,1:4}/2'); -SELECT sparsevec_negative_inner_product('{0:1,1:2}/2', '{0:2,1:4}/2'); +SELECT inner_product('{0:1,1:2}'::sparsevec, '{0:2,1:4}'); +SELECT sparsevec_negative_inner_product('{0:1,1:2}', '{0:2,1:4}'); -SELECT cosine_distance('{0:1,1:2}/2'::sparsevec, '{0:2,1:4}/2'); -SELECT cosine_distance('{0:1,1:2}/2'::sparsevec, '{}/2'); -SELECT cosine_distance('{0:1,1:1}/2'::sparsevec, '{0:-1,1:-1}/2'); -SELECT cosine_distance('{0:1}/2'::sparsevec, '{1:2}/2'); -SELECT cosine_distance('{}/1'::sparsevec, '{}/1'); -SELECT cosine_distance('{0:1}/2'::sparsevec, '{0:1}/3'); +SELECT cosine_distance('{0:1,1:2}'::sparsevec, '{0:2,1:4}'); +SELECT cosine_distance('{0:1,1:2}'::sparsevec, '{}'); +SELECT cosine_distance('{0:1,1:1}'::sparsevec, '{0:-1,1:-1}'); +SELECT cosine_distance('{0:1}'::sparsevec, '{1:2}'); +SELECT cosine_distance('{}'::sparsevec, '{}'); diff --git a/test/sql/sparsevec_input.sql b/test/sql/sparsevec_input.sql index 1fdfd88..432f4ec 100644 --- a/test/sql/sparsevec_input.sql +++ b/test/sql/sparsevec_input.sql @@ -1,19 +1,18 @@ -SELECT '{0:1.5,2:3.5}/5'::sparsevec; -SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector; -SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector(5); -SELECT '{0:1.5,2:3.5}/5'::sparsevec::vector(4); +SELECT '{0:1.5,2:3.5}'::sparsevec; +SELECT '{0:1.5,2:3.5}'::sparsevec::vector; +SELECT '{0:1.5,2:3.5}'::sparsevec::vector(5); +SELECT '{0:1.5,2:3.5}'::sparsevec::vector(4); +SELECT '{0:1.5,2:3.5}'::sparsevec::vector(2); SELECT '[0,1.5,0,3.5,0]'::vector::sparsevec; -SELECT '{0:0,1:1,2:0}/3'::sparsevec; +SELECT '{0:0,1:1,2:0}'::sparsevec; -SELECT '{1:1,0:1}/2'::sparsevec; +SELECT '{1:1,0:1}'::sparsevec; -SELECT '{}/5'::sparsevec; -SELECT '{}/-1'::sparsevec; -SELECT '{}/100001'::sparsevec; -SELECT '{}/16001'::sparsevec::vector; +SELECT '{}'::sparsevec; +SELECT '{}'::sparsevec::vector; -SELECT '{-1:1}/1'::sparsevec; -SELECT '{1:1}/1'::sparsevec; +SELECT '{-1:1}'::sparsevec; +SELECT '{1:1}'::sparsevec; -SELECT '{}/1'::sparsevec(2); +SELECT '{}'::sparsevec(2);