diff --git a/CHANGELOG.md b/CHANGELOG.md
index 761a4d3..96d7c28 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,6 +8,7 @@
 - Added `jaccard_distance` function
 - Added `l2_normalize` function
 - Added `subvector` function
+- Added concatenate operator for vectors
 - Added CPU dispatching for distance functions on Linux x86-64
 - Updated comparison operators to support vectors with different dimensions
 
diff --git a/README.md b/README.md
index fe2e6ea..1699cfb 100644
--- a/README.md
+++ b/README.md
@@ -850,6 +850,7 @@ Operator | Description | Added
 \+ | element-wise addition |
 \- | element-wise subtraction |
 \* | element-wise multiplication | 0.5.0
+\|\| | concatenate | unreleased
 <-> | Euclidean distance |
 <#> | negative inner product |
 <=> | cosine distance |
@@ -886,6 +887,7 @@ Operator | Description | Added
 \+ | element-wise addition | unreleased
 \- | element-wise subtraction | unreleased
 \* | element-wise multiplication | unreleased
+\|\| | concatenate | unreleased
 <-> | Euclidean distance | unreleased
 <#> | negative inner product | unreleased
 <=> | cosine distance | unreleased
diff --git a/sql/vector--0.6.2--0.7.0.sql b/sql/vector--0.6.2--0.7.0.sql
index 220d331..16fe5ef 100644
--- a/sql/vector--0.6.2--0.7.0.sql
+++ b/sql/vector--0.6.2--0.7.0.sql
@@ -10,6 +10,13 @@ CREATE FUNCTION binary_quantize(vector) RETURNS bit
 CREATE FUNCTION subvector(vector, int, int) RETURNS vector
 	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
+CREATE FUNCTION vector_concat(vector, vector) RETURNS vector
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE OPERATOR || (
+	LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_concat
+);
+
 CREATE FUNCTION hamming_distance(bit, bit) RETURNS float8
 	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
@@ -98,6 +105,9 @@ CREATE FUNCTION halfvec_sub(halfvec, halfvec) RETURNS halfvec
 CREATE FUNCTION halfvec_mul(halfvec, halfvec) RETURNS halfvec
 	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
+CREATE FUNCTION halfvec_concat(halfvec, halfvec) RETURNS halfvec
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
 CREATE FUNCTION halfvec_lt(halfvec, halfvec) RETURNS bool
 	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
@@ -227,6 +237,10 @@ CREATE OPERATOR * (
 	COMMUTATOR = *
 );
 
+CREATE OPERATOR || (
+	LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_concat
+);
+
 CREATE OPERATOR < (
 	LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_lt,
 	COMMUTATOR = > , NEGATOR = >= ,
diff --git a/sql/vector.sql b/sql/vector.sql
index efa53b6..b31ea54 100644
--- a/sql/vector.sql
+++ b/sql/vector.sql
@@ -69,6 +69,9 @@ CREATE FUNCTION vector_sub(vector, vector) RETURNS vector
 CREATE FUNCTION vector_mul(vector, vector) RETURNS vector
 	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
+CREATE FUNCTION vector_concat(vector, vector) RETURNS vector
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
 CREATE FUNCTION vector_lt(vector, vector) RETURNS bool
 	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
@@ -197,6 +200,10 @@ CREATE OPERATOR * (
 	COMMUTATOR = *
 );
 
+CREATE OPERATOR || (
+	LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_concat
+);
+
 CREATE OPERATOR < (
 	LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_lt,
 	COMMUTATOR = > , NEGATOR = >= ,
@@ -393,6 +400,9 @@ CREATE FUNCTION halfvec_sub(halfvec, halfvec) RETURNS halfvec
 CREATE FUNCTION halfvec_mul(halfvec, halfvec) RETURNS halfvec
 	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
+CREATE FUNCTION halfvec_concat(halfvec, halfvec) RETURNS halfvec
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
 CREATE FUNCTION halfvec_lt(halfvec, halfvec) RETURNS bool
 	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
@@ -530,6 +540,10 @@ CREATE OPERATOR * (
 	COMMUTATOR = *
 );
 
+CREATE OPERATOR || (
+	LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_concat
+);
+
 CREATE OPERATOR < (
 	LEFTARG = halfvec, RIGHTARG = halfvec, PROCEDURE = halfvec_lt,
 	COMMUTATOR = > , NEGATOR = >= ,
diff --git a/src/halfvec.c b/src/halfvec.c
index 16f0a91..72edbc5 100644
--- a/src/halfvec.c
+++ b/src/halfvec.c
@@ -905,6 +905,34 @@ halfvec_mul(PG_FUNCTION_ARGS)
 	PG_RETURN_POINTER(result);
 }
 
+/*
+ * Concatenate two half vectors: the elements of a followed by those of b
+ */
+PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_concat);
+Datum
+halfvec_concat(PG_FUNCTION_ARGS)
+{
+	HalfVector *a = PG_GETARG_HALFVEC_P(0);
+	HalfVector *b = PG_GETARG_HALFVEC_P(1);
+	int			offset = a->dim;
+	int			total = offset + b->dim;
+	HalfVector *result;
+
+	/* Check the combined dimension before the result is built */
+	CheckDim(total);
+	result = InitHalfVector(total);
+
+	/* The left operand fills the leading positions */
+	for (int i = 0; i < offset; i++)
+		result->x[i] = a->x[i];
+
+	/* The right operand is written directly after it */
+	for (int j = 0; j < b->dim; j++)
+		result->x[offset + j] = b->x[j];
+
+	PG_RETURN_POINTER(result);
+}
+
 /*
  * Quantize a half vector
  */
diff --git a/src/vector.c b/src/vector.c
index 1dd9ed5..ee72be7 100644
--- a/src/vector.c
+++ b/src/vector.c
@@ -916,6 +916,34 @@ vector_mul(PG_FUNCTION_ARGS)
 	PG_RETURN_POINTER(result);
 }
 
+/*
+ * Concatenate two vectors: the elements of a followed by those of b
+ */
+PGDLLEXPORT PG_FUNCTION_INFO_V1(vector_concat);
+Datum
+vector_concat(PG_FUNCTION_ARGS)
+{
+	Vector	   *a = PG_GETARG_VECTOR_P(0);
+	Vector	   *b = PG_GETARG_VECTOR_P(1);
+	int			offset = a->dim;
+	int			total = offset + b->dim;
+	Vector	   *result;
+
+	/* Check the combined dimension before the result is built */
+	CheckDim(total);
+	result = InitVector(total);
+
+	/* The left operand fills the leading positions */
+	for (int i = 0; i < offset; i++)
+		result->x[i] = a->x[i];
+
+	/* The right operand is written directly after it */
+	for (int j = 0; j < b->dim; j++)
+		result->x[offset + j] = b->x[j];
+
+	PG_RETURN_POINTER(result);
+}
+
 /*
  * Quantize a vector
  */
diff --git a/test/expected/halfvec_functions.out b/test/expected/halfvec_functions.out
index 3887098..ddf8d1c 100644
--- a/test/expected/halfvec_functions.out
+++ b/test/expected/halfvec_functions.out
@@ -24,6 +24,14 @@ SELECT '[65519]'::halfvec * '[65519]';
 ERROR:  value out of range: overflow
 SELECT '[1e-7]'::halfvec * '[1e-7]';
 ERROR:  value out of range: underflow
+SELECT '[1,2,3]'::halfvec || '[4,5]'::halfvec;
+  ?column?   
+-------------
+ [1,2,3,4,5]
+(1 row)
+
+SELECT array_fill(0, ARRAY[16000])::halfvec || '[1]'::halfvec;
+ERROR:  halfvec cannot have more than 16000 dimensions
 SELECT '[1,2,3]'::halfvec < '[1,2,3]';
  ?column? 
 ----------
diff --git a/test/expected/vector_functions.out b/test/expected/vector_functions.out
index 99274d8..71cd438 100644
--- a/test/expected/vector_functions.out
+++ b/test/expected/vector_functions.out
@@ -24,6 +24,14 @@ SELECT '[1e37]'::vector * '[1e37]';
 ERROR:  value out of range: overflow
 SELECT '[1e-37]'::vector * '[1e-37]';
 ERROR:  value out of range: underflow
+SELECT '[1,2,3]'::vector || '[4,5]'::vector;
+  ?column?   
+-------------
+ [1,2,3,4,5]
+(1 row)
+
+SELECT array_fill(0, ARRAY[16000])::vector || '[1]'::vector;
+ERROR:  vector cannot have more than 16000 dimensions
 SELECT '[1,2,3]'::vector < '[1,2,3]';
  ?column? 
 ----------
diff --git a/test/sql/halfvec_functions.sql b/test/sql/halfvec_functions.sql
index b18fe21..a20b083 100644
--- a/test/sql/halfvec_functions.sql
+++ b/test/sql/halfvec_functions.sql
@@ -6,6 +6,9 @@ SELECT '[1,2,3]'::halfvec * '[4,5,6]';
 SELECT '[65519]'::halfvec * '[65519]';
 SELECT '[1e-7]'::halfvec * '[1e-7]';
 
+SELECT '[1,2,3]'::halfvec || '[4,5]'::halfvec;
+SELECT array_fill(0, ARRAY[16000])::halfvec || '[1]'::halfvec;
+
 SELECT '[1,2,3]'::halfvec < '[1,2,3]';
 SELECT '[1,2,3]'::halfvec < '[1,2]';
 SELECT '[1,2,3]'::halfvec <= '[1,2,3]';
diff --git a/test/sql/vector_functions.sql b/test/sql/vector_functions.sql
index e27f794..1a1fe5f 100644
--- a/test/sql/vector_functions.sql
+++ b/test/sql/vector_functions.sql
@@ -6,6 +6,9 @@ SELECT '[1,2,3]'::vector * '[4,5,6]';
 SELECT '[1e37]'::vector * '[1e37]';
 SELECT '[1e-37]'::vector * '[1e-37]';
 
+SELECT '[1,2,3]'::vector || '[4,5]'::vector;
+SELECT array_fill(0, ARRAY[16000])::vector || '[1]'::vector;
+
 SELECT '[1,2,3]'::vector < '[1,2,3]';
 SELECT '[1,2,3]'::vector < '[1,2]';
 SELECT '[1,2,3]'::vector <= '[1,2,3]';