diff --git a/CHANGELOG.md b/CHANGELOG.md index b38b189..761a4d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - Added `binary_quantize` function - Added `hamming_distance` function - Added `jaccard_distance` function +- Added `l2_normalize` function - Added `subvector` function - Added CPU dispatching for distance functions on Linux x86-64 - Updated comparison operators to support vectors with different dimensions diff --git a/README.md b/README.md index 4e837be..fe2e6ea 100644 --- a/README.md +++ b/README.md @@ -863,6 +863,7 @@ cosine_distance(vector, vector) → double precision | cosine distance | inner_product(vector, vector) → double precision | inner product | l1_distance(vector, vector) → double precision | taxicab distance | 0.5.0 l2_distance(vector, vector) → double precision | Euclidean distance | +l2_normalize(vector) → vector | Normalize with Euclidean norm | unreleased subvector(vector, integer, integer) → vector | subvector | unreleased vector_dims(vector) → integer | number of dimensions | vector_norm(vector) → double precision | Euclidean norm | @@ -899,6 +900,7 @@ inner_product(halfvec, halfvec) → double precision | inner product | unrelease l1_distance(halfvec, halfvec) → double precision | taxicab distance | unreleased l2_distance(halfvec, halfvec) → double precision | Euclidean distance | unreleased l2_norm(halfvec) → double precision | Euclidean norm | unreleased +l2_normalize(halfvec) → halfvec | Normalize with Euclidean norm | unreleased subvector(halfvec, integer, integer) → halfvec | subvector | unreleased vector_dims(halfvec) → integer | number of dimensions | unreleased diff --git a/sql/vector--0.6.2--0.7.0.sql b/sql/vector--0.6.2--0.7.0.sql index 16e0646..b2db93e 100644 --- a/sql/vector--0.6.2--0.7.0.sql +++ b/sql/vector--0.6.2--0.7.0.sql @@ -1,6 +1,9 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "ALTER EXTENSION vector UPDATE TO '0.7.0'" to load this file. 
\quit +CREATE FUNCTION l2_normalize(vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + CREATE FUNCTION binary_quantize(vector) RETURNS bit AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; @@ -77,6 +80,9 @@ CREATE FUNCTION vector_dims(halfvec) RETURNS integer CREATE FUNCTION l2_norm(halfvec) RETURNS float8 AS 'MODULE_PATHNAME', 'halfvec_l2_norm' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; +CREATE FUNCTION l2_normalize(halfvec) RETURNS halfvec + AS 'MODULE_PATHNAME', 'halfvec_l2_normalize' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + CREATE FUNCTION binary_quantize(halfvec) RETURNS bit AS 'MODULE_PATHNAME', 'halfvec_binary_quantize' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; diff --git a/sql/vector.sql b/sql/vector.sql index 2580e05..69c3fc0 100644 --- a/sql/vector.sql +++ b/sql/vector.sql @@ -49,6 +49,9 @@ CREATE FUNCTION vector_dims(vector) RETURNS integer CREATE FUNCTION vector_norm(vector) RETURNS float8 AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; +CREATE FUNCTION l2_normalize(vector) RETURNS vector + AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + CREATE FUNCTION binary_quantize(vector) RETURNS bit AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; @@ -370,6 +373,9 @@ CREATE FUNCTION vector_dims(halfvec) RETURNS integer CREATE FUNCTION l2_norm(halfvec) RETURNS float8 AS 'MODULE_PATHNAME', 'halfvec_l2_norm' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; +CREATE FUNCTION l2_normalize(halfvec) RETURNS halfvec + AS 'MODULE_PATHNAME', 'halfvec_l2_normalize' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; + CREATE FUNCTION binary_quantize(halfvec) RETURNS bit AS 'MODULE_PATHNAME', 'halfvec_binary_quantize' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; diff --git a/src/halfvec.c b/src/halfvec.c index e99bf0c..16f0a91 100644 --- a/src/halfvec.c +++ b/src/halfvec.c @@ -746,6 +746,45 @@ halfvec_l2_norm(PG_FUNCTION_ARGS) PG_RETURN_FLOAT8(sqrt(norm)); } +/* + * Normalize a half 
vector with the L2 norm: each element is divided by the Euclidean norm of the input, and an all-zero input yields an all-zero result + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_l2_normalize); +Datum +halfvec_l2_normalize(PG_FUNCTION_ARGS) +{ + HalfVector *a = PG_GETARG_HALFVEC_P(0); + half *ax = a->x; + double norm = 0; + HalfVector *result; + half *rx; + + result = InitHalfVector(a->dim); + rx = result->x; + + /* Accumulate the sum of squares in double precision after widening each half element (auto-vectorized) */ + for (int i = 0; i < a->dim; i++) + norm += (double) HalfToFloat4(ax[i]) * (double) HalfToFloat4(ax[i]); + + norm = sqrt(norm); + + /* Scale only when the norm is positive; for a zero norm the result is returned as initialized (zero vector) */ + if (norm > 0) + { + for (int i = 0; i < a->dim; i++) + rx[i] = Float4ToHalfUnchecked(HalfToFloat4(ax[i]) / norm); + + /* Check for overflow: the conversion above is unchecked, so raise an error if any scaled element is infinite */ + for (int i = 0; i < a->dim; i++) + { + if (HalfIsInf(rx[i])) + float_overflow_error(); + } + } + + PG_RETURN_POINTER(result); +} + /* * Add half vectors */ diff --git a/src/vector.c b/src/vector.c index 065a5aa..1dd9ed5 100644 --- a/src/vector.c +++ b/src/vector.c @@ -775,6 +775,45 @@ vector_norm(PG_FUNCTION_ARGS) PG_RETURN_FLOAT8(sqrt(norm)); } +/* + * Normalize a vector with the L2 norm: each element is divided by the Euclidean norm of the input, and an all-zero input yields an all-zero result + */ +PGDLLEXPORT PG_FUNCTION_INFO_V1(l2_normalize); +Datum +l2_normalize(PG_FUNCTION_ARGS) +{ + Vector *a = PG_GETARG_VECTOR_P(0); + float *ax = a->x; + double norm = 0; + Vector *result; + float *rx; + + result = InitVector(a->dim); + rx = result->x; + + /* Accumulate the sum of squares in double precision (auto-vectorized) */ + for (int i = 0; i < a->dim; i++) + norm += (double) ax[i] * (double) ax[i]; + + norm = sqrt(norm); + + /* Scale only when the norm is positive; for a zero norm the result is returned as initialized (zero vector) */ + if (norm > 0) + { + for (int i = 0; i < a->dim; i++) + rx[i] = ax[i] / norm; + + /* Check for overflow: raise an error if any scaled element is infinite */ + for (int i = 0; i < a->dim; i++) + { + if (isinf(rx[i])) + float_overflow_error(); + } + } + + PG_RETURN_POINTER(result); +} + /* * Add vectors */ diff --git a/test/expected/halfvec_functions.out b/test/expected/halfvec_functions.out index ca94c85..3887098 100644 --- a/test/expected/halfvec_functions.out +++ b/test/expected/halfvec_functions.out @@ -278,6 +278,36 @@ SELECT l1_distance('[0,0]'::halfvec, '[0,1]'); SELECT 
l1_distance('[1,2]'::halfvec, '[3]'); ERROR: different halfvec dimensions 2 and 1 +SELECT l2_normalize('[3,4]'::halfvec); + l2_normalize +------------------------ + [0.60009766,0.7998047] +(1 row) + +SELECT l2_normalize('[3,0]'::halfvec); + l2_normalize +-------------- + [1,0] +(1 row) + +SELECT l2_normalize('[0,0.1]'::halfvec); + l2_normalize +-------------- + [0,1] +(1 row) + +SELECT l2_normalize('[0,0]'::halfvec); + l2_normalize +-------------- + [0,0] +(1 row) + +SELECT l2_normalize('[65504]'::halfvec); + l2_normalize +-------------- + [1] +(1 row) + SELECT binary_quantize('[1,0,-1]'::halfvec); binary_quantize ----------------- diff --git a/test/expected/vector_functions.out b/test/expected/vector_functions.out index 3fdeb54..99274d8 100644 --- a/test/expected/vector_functions.out +++ b/test/expected/vector_functions.out @@ -272,6 +272,36 @@ SELECT l1_distance('[3e38]'::vector, '[-3e38]'); Infinity (1 row) +SELECT l2_normalize('[3,4]'::vector); + l2_normalize +-------------- + [0.6,0.8] +(1 row) + +SELECT l2_normalize('[3,0]'::vector); + l2_normalize +-------------- + [1,0] +(1 row) + +SELECT l2_normalize('[0,0.1]'::vector); + l2_normalize +-------------- + [0,1] +(1 row) + +SELECT l2_normalize('[0,0]'::vector); + l2_normalize +-------------- + [0,0] +(1 row) + +SELECT l2_normalize('[3e38]'::vector); + l2_normalize +-------------- + [1] +(1 row) + SELECT binary_quantize('[1,0,-1]'::vector); binary_quantize ----------------- diff --git a/test/sql/halfvec_functions.sql b/test/sql/halfvec_functions.sql index 7482f4e..b18fe21 100644 --- a/test/sql/halfvec_functions.sql +++ b/test/sql/halfvec_functions.sql @@ -59,6 +59,12 @@ SELECT l1_distance('[0,0]'::halfvec, '[3,4]'); SELECT l1_distance('[0,0]'::halfvec, '[0,1]'); SELECT l1_distance('[1,2]'::halfvec, '[3]'); +SELECT l2_normalize('[3,4]'::halfvec); +SELECT l2_normalize('[3,0]'::halfvec); +SELECT l2_normalize('[0,0.1]'::halfvec); +SELECT l2_normalize('[0,0]'::halfvec); +SELECT l2_normalize('[65504]'::halfvec); + 
SELECT binary_quantize('[1,0,-1]'::halfvec); SELECT binary_quantize('[0,0.1,-0.2,-0.3,0.4,0.5,0.6,-0.7,0.8,-0.9,1]'::halfvec); diff --git a/test/sql/vector_functions.sql b/test/sql/vector_functions.sql index 0d8fd5e..e27f794 100644 --- a/test/sql/vector_functions.sql +++ b/test/sql/vector_functions.sql @@ -58,6 +58,12 @@ SELECT l1_distance('[0,0]'::vector, '[0,1]'); SELECT l1_distance('[1,2]'::vector, '[3]'); SELECT l1_distance('[3e38]'::vector, '[-3e38]'); +SELECT l2_normalize('[3,4]'::vector); +SELECT l2_normalize('[3,0]'::vector); +SELECT l2_normalize('[0,0.1]'::vector); +SELECT l2_normalize('[0,0]'::vector); +SELECT l2_normalize('[3e38]'::vector); + SELECT binary_quantize('[1,0,-1]'::vector); SELECT binary_quantize('[0,0.1,-0.2,-0.3,0.4,0.5,0.6,-0.7,0.8,-0.9,1]'::vector);