Added l2_normalize function - closes #220

This commit is contained in:
Andrew Kane
2024-04-14 20:53:05 -07:00
parent 00308491d3
commit b70fb2b3f4
10 changed files with 165 additions and 0 deletions

View File

@@ -6,6 +6,7 @@
- Added `binary_quantize` function
- Added `hamming_distance` function
- Added `jaccard_distance` function
- Added `l2_normalize` function
- Added `subvector` function
- Added CPU dispatching for distance functions on Linux x86-64
- Updated comparison operators to support vectors with different dimensions

View File

@@ -863,6 +863,7 @@ cosine_distance(vector, vector) → double precision | cosine distance |
inner_product(vector, vector) → double precision | inner product |
l1_distance(vector, vector) → double precision | taxicab distance | 0.5.0
l2_distance(vector, vector) → double precision | Euclidean distance |
l2_normalize(vector) → vector | Normalize with Euclidean norm | unreleased
subvector(vector, integer, integer) → vector | subvector | unreleased
vector_dims(vector) → integer | number of dimensions |
vector_norm(vector) → double precision | Euclidean norm |
@@ -899,6 +900,7 @@ inner_product(halfvec, halfvec) → double precision | inner product | unrelease
l1_distance(halfvec, halfvec) → double precision | taxicab distance | unreleased
l2_distance(halfvec, halfvec) → double precision | Euclidean distance | unreleased
l2_norm(halfvec) → double precision | Euclidean norm | unreleased
l2_normalize(halfvec) → halfvec | Normalize with Euclidean norm | unreleased
subvector(halfvec, integer, integer) → halfvec | subvector | unreleased
vector_dims(halfvec) → integer | number of dimensions | unreleased

View File

@@ -1,6 +1,9 @@
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "ALTER EXTENSION vector UPDATE TO '0.7.0'" to load this file. \quit
-- l2_normalize(vector): takes a vector and returns a vector.
-- No link symbol is given after 'MODULE_PATHNAME', so the C symbol name
-- defaults to the SQL function name.
-- STRICT: a NULL argument yields NULL without calling the C code.
CREATE FUNCTION l2_normalize(vector) RETURNS vector
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- binary_quantize(vector): takes a vector and returns a bit string.
-- Same conventions as above (default link symbol, STRICT, IMMUTABLE).
CREATE FUNCTION binary_quantize(vector) RETURNS bit
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
@@ -77,6 +80,9 @@ CREATE FUNCTION vector_dims(halfvec) RETURNS integer
-- l2_norm(halfvec): returns a float8; bound to the C symbol
-- halfvec_l2_norm.
CREATE FUNCTION l2_norm(halfvec) RETURNS float8
AS 'MODULE_PATHNAME', 'halfvec_l2_norm' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- l2_normalize(halfvec): halfvec overload of l2_normalize; takes and
-- returns a halfvec. Bound explicitly to the C symbol halfvec_l2_normalize
-- because the SQL name is shared with the vector overload.
CREATE FUNCTION l2_normalize(halfvec) RETURNS halfvec
AS 'MODULE_PATHNAME', 'halfvec_l2_normalize' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- binary_quantize(halfvec): takes a halfvec and returns a bit string;
-- bound to the C symbol halfvec_binary_quantize.
CREATE FUNCTION binary_quantize(halfvec) RETURNS bit
AS 'MODULE_PATHNAME', 'halfvec_binary_quantize' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;

View File

@@ -49,6 +49,9 @@ CREATE FUNCTION vector_dims(vector) RETURNS integer
-- vector_norm(vector): returns a float8. No link symbol is given, so the
-- C symbol name defaults to the SQL function name.
CREATE FUNCTION vector_norm(vector) RETURNS float8
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- l2_normalize(vector): takes a vector and returns a vector.
-- STRICT: a NULL argument yields NULL without calling the C code.
CREATE FUNCTION l2_normalize(vector) RETURNS vector
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- binary_quantize(vector): takes a vector and returns a bit string.
CREATE FUNCTION binary_quantize(vector) RETURNS bit
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
@@ -370,6 +373,9 @@ CREATE FUNCTION vector_dims(halfvec) RETURNS integer
-- l2_norm(halfvec): returns a float8; bound to the C symbol
-- halfvec_l2_norm.
CREATE FUNCTION l2_norm(halfvec) RETURNS float8
AS 'MODULE_PATHNAME', 'halfvec_l2_norm' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- l2_normalize(halfvec): halfvec overload of l2_normalize; takes and
-- returns a halfvec. Bound explicitly to the C symbol halfvec_l2_normalize
-- because the SQL name is shared with the vector overload.
CREATE FUNCTION l2_normalize(halfvec) RETURNS halfvec
AS 'MODULE_PATHNAME', 'halfvec_l2_normalize' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- binary_quantize(halfvec): takes a halfvec and returns a bit string;
-- bound to the C symbol halfvec_binary_quantize.
CREATE FUNCTION binary_quantize(halfvec) RETURNS bit
AS 'MODULE_PATHNAME', 'halfvec_binary_quantize' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;

View File

@@ -746,6 +746,45 @@ halfvec_l2_norm(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(sqrt(norm));
}
/*
 * Scale a half vector by the reciprocal of its Euclidean (L2) norm.
 *
 * The squared components are summed in double precision and each component
 * is then divided by the square root of that sum before being converted
 * back to half precision. A zero vector is returned for zero norm: in that
 * case the result is handed back exactly as InitHalfVector produced it.
 * Calls float_overflow_error() if any scaled component is infinite.
 */
PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_l2_normalize);
Datum
halfvec_l2_normalize(PG_FUNCTION_ARGS)
{
	HalfVector *vec = PG_GETARG_HALFVEC_P(0);
	HalfVector *result = InitHalfVector(vec->dim);
	half	   *src = vec->x;
	half	   *dst = result->x;
	double		sumsq = 0;
	double		norm;

	/* Plain reduction loop, left simple so it can be auto-vectorized */
	for (int i = 0; i < vec->dim; i++)
	{
		double		v = (double) HalfToFloat4(src[i]);

		sumsq += v * v;
	}

	norm = sqrt(sumsq);

	/* Nothing to scale: return the freshly initialized vector untouched */
	if (!(norm > 0))
	{
		PG_RETURN_POINTER(result);
	}

	/* Division happens in double; the result is narrowed for the half conversion */
	for (int i = 0; i < vec->dim; i++)
		dst[i] = Float4ToHalfUnchecked(HalfToFloat4(src[i]) / norm);

	/* The unchecked conversion above does not report overflow, so scan for it */
	for (int i = 0; i < vec->dim; i++)
	{
		if (HalfIsInf(dst[i]))
			float_overflow_error();
	}

	PG_RETURN_POINTER(result);
}
/*
* Add half vectors
*/

View File

@@ -775,6 +775,45 @@ vector_norm(PG_FUNCTION_ARGS)
PG_RETURN_FLOAT8(sqrt(norm));
}
/*
 * Scale a vector by the reciprocal of its Euclidean (L2) norm.
 *
 * The squared components are summed in double precision and each component
 * is then divided by the square root of that sum, with the quotient stored
 * back as a float. A zero vector is returned for zero norm: in that case
 * the result is handed back exactly as InitVector produced it.
 * Calls float_overflow_error() if any scaled component is infinite.
 */
PGDLLEXPORT PG_FUNCTION_INFO_V1(l2_normalize);
Datum
l2_normalize(PG_FUNCTION_ARGS)
{
	Vector	   *vec = PG_GETARG_VECTOR_P(0);
	Vector	   *result = InitVector(vec->dim);
	float	   *src = vec->x;
	float	   *dst = result->x;
	double		sumsq = 0;
	double		norm;

	/* Plain reduction loop, left simple so it can be auto-vectorized */
	for (int i = 0; i < vec->dim; i++)
	{
		double		v = (double) src[i];

		sumsq += v * v;
	}

	norm = sqrt(sumsq);

	/* Nothing to scale: return the freshly initialized vector untouched */
	if (!(norm > 0))
	{
		PG_RETURN_POINTER(result);
	}

	/* Divide in double precision, then narrow the quotient to float */
	for (int i = 0; i < vec->dim; i++)
		dst[i] = (float) (src[i] / norm);

	/* Check for overflow */
	for (int i = 0; i < vec->dim; i++)
	{
		if (isinf(dst[i]))
			float_overflow_error();
	}

	PG_RETURN_POINTER(result);
}
/*
* Add vectors
*/

View File

@@ -278,6 +278,36 @@ SELECT l1_distance('[0,0]'::halfvec, '[0,1]');
SELECT l1_distance('[1,2]'::halfvec, '[3]');
ERROR: different halfvec dimensions 2 and 1
SELECT l2_normalize('[3,4]'::halfvec);
l2_normalize
------------------------
[0.60009766,0.7998047]
(1 row)
SELECT l2_normalize('[3,0]'::halfvec);
l2_normalize
--------------
[1,0]
(1 row)
SELECT l2_normalize('[0,0.1]'::halfvec);
l2_normalize
--------------
[0,1]
(1 row)
SELECT l2_normalize('[0,0]'::halfvec);
l2_normalize
--------------
[0,0]
(1 row)
SELECT l2_normalize('[65504]'::halfvec);
l2_normalize
--------------
[1]
(1 row)
SELECT binary_quantize('[1,0,-1]'::halfvec);
binary_quantize
-----------------

View File

@@ -272,6 +272,36 @@ SELECT l1_distance('[3e38]'::vector, '[-3e38]');
Infinity
(1 row)
SELECT l2_normalize('[3,4]'::vector);
l2_normalize
--------------
[0.6,0.8]
(1 row)
SELECT l2_normalize('[3,0]'::vector);
l2_normalize
--------------
[1,0]
(1 row)
SELECT l2_normalize('[0,0.1]'::vector);
l2_normalize
--------------
[0,1]
(1 row)
SELECT l2_normalize('[0,0]'::vector);
l2_normalize
--------------
[0,0]
(1 row)
SELECT l2_normalize('[3e38]'::vector);
l2_normalize
--------------
[1]
(1 row)
SELECT binary_quantize('[1,0,-1]'::vector);
binary_quantize
-----------------

View File

@@ -59,6 +59,12 @@ SELECT l1_distance('[0,0]'::halfvec, '[3,4]');
SELECT l1_distance('[0,0]'::halfvec, '[0,1]');
SELECT l1_distance('[1,2]'::halfvec, '[3]');
SELECT l2_normalize('[3,4]'::halfvec);
SELECT l2_normalize('[3,0]'::halfvec);
SELECT l2_normalize('[0,0.1]'::halfvec);
SELECT l2_normalize('[0,0]'::halfvec);
SELECT l2_normalize('[65504]'::halfvec);
SELECT binary_quantize('[1,0,-1]'::halfvec);
SELECT binary_quantize('[0,0.1,-0.2,-0.3,0.4,0.5,0.6,-0.7,0.8,-0.9,1]'::halfvec);

View File

@@ -58,6 +58,12 @@ SELECT l1_distance('[0,0]'::vector, '[0,1]');
SELECT l1_distance('[1,2]'::vector, '[3]');
SELECT l1_distance('[3e38]'::vector, '[-3e38]');
SELECT l2_normalize('[3,4]'::vector);
SELECT l2_normalize('[3,0]'::vector);
SELECT l2_normalize('[0,0.1]'::vector);
SELECT l2_normalize('[0,0]'::vector);
SELECT l2_normalize('[3e38]'::vector);
SELECT binary_quantize('[1,0,-1]'::vector);
SELECT binary_quantize('[0,0.1,-0.2,-0.3,0.4,0.5,0.6,-0.7,0.8,-0.9,1]'::vector);