diff --git a/CHANGELOG.md b/CHANGELOG.md index 69aa98d..edc5da8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,9 +3,9 @@ - Added `halfvec` type - Added `sparsevec` type - Added support for `bit` vectors to HNSW +- Added `binary_quantize` function - Added `hamming_distance` function - Added `jaccard_distance` function -- Added `quantize_binary` function - Added `subvector` function - Updated comparison operators to support vectors with different dimensions diff --git a/README.md b/README.md index 23b6405..4e837be 100644 --- a/README.md +++ b/README.md @@ -482,20 +482,20 @@ Also supports Jaccard distance (`<%>`) Use expression indexing for binary quantization ```sql -CREATE INDEX ON items USING hnsw ((quantize_binary(embedding)::bit(3)) bit_hamming_ops); +CREATE INDEX ON items USING hnsw ((binary_quantize(embedding)::bit(3)) bit_hamming_ops); ``` Get the nearest neighbors by Hamming distance ```sql -SELECT * FROM items ORDER BY quantize_binary(embedding)::bit(3) <~> quantize_binary('[1,-2,3]') LIMIT 5; +SELECT * FROM items ORDER BY binary_quantize(embedding)::bit(3) <~> binary_quantize('[1,-2,3]') LIMIT 5; ``` Re-rank by the original vectors for better recall ```sql SELECT * FROM ( - SELECT * FROM items ORDER BY quantize_binary(embedding)::bit(3) <~> quantize_binary('[1,-2,3]') LIMIT 20 + SELECT * FROM items ORDER BY binary_quantize(embedding)::bit(3) <~> binary_quantize('[1,-2,3]') LIMIT 20 ) ORDER BY embedding <=> '[1,-2,3]' LIMIT 5; ``` @@ -858,11 +858,11 @@ Operator | Description | Added Function | Description | Added --- | --- | --- +binary_quantize(vector) → bit | binary quantize | unreleased cosine_distance(vector, vector) → double precision | cosine distance | inner_product(vector, vector) → double precision | inner product | l1_distance(vector, vector) → double precision | taxicab distance | 0.5.0 l2_distance(vector, vector) → double precision | Euclidean distance | -quantize_binary(vector) → bit | quantize | unreleased subvector(vector, integer, integer) → vector | subvector | unreleased vector_dims(vector) → integer | number of dimensions | vector_norm(vector) → double precision | Euclidean norm | @@ -893,12 +893,12 @@ Operator | Description | Added Function | Description | Added --- | --- | --- +binary_quantize(halfvec) → bit | binary quantize | unreleased cosine_distance(halfvec, halfvec) → double precision | cosine distance | unreleased inner_product(halfvec, halfvec) → double precision | inner product | unreleased l1_distance(halfvec, halfvec) → double precision | taxicab distance | unreleased l2_distance(halfvec, halfvec) → double precision | Euclidean distance | unreleased l2_norm(halfvec) → double precision | Euclidean norm | unreleased -quantize_binary(halfvec) → bit | quantize | unreleased subvector(halfvec, integer, integer) → halfvec | subvector | unreleased vector_dims(halfvec) → integer | number of dimensions | unreleased diff --git a/sql/vector--0.6.2--0.7.0.sql b/sql/vector--0.6.2--0.7.0.sql index 147166c..16e0646 100644 --- a/sql/vector--0.6.2--0.7.0.sql +++ b/sql/vector--0.6.2--0.7.0.sql @@ -1,7 +1,7 @@ -- complain if script is sourced in psql, rather than via CREATE EXTENSION \echo Use "ALTER EXTENSION vector UPDATE TO '0.7.0'" to load this file. \quit -CREATE FUNCTION quantize_binary(vector) RETURNS bit +CREATE FUNCTION binary_quantize(vector) RETURNS bit AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; CREATE FUNCTION subvector(vector, int, int) RETURNS vector @@ -77,8 +77,8 @@ CREATE FUNCTION vector_dims(halfvec) RETURNS integer CREATE FUNCTION l2_norm(halfvec) RETURNS float8 AS 'MODULE_PATHNAME', 'halfvec_l2_norm' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; -CREATE FUNCTION quantize_binary(halfvec) RETURNS bit - AS 'MODULE_PATHNAME', 'halfvec_quantize_binary' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; +CREATE FUNCTION binary_quantize(halfvec) RETURNS bit + AS 'MODULE_PATHNAME', 'halfvec_binary_quantize' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; CREATE FUNCTION subvector(halfvec, int, int) RETURNS halfvec AS 'MODULE_PATHNAME', 'halfvec_subvector' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; diff --git a/sql/vector.sql b/sql/vector.sql index 60e5620..2580e05 100644 --- a/sql/vector.sql +++ b/sql/vector.sql @@ -49,7 +49,7 @@ CREATE FUNCTION vector_dims(vector) RETURNS integer CREATE FUNCTION vector_norm(vector) RETURNS float8 AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; -CREATE FUNCTION quantize_binary(vector) RETURNS bit +CREATE FUNCTION binary_quantize(vector) RETURNS bit AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; CREATE FUNCTION subvector(vector, int, int) RETURNS vector @@ -370,8 +370,8 @@ CREATE FUNCTION vector_dims(halfvec) RETURNS integer CREATE FUNCTION l2_norm(halfvec) RETURNS float8 AS 'MODULE_PATHNAME', 'halfvec_l2_norm' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; -CREATE FUNCTION quantize_binary(halfvec) RETURNS bit - AS 'MODULE_PATHNAME', 'halfvec_quantize_binary' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; +CREATE FUNCTION binary_quantize(halfvec) RETURNS bit + AS 'MODULE_PATHNAME', 'halfvec_binary_quantize' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; CREATE FUNCTION subvector(halfvec, int, int) RETURNS halfvec AS 'MODULE_PATHNAME', 'halfvec_subvector' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE; diff --git a/src/halfvec.c b/src/halfvec.c index 874cdc4..e99bf0c 100644 --- a/src/halfvec.c +++ b/src/halfvec.c @@ -869,9 +869,9 @@ halfvec_mul(PG_FUNCTION_ARGS) /* * Quantize a half vector */ -PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_quantize_binary); +PGDLLEXPORT PG_FUNCTION_INFO_V1(halfvec_binary_quantize); Datum -halfvec_quantize_binary(PG_FUNCTION_ARGS) +halfvec_binary_quantize(PG_FUNCTION_ARGS) { HalfVector *a = PG_GETARG_HALFVEC_P(0); half *ax = a->x; diff --git a/src/vector.c b/src/vector.c index d823e4f..2e50fca 100644 --- a/src/vector.c +++ b/src/vector.c @@ -873,9 +873,9 @@ vector_mul(PG_FUNCTION_ARGS) /* * Quantize a vector */ -PGDLLEXPORT PG_FUNCTION_INFO_V1(quantize_binary); +PGDLLEXPORT PG_FUNCTION_INFO_V1(binary_quantize); Datum -quantize_binary(PG_FUNCTION_ARGS) +binary_quantize(PG_FUNCTION_ARGS) { Vector *a = PG_GETARG_VECTOR_P(0); float *ax = a->x; diff --git a/test/expected/halfvec_functions.out b/test/expected/halfvec_functions.out index 52590bb..ca94c85 100644 --- a/test/expected/halfvec_functions.out +++ b/test/expected/halfvec_functions.out @@ -278,14 +278,14 @@ SELECT l1_distance('[0,0]'::halfvec, '[0,1]'); SELECT l1_distance('[1,2]'::halfvec, '[3]'); ERROR: different halfvec dimensions 2 and 1 -SELECT quantize_binary('[1,0,-1]'::halfvec); - quantize_binary +SELECT binary_quantize('[1,0,-1]'::halfvec); + binary_quantize ----------------- 100 (1 row) -SELECT quantize_binary('[0,0.1,-0.2,-0.3,0.4,0.5,0.6,-0.7,0.8,-0.9,1]'::halfvec); - quantize_binary +SELECT binary_quantize('[0,0.1,-0.2,-0.3,0.4,0.5,0.6,-0.7,0.8,-0.9,1]'::halfvec); + binary_quantize ----------------- 01001110101 (1 row) diff --git a/test/expected/vector_functions.out b/test/expected/vector_functions.out index 389a26f..3fdeb54 100644 --- a/test/expected/vector_functions.out +++ b/test/expected/vector_functions.out @@ -272,14 +272,14 @@ SELECT l1_distance('[3e38]'::vector, '[-3e38]'); Infinity (1 row) -SELECT quantize_binary('[1,0,-1]'::vector); - quantize_binary +SELECT binary_quantize('[1,0,-1]'::vector); + binary_quantize ----------------- 100 (1 row) -SELECT quantize_binary('[0,0.1,-0.2,-0.3,0.4,0.5,0.6,-0.7,0.8,-0.9,1]'::vector); - quantize_binary +SELECT binary_quantize('[0,0.1,-0.2,-0.3,0.4,0.5,0.6,-0.7,0.8,-0.9,1]'::vector); + binary_quantize ----------------- 01001110101 (1 row) diff --git a/test/sql/halfvec_functions.sql b/test/sql/halfvec_functions.sql index 91a15f0..7482f4e 100644 --- a/test/sql/halfvec_functions.sql +++ b/test/sql/halfvec_functions.sql @@ -59,8 +59,8 @@ SELECT l1_distance('[0,0]'::halfvec, '[3,4]'); SELECT l1_distance('[0,0]'::halfvec, '[0,1]'); SELECT l1_distance('[1,2]'::halfvec, '[3]'); -SELECT quantize_binary('[1,0,-1]'::halfvec); -SELECT quantize_binary('[0,0.1,-0.2,-0.3,0.4,0.5,0.6,-0.7,0.8,-0.9,1]'::halfvec); +SELECT binary_quantize('[1,0,-1]'::halfvec); +SELECT binary_quantize('[0,0.1,-0.2,-0.3,0.4,0.5,0.6,-0.7,0.8,-0.9,1]'::halfvec); SELECT subvector('[1,2,3,4,5]'::halfvec, 1, 3); SELECT subvector('[1,2,3,4,5]'::halfvec, 3, 2); diff --git a/test/sql/vector_functions.sql b/test/sql/vector_functions.sql index a8a3fed..0d8fd5e 100644 --- a/test/sql/vector_functions.sql +++ b/test/sql/vector_functions.sql @@ -58,8 +58,8 @@ SELECT l1_distance('[0,0]'::vector, '[0,1]'); SELECT l1_distance('[1,2]'::vector, '[3]'); SELECT l1_distance('[3e38]'::vector, '[-3e38]'); -SELECT quantize_binary('[1,0,-1]'::vector); -SELECT quantize_binary('[0,0.1,-0.2,-0.3,0.4,0.5,0.6,-0.7,0.8,-0.9,1]'::vector); +SELECT binary_quantize('[1,0,-1]'::vector); +SELECT binary_quantize('[0,0.1,-0.2,-0.3,0.4,0.5,0.6,-0.7,0.8,-0.9,1]'::vector); SELECT subvector('[1,2,3,4,5]'::vector, 1, 3); SELECT subvector('[1,2,3,4,5]'::vector, 3, 2);