From 69c3e719f73d0e00e7eafd28da5c7f2ad48ec4b8 Mon Sep 17 00:00:00 2001
From: Andrew Kane
Date: Wed, 24 Apr 2024 15:27:10 -0700
Subject: [PATCH] Added support functions for max dimensions for ivfflat

GetMaxDimensions() now takes the index relation and asks the operator
class for its limit through a new optional support function
(IVFFLAT_MAX_DIMS_PROC, number 6) instead of switching on the ivfflat
type. Operator classes that do not define the function keep the default
of IVFFLAT_MAX_DIM. The limits themselves are unchanged:
IVFFLAT_MAX_DIM * 2 for halfvec and IVFFLAT_MAX_DIM * 32 for bit.

The type support function moves from number 6 to number 7, so
amsupport is raised to 7 and the bit and halfvec ivfflat operator
classes register both functions.

Building an ivfflat index on a varbit column is now rejected with
"type not supported for ivfflat index".
---
 sql/vector--0.6.2--0.7.0.sql | 18 ++++++++++++++----
 sql/vector.sql               | 18 ++++++++++++++----
 src/ivfbuild.c               | 18 ++++++++++--------
 src/ivfflat.c                |  2 +-
 src/ivfflat.h                |  3 ++-
 src/ivfutils.c               | 20 ++++++++++++++++++++
 6 files changed, 61 insertions(+), 18 deletions(-)

diff --git a/sql/vector--0.6.2--0.7.0.sql b/sql/vector--0.6.2--0.7.0.sql
index 9d6bf8a..e054031 100644
--- a/sql/vector--0.6.2--0.7.0.sql
+++ b/sql/vector--0.6.2--0.7.0.sql
@@ -22,6 +22,12 @@ CREATE OPERATOR || (
 	LEFTARG = vector, RIGHTARG = vector, PROCEDURE = vector_concat
 );
 
+CREATE FUNCTION ivfflat_bit_max_dims(internal) RETURNS internal
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION ivfflat_halfvec_max_dims(internal) RETURNS internal
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
 CREATE FUNCTION ivfflat_bit_support(internal) RETURNS internal
 	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
@@ -66,7 +72,8 @@ CREATE OPERATOR CLASS bit_hamming_ops
 	OPERATOR 1 <~> (bit, bit) FOR ORDER BY float_ops,
 	FUNCTION 1 hamming_distance(bit, bit),
 	FUNCTION 3 hamming_distance(bit, bit),
-	FUNCTION 6 ivfflat_bit_support(internal);
+	FUNCTION 6 ivfflat_bit_max_dims(internal),
+	FUNCTION 7 ivfflat_bit_support(internal);
 
 CREATE OPERATOR CLASS bit_hamming_ops
 	FOR TYPE bit USING hnsw AS
@@ -333,7 +340,8 @@ CREATE OPERATOR CLASS halfvec_l2_ops
 	OPERATOR 1 <-> (halfvec, halfvec) FOR ORDER BY float_ops,
 	FUNCTION 1 halfvec_l2_squared_distance(halfvec, halfvec),
 	FUNCTION 3 l2_distance(halfvec, halfvec),
-	FUNCTION 6 ivfflat_halfvec_support(internal);
+	FUNCTION 6 ivfflat_halfvec_max_dims(internal),
+	FUNCTION 7 ivfflat_halfvec_support(internal);
 
 CREATE OPERATOR CLASS halfvec_ip_ops
 	FOR TYPE halfvec USING ivfflat AS
@@ -342,7 +350,8 @@ CREATE OPERATOR CLASS halfvec_ip_ops
 	FUNCTION 3 halfvec_spherical_distance(halfvec, halfvec),
 	FUNCTION 4 l2_norm(halfvec),
 	FUNCTION 5 l2_normalize(halfvec),
-	FUNCTION 6 ivfflat_halfvec_support(internal);
+	FUNCTION 6 ivfflat_halfvec_max_dims(internal),
+	FUNCTION 7 ivfflat_halfvec_support(internal);
 
 CREATE OPERATOR CLASS halfvec_cosine_ops
 	FOR TYPE halfvec USING ivfflat AS
@@ -352,7 +361,8 @@ CREATE OPERATOR CLASS halfvec_cosine_ops
 	FUNCTION 3 halfvec_spherical_distance(halfvec, halfvec),
 	FUNCTION 4 l2_norm(halfvec),
 	FUNCTION 5 l2_normalize(halfvec),
-	FUNCTION 6 ivfflat_halfvec_support(internal);
+	FUNCTION 6 ivfflat_halfvec_max_dims(internal),
+	FUNCTION 7 ivfflat_halfvec_support(internal);
 
 CREATE OPERATOR CLASS halfvec_l2_ops
 	FOR TYPE halfvec USING hnsw AS
diff --git a/sql/vector.sql b/sql/vector.sql
index d1dcefe..c168c84 100644
--- a/sql/vector.sql
+++ b/sql/vector.sql
@@ -263,6 +263,12 @@ COMMENT ON ACCESS METHOD hnsw IS 'hnsw index access method';
 
 -- access method private functions
 
+CREATE FUNCTION ivfflat_bit_max_dims(internal) RETURNS internal
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
+CREATE FUNCTION ivfflat_halfvec_max_dims(internal) RETURNS internal
+	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
+
 CREATE FUNCTION ivfflat_bit_support(internal) RETURNS internal
 	AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
 
@@ -361,7 +367,8 @@ CREATE OPERATOR CLASS bit_hamming_ops
 	OPERATOR 1 <~> (bit, bit) FOR ORDER BY float_ops,
 	FUNCTION 1 hamming_distance(bit, bit),
 	FUNCTION 3 hamming_distance(bit, bit),
-	FUNCTION 6 ivfflat_bit_support(internal);
+	FUNCTION 6 ivfflat_bit_max_dims(internal),
+	FUNCTION 7 ivfflat_bit_support(internal);
 
 CREATE OPERATOR CLASS bit_hamming_ops
 	FOR TYPE bit USING hnsw AS
@@ -644,7 +651,8 @@ CREATE OPERATOR CLASS halfvec_l2_ops
 	OPERATOR 1 <-> (halfvec, halfvec) FOR ORDER BY float_ops,
 	FUNCTION 1 halfvec_l2_squared_distance(halfvec, halfvec),
 	FUNCTION 3 l2_distance(halfvec, halfvec),
-	FUNCTION 6 ivfflat_halfvec_support(internal);
+	FUNCTION 6 ivfflat_halfvec_max_dims(internal),
+	FUNCTION 7 ivfflat_halfvec_support(internal);
 
 CREATE OPERATOR CLASS halfvec_ip_ops
 	FOR TYPE halfvec USING ivfflat AS
@@ -653,7 +661,8 @@ CREATE OPERATOR CLASS halfvec_ip_ops
 	FUNCTION 3 halfvec_spherical_distance(halfvec, halfvec),
 	FUNCTION 4 l2_norm(halfvec),
 	FUNCTION 5 l2_normalize(halfvec),
-	FUNCTION 6 ivfflat_halfvec_support(internal);
+	FUNCTION 6 ivfflat_halfvec_max_dims(internal),
+	FUNCTION 7 ivfflat_halfvec_support(internal);
 
 CREATE OPERATOR CLASS halfvec_cosine_ops
 	FOR TYPE halfvec USING ivfflat AS
@@ -663,7 +672,8 @@ CREATE OPERATOR CLASS halfvec_cosine_ops
 	FUNCTION 3 halfvec_spherical_distance(halfvec, halfvec),
 	FUNCTION 4 l2_norm(halfvec),
 	FUNCTION 5 l2_normalize(halfvec),
-	FUNCTION 6 ivfflat_halfvec_support(internal);
+	FUNCTION 6 ivfflat_halfvec_max_dims(internal),
+	FUNCTION 7 ivfflat_halfvec_support(internal);
 
 CREATE OPERATOR CLASS halfvec_l2_ops
 	FOR TYPE halfvec USING hnsw AS
diff --git a/src/ivfbuild.c b/src/ivfbuild.c
index edc435a..ac85949 100644
--- a/src/ivfbuild.c
+++ b/src/ivfbuild.c
@@ -323,16 +323,14 @@ InsertTuples(Relation index, IvfflatBuildState * buildstate, ForkNumber forkNum)
  * Get max dimensions
  */
 static int
-GetMaxDimensions(IvfflatType type)
+GetMaxDimensions(Relation index)
 {
-	int			maxDimensions = IVFFLAT_MAX_DIM;
+	FmgrInfo   *procinfo = IvfflatOptionalProcInfo(index, IVFFLAT_MAX_DIMS_PROC);
 
-	if (type == IVFFLAT_TYPE_HALFVEC)
-		maxDimensions *= 2;
-	else if (type == IVFFLAT_TYPE_BIT)
-		maxDimensions *= 32;
+	if (procinfo == NULL)
+		return IVFFLAT_MAX_DIM;
 
-	return maxDimensions;
+	return DatumGetInt32(FunctionCall1(procinfo, PointerGetDatum(NULL)));
 }
 
 /*
@@ -367,7 +365,11 @@ InitBuildState(IvfflatBuildState * buildstate, Relation heap, Relation index, In
 	buildstate->lists = IvfflatGetLists(index);
 	buildstate->dimensions = TupleDescAttr(index->rd_att, 0)->atttypmod;
 
-	maxDimensions = GetMaxDimensions(buildstate->type);
+	/* Disallow varbit since fixed dimensions are required */
+	if (TupleDescAttr(index->rd_att, 0)->atttypid == VARBITOID)
+		elog(ERROR, "type not supported for ivfflat index");
+
+	maxDimensions = GetMaxDimensions(index);
 
 	/* Require column to have dimensions to be indexed */
 	if (buildstate->dimensions < 0)
diff --git a/src/ivfflat.c b/src/ivfflat.c
index 6bb2422..4ff77fa 100644
--- a/src/ivfflat.c
+++ b/src/ivfflat.c
@@ -188,7 +188,7 @@ ivfflathandler(PG_FUNCTION_ARGS)
 	IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
 
 	amroutine->amstrategies = 0;
-	amroutine->amsupport = 6;
+	amroutine->amsupport = 7;
 #if PG_VERSION_NUM >= 130000
 	amroutine->amoptsprocnum = 0;
 #endif
diff --git a/src/ivfflat.h b/src/ivfflat.h
index bdce324..050ca3d 100644
--- a/src/ivfflat.h
+++ b/src/ivfflat.h
@@ -29,7 +29,8 @@
 #define IVFFLAT_KMEANS_DISTANCE_PROC 3
 #define IVFFLAT_KMEANS_NORM_PROC 4
 #define IVFFLAT_NORMALIZE_PROC 5
-#define IVFFLAT_TYPE_SUPPORT_PROC 6
+#define IVFFLAT_MAX_DIMS_PROC 6
+#define IVFFLAT_TYPE_SUPPORT_PROC 7
 
 #define IVFFLAT_VERSION 1
 #define IVFFLAT_MAGIC_NUMBER 0x14FF1A7
diff --git a/src/ivfutils.c b/src/ivfutils.c
index 6f40730..0c9d2cf 100644
--- a/src/ivfutils.c
+++ b/src/ivfutils.c
@@ -246,6 +246,26 @@ IvfflatUpdateList(Relation index, ListInfo listInfo,
 	}
 }
 
+/*
+ * Get max dimensions for halfvec (ivfflat max dims support function)
+ */
+PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflat_halfvec_max_dims);
+Datum
+ivfflat_halfvec_max_dims(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_INT32(IVFFLAT_MAX_DIM * 2);
+}
+
+/*
+ * Get max dimensions for bit (ivfflat max dims support function)
+ */
+PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflat_bit_max_dims);
+Datum
+ivfflat_bit_max_dims(PG_FUNCTION_ARGS)
+{
+	PG_RETURN_INT32(IVFFLAT_MAX_DIM * 32);
+}
+
 PGDLLEXPORT PG_FUNCTION_INFO_V1(ivfflat_halfvec_support);
 Datum
 ivfflat_halfvec_support(PG_FUNCTION_ARGS)