mirror of
https://github.com/pgvector/pgvector.git
synced 2026-06-06 05:51:21 +08:00
Improved support functions for HNSW - #527
This commit is contained in:
@@ -28,16 +28,13 @@ CREATE FUNCTION ivfflat_bit_support(internal) RETURNS internal
|
|||||||
CREATE FUNCTION ivfflat_halfvec_support(internal) RETURNS internal
|
CREATE FUNCTION ivfflat_halfvec_support(internal) RETURNS internal
|
||||||
AS 'MODULE_PATHNAME' LANGUAGE C;
|
AS 'MODULE_PATHNAME' LANGUAGE C;
|
||||||
|
|
||||||
CREATE FUNCTION hnsw_bit_max_dims(internal) RETURNS internal
|
CREATE FUNCTION hnsw_bit_support(internal) RETURNS internal
|
||||||
AS 'MODULE_PATHNAME' LANGUAGE C;
|
AS 'MODULE_PATHNAME' LANGUAGE C;
|
||||||
|
|
||||||
CREATE FUNCTION hnsw_halfvec_max_dims(internal) RETURNS internal
|
CREATE FUNCTION hnsw_halfvec_support(internal) RETURNS internal
|
||||||
AS 'MODULE_PATHNAME' LANGUAGE C;
|
AS 'MODULE_PATHNAME' LANGUAGE C;
|
||||||
|
|
||||||
CREATE FUNCTION hnsw_sparsevec_max_dims(internal) RETURNS internal
|
CREATE FUNCTION hnsw_sparsevec_support(internal) RETURNS internal
|
||||||
AS 'MODULE_PATHNAME' LANGUAGE C;
|
|
||||||
|
|
||||||
CREATE FUNCTION hnsw_sparsevec_check_value(internal) RETURNS internal
|
|
||||||
AS 'MODULE_PATHNAME' LANGUAGE C;
|
AS 'MODULE_PATHNAME' LANGUAGE C;
|
||||||
|
|
||||||
CREATE OPERATOR CLASS vector_l1_ops
|
CREATE OPERATOR CLASS vector_l1_ops
|
||||||
@@ -72,13 +69,13 @@ CREATE OPERATOR CLASS bit_hamming_ops
|
|||||||
FOR TYPE bit USING hnsw AS
|
FOR TYPE bit USING hnsw AS
|
||||||
OPERATOR 1 <~> (bit, bit) FOR ORDER BY float_ops,
|
OPERATOR 1 <~> (bit, bit) FOR ORDER BY float_ops,
|
||||||
FUNCTION 1 hamming_distance(bit, bit),
|
FUNCTION 1 hamming_distance(bit, bit),
|
||||||
FUNCTION 4 hnsw_bit_max_dims(internal);
|
FUNCTION 4 hnsw_bit_support(internal);
|
||||||
|
|
||||||
CREATE OPERATOR CLASS bit_jaccard_ops
|
CREATE OPERATOR CLASS bit_jaccard_ops
|
||||||
FOR TYPE bit USING hnsw AS
|
FOR TYPE bit USING hnsw AS
|
||||||
OPERATOR 1 <%> (bit, bit) FOR ORDER BY float_ops,
|
OPERATOR 1 <%> (bit, bit) FOR ORDER BY float_ops,
|
||||||
FUNCTION 1 jaccard_distance(bit, bit),
|
FUNCTION 1 jaccard_distance(bit, bit),
|
||||||
FUNCTION 4 hnsw_bit_max_dims(internal);
|
FUNCTION 4 hnsw_bit_support(internal);
|
||||||
|
|
||||||
CREATE TYPE halfvec;
|
CREATE TYPE halfvec;
|
||||||
|
|
||||||
@@ -358,13 +355,13 @@ CREATE OPERATOR CLASS halfvec_l2_ops
|
|||||||
FOR TYPE halfvec USING hnsw AS
|
FOR TYPE halfvec USING hnsw AS
|
||||||
OPERATOR 1 <-> (halfvec, halfvec) FOR ORDER BY float_ops,
|
OPERATOR 1 <-> (halfvec, halfvec) FOR ORDER BY float_ops,
|
||||||
FUNCTION 1 halfvec_l2_squared_distance(halfvec, halfvec),
|
FUNCTION 1 halfvec_l2_squared_distance(halfvec, halfvec),
|
||||||
FUNCTION 4 hnsw_halfvec_max_dims(internal);
|
FUNCTION 4 hnsw_halfvec_support(internal);
|
||||||
|
|
||||||
CREATE OPERATOR CLASS halfvec_ip_ops
|
CREATE OPERATOR CLASS halfvec_ip_ops
|
||||||
FOR TYPE halfvec USING hnsw AS
|
FOR TYPE halfvec USING hnsw AS
|
||||||
OPERATOR 1 <#> (halfvec, halfvec) FOR ORDER BY float_ops,
|
OPERATOR 1 <#> (halfvec, halfvec) FOR ORDER BY float_ops,
|
||||||
FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec),
|
FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec),
|
||||||
FUNCTION 4 hnsw_halfvec_max_dims(internal);
|
FUNCTION 4 hnsw_halfvec_support(internal);
|
||||||
|
|
||||||
CREATE OPERATOR CLASS halfvec_cosine_ops
|
CREATE OPERATOR CLASS halfvec_cosine_ops
|
||||||
FOR TYPE halfvec USING hnsw AS
|
FOR TYPE halfvec USING hnsw AS
|
||||||
@@ -372,13 +369,13 @@ CREATE OPERATOR CLASS halfvec_cosine_ops
|
|||||||
FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec),
|
FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec),
|
||||||
FUNCTION 2 l2_norm(halfvec),
|
FUNCTION 2 l2_norm(halfvec),
|
||||||
FUNCTION 3 l2_normalize(halfvec),
|
FUNCTION 3 l2_normalize(halfvec),
|
||||||
FUNCTION 4 hnsw_halfvec_max_dims(internal);
|
FUNCTION 4 hnsw_halfvec_support(internal);
|
||||||
|
|
||||||
CREATE OPERATOR CLASS halfvec_l1_ops
|
CREATE OPERATOR CLASS halfvec_l1_ops
|
||||||
FOR TYPE halfvec USING hnsw AS
|
FOR TYPE halfvec USING hnsw AS
|
||||||
OPERATOR 1 <+> (halfvec, halfvec) FOR ORDER BY float_ops,
|
OPERATOR 1 <+> (halfvec, halfvec) FOR ORDER BY float_ops,
|
||||||
FUNCTION 1 l1_distance(halfvec, halfvec),
|
FUNCTION 1 l1_distance(halfvec, halfvec),
|
||||||
FUNCTION 4 hnsw_halfvec_max_dims(internal);
|
FUNCTION 4 hnsw_halfvec_support(internal);
|
||||||
|
|
||||||
CREATE TYPE sparsevec;
|
CREATE TYPE sparsevec;
|
||||||
|
|
||||||
@@ -550,15 +547,13 @@ CREATE OPERATOR CLASS sparsevec_l2_ops
|
|||||||
FOR TYPE sparsevec USING hnsw AS
|
FOR TYPE sparsevec USING hnsw AS
|
||||||
OPERATOR 1 <-> (sparsevec, sparsevec) FOR ORDER BY float_ops,
|
OPERATOR 1 <-> (sparsevec, sparsevec) FOR ORDER BY float_ops,
|
||||||
FUNCTION 1 sparsevec_l2_squared_distance(sparsevec, sparsevec),
|
FUNCTION 1 sparsevec_l2_squared_distance(sparsevec, sparsevec),
|
||||||
FUNCTION 4 hnsw_sparsevec_max_dims(internal),
|
FUNCTION 4 hnsw_sparsevec_support(internal);
|
||||||
FUNCTION 5 hnsw_sparsevec_check_value(internal);
|
|
||||||
|
|
||||||
CREATE OPERATOR CLASS sparsevec_ip_ops
|
CREATE OPERATOR CLASS sparsevec_ip_ops
|
||||||
FOR TYPE sparsevec USING hnsw AS
|
FOR TYPE sparsevec USING hnsw AS
|
||||||
OPERATOR 1 <#> (sparsevec, sparsevec) FOR ORDER BY float_ops,
|
OPERATOR 1 <#> (sparsevec, sparsevec) FOR ORDER BY float_ops,
|
||||||
FUNCTION 1 sparsevec_negative_inner_product(sparsevec, sparsevec),
|
FUNCTION 1 sparsevec_negative_inner_product(sparsevec, sparsevec),
|
||||||
FUNCTION 4 hnsw_sparsevec_max_dims(internal),
|
FUNCTION 4 hnsw_sparsevec_support(internal);
|
||||||
FUNCTION 5 hnsw_sparsevec_check_value(internal);
|
|
||||||
|
|
||||||
CREATE OPERATOR CLASS sparsevec_cosine_ops
|
CREATE OPERATOR CLASS sparsevec_cosine_ops
|
||||||
FOR TYPE sparsevec USING hnsw AS
|
FOR TYPE sparsevec USING hnsw AS
|
||||||
@@ -566,12 +561,10 @@ CREATE OPERATOR CLASS sparsevec_cosine_ops
|
|||||||
FUNCTION 1 sparsevec_negative_inner_product(sparsevec, sparsevec),
|
FUNCTION 1 sparsevec_negative_inner_product(sparsevec, sparsevec),
|
||||||
FUNCTION 2 l2_norm(sparsevec),
|
FUNCTION 2 l2_norm(sparsevec),
|
||||||
FUNCTION 3 l2_normalize(sparsevec),
|
FUNCTION 3 l2_normalize(sparsevec),
|
||||||
FUNCTION 4 hnsw_sparsevec_max_dims(internal),
|
FUNCTION 4 hnsw_sparsevec_support(internal);
|
||||||
FUNCTION 5 hnsw_sparsevec_check_value(internal);
|
|
||||||
|
|
||||||
CREATE OPERATOR CLASS sparsevec_l1_ops
|
CREATE OPERATOR CLASS sparsevec_l1_ops
|
||||||
FOR TYPE sparsevec USING hnsw AS
|
FOR TYPE sparsevec USING hnsw AS
|
||||||
OPERATOR 1 <+> (sparsevec, sparsevec) FOR ORDER BY float_ops,
|
OPERATOR 1 <+> (sparsevec, sparsevec) FOR ORDER BY float_ops,
|
||||||
FUNCTION 1 l1_distance(sparsevec, sparsevec),
|
FUNCTION 1 l1_distance(sparsevec, sparsevec),
|
||||||
FUNCTION 4 hnsw_sparsevec_max_dims(internal),
|
FUNCTION 4 hnsw_sparsevec_support(internal);
|
||||||
FUNCTION 5 hnsw_sparsevec_check_value(internal);
|
|
||||||
|
|||||||
@@ -269,16 +269,13 @@ CREATE FUNCTION ivfflat_bit_support(internal) RETURNS internal
|
|||||||
CREATE FUNCTION ivfflat_halfvec_support(internal) RETURNS internal
|
CREATE FUNCTION ivfflat_halfvec_support(internal) RETURNS internal
|
||||||
AS 'MODULE_PATHNAME' LANGUAGE C;
|
AS 'MODULE_PATHNAME' LANGUAGE C;
|
||||||
|
|
||||||
CREATE FUNCTION hnsw_bit_max_dims(internal) RETURNS internal
|
CREATE FUNCTION hnsw_bit_support(internal) RETURNS internal
|
||||||
AS 'MODULE_PATHNAME' LANGUAGE C;
|
AS 'MODULE_PATHNAME' LANGUAGE C;
|
||||||
|
|
||||||
CREATE FUNCTION hnsw_halfvec_max_dims(internal) RETURNS internal
|
CREATE FUNCTION hnsw_halfvec_support(internal) RETURNS internal
|
||||||
AS 'MODULE_PATHNAME' LANGUAGE C;
|
AS 'MODULE_PATHNAME' LANGUAGE C;
|
||||||
|
|
||||||
CREATE FUNCTION hnsw_sparsevec_max_dims(internal) RETURNS internal
|
CREATE FUNCTION hnsw_sparsevec_support(internal) RETURNS internal
|
||||||
AS 'MODULE_PATHNAME' LANGUAGE C;
|
|
||||||
|
|
||||||
CREATE FUNCTION hnsw_sparsevec_check_value(internal) RETURNS internal
|
|
||||||
AS 'MODULE_PATHNAME' LANGUAGE C;
|
AS 'MODULE_PATHNAME' LANGUAGE C;
|
||||||
|
|
||||||
-- vector opclasses
|
-- vector opclasses
|
||||||
@@ -367,13 +364,13 @@ CREATE OPERATOR CLASS bit_hamming_ops
|
|||||||
FOR TYPE bit USING hnsw AS
|
FOR TYPE bit USING hnsw AS
|
||||||
OPERATOR 1 <~> (bit, bit) FOR ORDER BY float_ops,
|
OPERATOR 1 <~> (bit, bit) FOR ORDER BY float_ops,
|
||||||
FUNCTION 1 hamming_distance(bit, bit),
|
FUNCTION 1 hamming_distance(bit, bit),
|
||||||
FUNCTION 4 hnsw_bit_max_dims(internal);
|
FUNCTION 4 hnsw_bit_support(internal);
|
||||||
|
|
||||||
CREATE OPERATOR CLASS bit_jaccard_ops
|
CREATE OPERATOR CLASS bit_jaccard_ops
|
||||||
FOR TYPE bit USING hnsw AS
|
FOR TYPE bit USING hnsw AS
|
||||||
OPERATOR 1 <%> (bit, bit) FOR ORDER BY float_ops,
|
OPERATOR 1 <%> (bit, bit) FOR ORDER BY float_ops,
|
||||||
FUNCTION 1 jaccard_distance(bit, bit),
|
FUNCTION 1 jaccard_distance(bit, bit),
|
||||||
FUNCTION 4 hnsw_bit_max_dims(internal);
|
FUNCTION 4 hnsw_bit_support(internal);
|
||||||
|
|
||||||
-- halfvec type
|
-- halfvec type
|
||||||
|
|
||||||
@@ -669,13 +666,13 @@ CREATE OPERATOR CLASS halfvec_l2_ops
|
|||||||
FOR TYPE halfvec USING hnsw AS
|
FOR TYPE halfvec USING hnsw AS
|
||||||
OPERATOR 1 <-> (halfvec, halfvec) FOR ORDER BY float_ops,
|
OPERATOR 1 <-> (halfvec, halfvec) FOR ORDER BY float_ops,
|
||||||
FUNCTION 1 halfvec_l2_squared_distance(halfvec, halfvec),
|
FUNCTION 1 halfvec_l2_squared_distance(halfvec, halfvec),
|
||||||
FUNCTION 4 hnsw_halfvec_max_dims(internal);
|
FUNCTION 4 hnsw_halfvec_support(internal);
|
||||||
|
|
||||||
CREATE OPERATOR CLASS halfvec_ip_ops
|
CREATE OPERATOR CLASS halfvec_ip_ops
|
||||||
FOR TYPE halfvec USING hnsw AS
|
FOR TYPE halfvec USING hnsw AS
|
||||||
OPERATOR 1 <#> (halfvec, halfvec) FOR ORDER BY float_ops,
|
OPERATOR 1 <#> (halfvec, halfvec) FOR ORDER BY float_ops,
|
||||||
FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec),
|
FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec),
|
||||||
FUNCTION 4 hnsw_halfvec_max_dims(internal);
|
FUNCTION 4 hnsw_halfvec_support(internal);
|
||||||
|
|
||||||
CREATE OPERATOR CLASS halfvec_cosine_ops
|
CREATE OPERATOR CLASS halfvec_cosine_ops
|
||||||
FOR TYPE halfvec USING hnsw AS
|
FOR TYPE halfvec USING hnsw AS
|
||||||
@@ -683,13 +680,13 @@ CREATE OPERATOR CLASS halfvec_cosine_ops
|
|||||||
FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec),
|
FUNCTION 1 halfvec_negative_inner_product(halfvec, halfvec),
|
||||||
FUNCTION 2 l2_norm(halfvec),
|
FUNCTION 2 l2_norm(halfvec),
|
||||||
FUNCTION 3 l2_normalize(halfvec),
|
FUNCTION 3 l2_normalize(halfvec),
|
||||||
FUNCTION 4 hnsw_halfvec_max_dims(internal);
|
FUNCTION 4 hnsw_halfvec_support(internal);
|
||||||
|
|
||||||
CREATE OPERATOR CLASS halfvec_l1_ops
|
CREATE OPERATOR CLASS halfvec_l1_ops
|
||||||
FOR TYPE halfvec USING hnsw AS
|
FOR TYPE halfvec USING hnsw AS
|
||||||
OPERATOR 1 <+> (halfvec, halfvec) FOR ORDER BY float_ops,
|
OPERATOR 1 <+> (halfvec, halfvec) FOR ORDER BY float_ops,
|
||||||
FUNCTION 1 l1_distance(halfvec, halfvec),
|
FUNCTION 1 l1_distance(halfvec, halfvec),
|
||||||
FUNCTION 4 hnsw_halfvec_max_dims(internal);
|
FUNCTION 4 hnsw_halfvec_support(internal);
|
||||||
|
|
||||||
--- sparsevec type
|
--- sparsevec type
|
||||||
|
|
||||||
@@ -875,15 +872,13 @@ CREATE OPERATOR CLASS sparsevec_l2_ops
|
|||||||
FOR TYPE sparsevec USING hnsw AS
|
FOR TYPE sparsevec USING hnsw AS
|
||||||
OPERATOR 1 <-> (sparsevec, sparsevec) FOR ORDER BY float_ops,
|
OPERATOR 1 <-> (sparsevec, sparsevec) FOR ORDER BY float_ops,
|
||||||
FUNCTION 1 sparsevec_l2_squared_distance(sparsevec, sparsevec),
|
FUNCTION 1 sparsevec_l2_squared_distance(sparsevec, sparsevec),
|
||||||
FUNCTION 4 hnsw_sparsevec_max_dims(internal),
|
FUNCTION 4 hnsw_sparsevec_support(internal);
|
||||||
FUNCTION 5 hnsw_sparsevec_check_value(internal);
|
|
||||||
|
|
||||||
CREATE OPERATOR CLASS sparsevec_ip_ops
|
CREATE OPERATOR CLASS sparsevec_ip_ops
|
||||||
FOR TYPE sparsevec USING hnsw AS
|
FOR TYPE sparsevec USING hnsw AS
|
||||||
OPERATOR 1 <#> (sparsevec, sparsevec) FOR ORDER BY float_ops,
|
OPERATOR 1 <#> (sparsevec, sparsevec) FOR ORDER BY float_ops,
|
||||||
FUNCTION 1 sparsevec_negative_inner_product(sparsevec, sparsevec),
|
FUNCTION 1 sparsevec_negative_inner_product(sparsevec, sparsevec),
|
||||||
FUNCTION 4 hnsw_sparsevec_max_dims(internal),
|
FUNCTION 4 hnsw_sparsevec_support(internal);
|
||||||
FUNCTION 5 hnsw_sparsevec_check_value(internal);
|
|
||||||
|
|
||||||
CREATE OPERATOR CLASS sparsevec_cosine_ops
|
CREATE OPERATOR CLASS sparsevec_cosine_ops
|
||||||
FOR TYPE sparsevec USING hnsw AS
|
FOR TYPE sparsevec USING hnsw AS
|
||||||
@@ -891,12 +886,10 @@ CREATE OPERATOR CLASS sparsevec_cosine_ops
|
|||||||
FUNCTION 1 sparsevec_negative_inner_product(sparsevec, sparsevec),
|
FUNCTION 1 sparsevec_negative_inner_product(sparsevec, sparsevec),
|
||||||
FUNCTION 2 l2_norm(sparsevec),
|
FUNCTION 2 l2_norm(sparsevec),
|
||||||
FUNCTION 3 l2_normalize(sparsevec),
|
FUNCTION 3 l2_normalize(sparsevec),
|
||||||
FUNCTION 4 hnsw_sparsevec_max_dims(internal),
|
FUNCTION 4 hnsw_sparsevec_support(internal);
|
||||||
FUNCTION 5 hnsw_sparsevec_check_value(internal);
|
|
||||||
|
|
||||||
CREATE OPERATOR CLASS sparsevec_l1_ops
|
CREATE OPERATOR CLASS sparsevec_l1_ops
|
||||||
FOR TYPE sparsevec USING hnsw AS
|
FOR TYPE sparsevec USING hnsw AS
|
||||||
OPERATOR 1 <+> (sparsevec, sparsevec) FOR ORDER BY float_ops,
|
OPERATOR 1 <+> (sparsevec, sparsevec) FOR ORDER BY float_ops,
|
||||||
FUNCTION 1 l1_distance(sparsevec, sparsevec),
|
FUNCTION 1 l1_distance(sparsevec, sparsevec),
|
||||||
FUNCTION 4 hnsw_sparsevec_max_dims(internal),
|
FUNCTION 4 hnsw_sparsevec_support(internal);
|
||||||
FUNCTION 5 hnsw_sparsevec_check_value(internal);
|
|
||||||
|
|||||||
@@ -194,7 +194,7 @@ hnswhandler(PG_FUNCTION_ARGS)
|
|||||||
IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
|
IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
|
||||||
|
|
||||||
amroutine->amstrategies = 0;
|
amroutine->amstrategies = 0;
|
||||||
amroutine->amsupport = 5;
|
amroutine->amsupport = 4;
|
||||||
#if PG_VERSION_NUM >= 130000
|
#if PG_VERSION_NUM >= 130000
|
||||||
amroutine->amoptsprocnum = 0;
|
amroutine->amoptsprocnum = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
13
src/hnsw.h
13
src/hnsw.h
@@ -23,8 +23,7 @@
|
|||||||
#define HNSW_DISTANCE_PROC 1
|
#define HNSW_DISTANCE_PROC 1
|
||||||
#define HNSW_NORM_PROC 2
|
#define HNSW_NORM_PROC 2
|
||||||
#define HNSW_NORMALIZE_PROC 3
|
#define HNSW_NORMALIZE_PROC 3
|
||||||
#define HNSW_MAX_DIMS_PROC 4
|
#define HNSW_TYPE_INFO_PROC 4
|
||||||
#define HNSW_CHECK_VALUE_PROC 5
|
|
||||||
|
|
||||||
#define HNSW_VERSION 1
|
#define HNSW_VERSION 1
|
||||||
#define HNSW_MAGIC_NUMBER 0xA953A953
|
#define HNSW_MAGIC_NUMBER 0xA953A953
|
||||||
@@ -239,6 +238,12 @@ typedef struct HnswAllocator
|
|||||||
void *state;
|
void *state;
|
||||||
} HnswAllocator;
|
} HnswAllocator;
|
||||||
|
|
||||||
|
typedef struct HnswTypeInfo
|
||||||
|
{
|
||||||
|
int maxDimensions;
|
||||||
|
void (*checkValue) (Pointer v);
|
||||||
|
} HnswTypeInfo;
|
||||||
|
|
||||||
typedef struct HnswBuildState
|
typedef struct HnswBuildState
|
||||||
{
|
{
|
||||||
/* Info */
|
/* Info */
|
||||||
@@ -246,6 +251,7 @@ typedef struct HnswBuildState
|
|||||||
Relation index;
|
Relation index;
|
||||||
IndexInfo *indexInfo;
|
IndexInfo *indexInfo;
|
||||||
ForkNumber forkNum;
|
ForkNumber forkNum;
|
||||||
|
const HnswTypeInfo *typeInfo;
|
||||||
|
|
||||||
/* Settings */
|
/* Settings */
|
||||||
int dimensions;
|
int dimensions;
|
||||||
@@ -260,7 +266,6 @@ typedef struct HnswBuildState
|
|||||||
FmgrInfo *procinfo;
|
FmgrInfo *procinfo;
|
||||||
FmgrInfo *normprocinfo;
|
FmgrInfo *normprocinfo;
|
||||||
FmgrInfo *normalizeprocinfo;
|
FmgrInfo *normalizeprocinfo;
|
||||||
FmgrInfo *checkvalueprocinfo;
|
|
||||||
Oid collation;
|
Oid collation;
|
||||||
|
|
||||||
/* Variables */
|
/* Variables */
|
||||||
@@ -375,7 +380,6 @@ int HnswGetEfConstruction(Relation index);
|
|||||||
FmgrInfo *HnswOptionalProcInfo(Relation index, uint16 procnum);
|
FmgrInfo *HnswOptionalProcInfo(Relation index, uint16 procnum);
|
||||||
Datum HnswNormValue(FmgrInfo *procinfo, Oid collation, Datum value);
|
Datum HnswNormValue(FmgrInfo *procinfo, Oid collation, Datum value);
|
||||||
bool HnswCheckNorm(FmgrInfo *procinfo, Oid collation, Datum value);
|
bool HnswCheckNorm(FmgrInfo *procinfo, Oid collation, Datum value);
|
||||||
void HnswCheckValue(FmgrInfo *procinfo, Oid collation, Datum value);
|
|
||||||
Buffer HnswNewBuffer(Relation index, ForkNumber forkNum);
|
Buffer HnswNewBuffer(Relation index, ForkNumber forkNum);
|
||||||
void HnswInitPage(Buffer buf, Page page);
|
void HnswInitPage(Buffer buf, Page page);
|
||||||
void HnswInit(void);
|
void HnswInit(void);
|
||||||
@@ -399,6 +403,7 @@ void HnswSetElementTuple(char *base, HnswElementTuple etup, HnswElement element
|
|||||||
void HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, int lm, int lc, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation);
|
void HnswUpdateConnection(char *base, HnswElement element, HnswCandidate * hc, int lm, int lc, int *updateIdx, Relation index, FmgrInfo *procinfo, Oid collation);
|
||||||
void HnswLoadNeighbors(HnswElement element, Relation index, int m);
|
void HnswLoadNeighbors(HnswElement element, Relation index, int m);
|
||||||
void HnswInitLockTranche(void);
|
void HnswInitLockTranche(void);
|
||||||
|
const HnswTypeInfo *HnswGetTypeInfo(Relation index);
|
||||||
PGDLLEXPORT void HnswParallelBuildMain(dsm_segment *seg, shm_toc *toc);
|
PGDLLEXPORT void HnswParallelBuildMain(dsm_segment *seg, shm_toc *toc);
|
||||||
|
|
||||||
/* Index access methods */
|
/* Index access methods */
|
||||||
|
|||||||
@@ -488,8 +488,8 @@ InsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heaptid, Hn
|
|||||||
Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0]));
|
Datum value = PointerGetDatum(PG_DETOAST_DATUM(values[0]));
|
||||||
|
|
||||||
/* Check value */
|
/* Check value */
|
||||||
if (buildstate->checkvalueprocinfo != NULL)
|
if (buildstate->typeInfo->checkValue != NULL)
|
||||||
HnswCheckValue(buildstate->checkvalueprocinfo, buildstate->collation, value);
|
buildstate->typeInfo->checkValue(DatumGetPointer(value));
|
||||||
|
|
||||||
/* Normalize if needed */
|
/* Normalize if needed */
|
||||||
if (buildstate->normprocinfo != NULL)
|
if (buildstate->normprocinfo != NULL)
|
||||||
@@ -672,32 +672,17 @@ HnswSharedMemoryAlloc(Size size, void *state)
|
|||||||
return chunk;
|
return chunk;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Get max dimensions
|
|
||||||
*/
|
|
||||||
static int
|
|
||||||
GetMaxDimensions(Relation index)
|
|
||||||
{
|
|
||||||
FmgrInfo *procinfo = HnswOptionalProcInfo(index, HNSW_MAX_DIMS_PROC);
|
|
||||||
|
|
||||||
if (procinfo == NULL)
|
|
||||||
return HNSW_MAX_DIM;
|
|
||||||
|
|
||||||
return DatumGetInt32(FunctionCall1(procinfo, PointerGetDatum(NULL)));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Initialize the build state
|
* Initialize the build state
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
InitBuildState(HnswBuildState * buildstate, Relation heap, Relation index, IndexInfo *indexInfo, ForkNumber forkNum)
|
InitBuildState(HnswBuildState * buildstate, Relation heap, Relation index, IndexInfo *indexInfo, ForkNumber forkNum)
|
||||||
{
|
{
|
||||||
int maxDimensions;
|
|
||||||
|
|
||||||
buildstate->heap = heap;
|
buildstate->heap = heap;
|
||||||
buildstate->index = index;
|
buildstate->index = index;
|
||||||
buildstate->indexInfo = indexInfo;
|
buildstate->indexInfo = indexInfo;
|
||||||
buildstate->forkNum = forkNum;
|
buildstate->forkNum = forkNum;
|
||||||
|
buildstate->typeInfo = HnswGetTypeInfo(index);
|
||||||
|
|
||||||
buildstate->m = HnswGetM(index);
|
buildstate->m = HnswGetM(index);
|
||||||
buildstate->efConstruction = HnswGetEfConstruction(index);
|
buildstate->efConstruction = HnswGetEfConstruction(index);
|
||||||
@@ -707,14 +692,12 @@ InitBuildState(HnswBuildState * buildstate, Relation heap, Relation index, Index
|
|||||||
if (TupleDescAttr(index->rd_att, 0)->atttypid == VARBITOID)
|
if (TupleDescAttr(index->rd_att, 0)->atttypid == VARBITOID)
|
||||||
elog(ERROR, "type not supported for hnsw index");
|
elog(ERROR, "type not supported for hnsw index");
|
||||||
|
|
||||||
maxDimensions = GetMaxDimensions(index);
|
|
||||||
|
|
||||||
/* Require column to have dimensions to be indexed */
|
/* Require column to have dimensions to be indexed */
|
||||||
if (buildstate->dimensions < 0)
|
if (buildstate->dimensions < 0)
|
||||||
elog(ERROR, "column does not have dimensions");
|
elog(ERROR, "column does not have dimensions");
|
||||||
|
|
||||||
if (buildstate->dimensions > maxDimensions)
|
if (buildstate->dimensions > buildstate->typeInfo->maxDimensions)
|
||||||
elog(ERROR, "column cannot have more than %d dimensions for hnsw index", maxDimensions);
|
elog(ERROR, "column cannot have more than %d dimensions for hnsw index", buildstate->typeInfo->maxDimensions);
|
||||||
|
|
||||||
if (buildstate->efConstruction < 2 * buildstate->m)
|
if (buildstate->efConstruction < 2 * buildstate->m)
|
||||||
elog(ERROR, "ef_construction must be greater than or equal to 2 * m");
|
elog(ERROR, "ef_construction must be greater than or equal to 2 * m");
|
||||||
@@ -726,7 +709,6 @@ InitBuildState(HnswBuildState * buildstate, Relation heap, Relation index, Index
|
|||||||
buildstate->procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC);
|
buildstate->procinfo = index_getprocinfo(index, 1, HNSW_DISTANCE_PROC);
|
||||||
buildstate->normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC);
|
buildstate->normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC);
|
||||||
buildstate->normalizeprocinfo = HnswOptionalProcInfo(index, HNSW_NORMALIZE_PROC);
|
buildstate->normalizeprocinfo = HnswOptionalProcInfo(index, HNSW_NORMALIZE_PROC);
|
||||||
buildstate->checkvalueprocinfo = HnswOptionalProcInfo(index, HNSW_CHECK_VALUE_PROC);
|
|
||||||
buildstate->collation = index->rd_indcollation[0];
|
buildstate->collation = index->rd_indcollation[0];
|
||||||
|
|
||||||
InitGraph(&buildstate->graphData, NULL, maintenance_work_mem * 1024L);
|
InitGraph(&buildstate->graphData, NULL, maintenance_work_mem * 1024L);
|
||||||
|
|||||||
@@ -612,7 +612,7 @@ static void
|
|||||||
HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid)
|
HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_tid)
|
||||||
{
|
{
|
||||||
Datum value;
|
Datum value;
|
||||||
FmgrInfo *checkvalueprocinfo = HnswOptionalProcInfo(index, HNSW_CHECK_VALUE_PROC);
|
const HnswTypeInfo *typeInfo = HnswGetTypeInfo(index);
|
||||||
FmgrInfo *normprocinfo;
|
FmgrInfo *normprocinfo;
|
||||||
Oid collation = index->rd_indcollation[0];
|
Oid collation = index->rd_indcollation[0];
|
||||||
|
|
||||||
@@ -620,8 +620,8 @@ HnswInsertTuple(Relation index, Datum *values, bool *isnull, ItemPointer heap_ti
|
|||||||
value = PointerGetDatum(PG_DETOAST_DATUM(values[0]));
|
value = PointerGetDatum(PG_DETOAST_DATUM(values[0]));
|
||||||
|
|
||||||
/* Check value */
|
/* Check value */
|
||||||
if (checkvalueprocinfo != NULL)
|
if (typeInfo->checkValue != NULL)
|
||||||
HnswCheckValue(checkvalueprocinfo, collation, value);
|
typeInfo->checkValue(DatumGetPointer(value));
|
||||||
|
|
||||||
/* Normalize if needed */
|
/* Normalize if needed */
|
||||||
normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC);
|
normprocinfo = HnswOptionalProcInfo(index, HNSW_NORM_PROC);
|
||||||
|
|||||||
@@ -173,15 +173,6 @@ HnswCheckNorm(FmgrInfo *procinfo, Oid collation, Datum value)
|
|||||||
return DatumGetFloat8(FunctionCall1Coll(procinfo, collation, value)) > 0;
|
return DatumGetFloat8(FunctionCall1Coll(procinfo, collation, value)) > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Check if a value can be indexed
|
|
||||||
*/
|
|
||||||
void
|
|
||||||
HnswCheckValue(FmgrInfo *procinfo, Oid collation, Datum value)
|
|
||||||
{
|
|
||||||
FunctionCall1Coll(procinfo, collation, value);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* New buffer
|
* New buffer
|
||||||
*/
|
*/
|
||||||
@@ -1276,35 +1267,68 @@ HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(hnsw_halfvec_max_dims);
|
static void
|
||||||
Datum
|
SparsevecCheckValue(Pointer v)
|
||||||
hnsw_halfvec_max_dims(PG_FUNCTION_ARGS)
|
|
||||||
{
|
{
|
||||||
PG_RETURN_INT32(HNSW_MAX_DIM * 2);
|
SparseVector *vec = (SparseVector *) v;
|
||||||
};
|
|
||||||
|
|
||||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(hnsw_bit_max_dims);
|
|
||||||
Datum
|
|
||||||
hnsw_bit_max_dims(PG_FUNCTION_ARGS)
|
|
||||||
{
|
|
||||||
PG_RETURN_INT32(HNSW_MAX_DIM * 32);
|
|
||||||
};
|
|
||||||
|
|
||||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(hnsw_sparsevec_max_dims);
|
|
||||||
Datum
|
|
||||||
hnsw_sparsevec_max_dims(PG_FUNCTION_ARGS)
|
|
||||||
{
|
|
||||||
PG_RETURN_INT32(SPARSEVEC_MAX_DIM);
|
|
||||||
};
|
|
||||||
|
|
||||||
PGDLLEXPORT PG_FUNCTION_INFO_V1(hnsw_sparsevec_check_value);
|
|
||||||
Datum
|
|
||||||
hnsw_sparsevec_check_value(PG_FUNCTION_ARGS)
|
|
||||||
{
|
|
||||||
SparseVector *vec = PG_GETARG_SPARSEVEC_P(0);
|
|
||||||
|
|
||||||
if (vec->nnz > HNSW_MAX_NNZ)
|
if (vec->nnz > HNSW_MAX_NNZ)
|
||||||
elog(ERROR, "sparsevec cannot have more than %d non-zero elements for hnsw index", HNSW_MAX_NNZ);
|
elog(ERROR, "sparsevec cannot have more than %d non-zero elements for hnsw index", HNSW_MAX_NNZ);
|
||||||
|
|
||||||
PG_RETURN_VOID();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Get type info
|
||||||
|
*/
|
||||||
|
const HnswTypeInfo *
|
||||||
|
HnswGetTypeInfo(Relation index)
|
||||||
|
{
|
||||||
|
FmgrInfo *procinfo = HnswOptionalProcInfo(index, HNSW_TYPE_INFO_PROC);
|
||||||
|
|
||||||
|
if (procinfo == NULL)
|
||||||
|
{
|
||||||
|
static const HnswTypeInfo typeInfo = {
|
||||||
|
.maxDimensions = HNSW_MAX_DIM,
|
||||||
|
.checkValue = NULL
|
||||||
|
};
|
||||||
|
|
||||||
|
return (&typeInfo);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
return (const HnswTypeInfo *) DatumGetPointer(FunctionCall0Coll(procinfo, InvalidOid));
|
||||||
|
}
|
||||||
|
|
||||||
|
PGDLLEXPORT PG_FUNCTION_INFO_V1(hnsw_halfvec_support);
|
||||||
|
Datum
|
||||||
|
hnsw_halfvec_support(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
static const HnswTypeInfo typeInfo = {
|
||||||
|
.maxDimensions = HNSW_MAX_DIM * 2,
|
||||||
|
.checkValue = NULL
|
||||||
|
};
|
||||||
|
|
||||||
|
PG_RETURN_POINTER(&typeInfo);
|
||||||
|
};
|
||||||
|
|
||||||
|
PGDLLEXPORT PG_FUNCTION_INFO_V1(hnsw_bit_support);
|
||||||
|
Datum
|
||||||
|
hnsw_bit_support(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
static const HnswTypeInfo typeInfo = {
|
||||||
|
.maxDimensions = HNSW_MAX_DIM * 32,
|
||||||
|
.checkValue = NULL
|
||||||
|
};
|
||||||
|
|
||||||
|
PG_RETURN_POINTER(&typeInfo);
|
||||||
|
};
|
||||||
|
|
||||||
|
PGDLLEXPORT PG_FUNCTION_INFO_V1(hnsw_sparsevec_support);
|
||||||
|
Datum
|
||||||
|
hnsw_sparsevec_support(PG_FUNCTION_ARGS)
|
||||||
|
{
|
||||||
|
static const HnswTypeInfo typeInfo = {
|
||||||
|
.maxDimensions = SPARSEVEC_MAX_DIM,
|
||||||
|
.checkValue = SparsevecCheckValue
|
||||||
|
};
|
||||||
|
|
||||||
|
PG_RETURN_POINTER(&typeInfo);
|
||||||
|
};
|
||||||
|
|||||||
Reference in New Issue
Block a user