mirror of
https://github.com/pgvector/pgvector.git
synced 2026-06-06 14:01:31 +08:00
Added minivec type
This commit is contained in:
4
Makefile
4
Makefile
@@ -4,8 +4,8 @@ EXTVERSION = 0.7.4
|
||||
MODULE_big = vector
|
||||
DATA = $(wildcard sql/*--*--*.sql)
|
||||
DATA_built = sql/$(EXTENSION)--$(EXTVERSION).sql
|
||||
OBJS = src/bitutils.o src/bitvec.o src/halfutils.o src/halfvec.o src/hnsw.o src/hnswbuild.o src/hnswinsert.o src/hnswscan.o src/hnswutils.o src/hnswvacuum.o src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/sparsevec.o src/vector.o
|
||||
HEADERS = src/halfvec.h src/sparsevec.h src/vector.h
|
||||
# Object files, kept in alphabetical order (same order as Makefile.win)
OBJS = src/bitutils.o src/bitvec.o src/halfutils.o src/halfvec.o src/hnsw.o src/hnswbuild.o src/hnswinsert.o src/hnswscan.o src/hnswutils.o src/hnswvacuum.o src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/minivec.o src/sparsevec.o src/vector.o
|
||||
HEADERS = src/halfvec.h src/minivec.h src/sparsevec.h src/vector.h
|
||||
|
||||
TESTS = $(wildcard test/sql/*.sql)
|
||||
REGRESS = $(patsubst test/sql/%.sql,%,$(TESTS))
|
||||
|
||||
@@ -2,10 +2,10 @@ EXTENSION = vector
|
||||
EXTVERSION = 0.7.4
|
||||
|
||||
DATA_built = sql\$(EXTENSION)--$(EXTVERSION).sql
|
||||
OBJS = src\bitutils.obj src\bitvec.obj src\halfutils.obj src\halfvec.obj src\hnsw.obj src\hnswbuild.obj src\hnswinsert.obj src\hnswscan.obj src\hnswutils.obj src\hnswvacuum.obj src\ivfbuild.obj src\ivfflat.obj src\ivfinsert.obj src\ivfkmeans.obj src\ivfscan.obj src\ivfutils.obj src\ivfvacuum.obj src\sparsevec.obj src\vector.obj
|
||||
HEADERS = src\halfvec.h src\sparsevec.h src\vector.h
|
||||
OBJS = src\bitutils.obj src\bitvec.obj src\halfutils.obj src\halfvec.obj src\hnsw.obj src\hnswbuild.obj src\hnswinsert.obj src\hnswscan.obj src\hnswutils.obj src\hnswvacuum.obj src\ivfbuild.obj src\ivfflat.obj src\ivfinsert.obj src\ivfkmeans.obj src\ivfscan.obj src\ivfutils.obj src\ivfvacuum.obj src\minivec.obj src\sparsevec.obj src\vector.obj
|
||||
HEADERS = src\halfvec.h src\minivec.h src\sparsevec.h src\vector.h
|
||||
|
||||
REGRESS = bit btree cast copy halfvec hnsw_bit hnsw_halfvec hnsw_sparsevec hnsw_vector ivfflat_bit ivfflat_halfvec ivfflat_vector sparsevec vector_type
|
||||
REGRESS = bit btree cast copy halfvec hnsw_bit hnsw_halfvec hnsw_sparsevec hnsw_vector ivfflat_bit ivfflat_halfvec ivfflat_vector minivec sparsevec vector_type
|
||||
REGRESS_OPTS = --inputdir=test --load-extension=$(EXTENSION)
|
||||
|
||||
# For /arch flags
|
||||
|
||||
31
README.md
31
README.md
@@ -934,6 +934,37 @@ Function | Description | Added
|
||||
avg(halfvec) → halfvec | average | 0.7.0
|
||||
sum(halfvec) → halfvec | sum | 0.7.0
|
||||
|
||||
### Minivec Type
|
||||
|
||||
Each mini vector takes `dimensions + 8` bytes of storage. Each element is an E4M3 8-bit floating-point number, and all elements must be finite (no `NaN`). Mini vectors can have up to 16,000 dimensions.
|
||||
|
||||
### Minivec Operators
|
||||
|
||||
Operator | Description | Added
|
||||
--- | --- | ---
|
||||
\+ | element-wise addition | 0.8.0
|
||||
\- | element-wise subtraction | 0.8.0
|
||||
\* | element-wise multiplication | 0.8.0
|
||||
\|\| | concatenate | 0.8.0
|
||||
<-> | Euclidean distance | 0.8.0
|
||||
<#> | negative inner product | 0.8.0
|
||||
<=> | cosine distance | 0.8.0
|
||||
<+> | taxicab distance | 0.8.0
|
||||
|
||||
### Minivec Functions
|
||||
|
||||
Function | Description | Added
|
||||
--- | --- | ---
|
||||
binary_quantize(minivec) → bit | binary quantize | 0.8.0
|
||||
cosine_distance(minivec, minivec) → double precision | cosine distance | 0.8.0
|
||||
inner_product(minivec, minivec) → double precision | inner product | 0.8.0
|
||||
l1_distance(minivec, minivec) → double precision | taxicab distance | 0.8.0
|
||||
l2_distance(minivec, minivec) → double precision | Euclidean distance | 0.8.0
|
||||
l2_norm(minivec) → double precision | Euclidean norm | 0.8.0
|
||||
l2_normalize(minivec) → minivec | Normalize with Euclidean norm | 0.8.0
|
||||
subvector(minivec, integer, integer) → minivec | subvector | 0.8.0
|
||||
vector_dims(minivec) → integer | number of dimensions | 0.8.0
|
||||
|
||||
### Bit Type
|
||||
|
||||
Each bit vector takes `dimensions / 8 + 8` bytes of storage. See the [Postgres docs](https://www.postgresql.org/docs/current/datatype-bit.html) for more info.
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
|
||||
\echo Use "ALTER EXTENSION vector UPDATE TO '0.8.0'" to load this file. \quit
|
||||
|
||||
-- TODO minivec functions
|
||||
|
||||
CREATE FUNCTION array_to_sparsevec(integer[], integer, boolean) RETURNS sparsevec
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
|
||||
265
sql/vector.sql
265
sql/vector.sql
@@ -272,6 +272,9 @@ CREATE FUNCTION ivfflat_bit_support(internal) RETURNS internal
|
||||
CREATE FUNCTION hnsw_halfvec_support(internal) RETURNS internal
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C;
|
||||
|
||||
CREATE FUNCTION hnsw_minivec_support(internal) RETURNS internal
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C;
|
||||
|
||||
CREATE FUNCTION hnsw_bit_support(internal) RETURNS internal
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C;
|
||||
|
||||
@@ -647,6 +650,268 @@ CREATE OPERATOR CLASS halfvec_l1_ops
|
||||
FUNCTION 1 l1_distance(halfvec, halfvec),
|
||||
FUNCTION 3 hnsw_halfvec_support(internal);
|
||||
|
||||
-- minivec type
|
||||
|
||||
CREATE TYPE minivec;
|
||||
|
||||
CREATE FUNCTION minivec_in(cstring, oid, integer) RETURNS minivec
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION minivec_out(minivec) RETURNS cstring
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION minivec_typmod_in(cstring[]) RETURNS integer
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION minivec_recv(internal, oid, integer) RETURNS minivec
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION minivec_send(minivec) RETURNS bytea
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE TYPE minivec (
|
||||
INPUT = minivec_in,
|
||||
OUTPUT = minivec_out,
|
||||
TYPMOD_IN = minivec_typmod_in,
|
||||
RECEIVE = minivec_recv,
|
||||
SEND = minivec_send,
|
||||
STORAGE = external
|
||||
);
|
||||
|
||||
-- minivec functions
|
||||
|
||||
CREATE FUNCTION l2_distance(minivec, minivec) RETURNS float8
|
||||
AS 'MODULE_PATHNAME', 'minivec_l2_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION inner_product(minivec, minivec) RETURNS float8
|
||||
AS 'MODULE_PATHNAME', 'minivec_inner_product' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION cosine_distance(minivec, minivec) RETURNS float8
|
||||
AS 'MODULE_PATHNAME', 'minivec_cosine_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION l1_distance(minivec, minivec) RETURNS float8
|
||||
AS 'MODULE_PATHNAME', 'minivec_l1_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION vector_dims(minivec) RETURNS integer
|
||||
AS 'MODULE_PATHNAME', 'minivec_vector_dims' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION l2_norm(minivec) RETURNS float8
|
||||
AS 'MODULE_PATHNAME', 'minivec_l2_norm' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION l2_normalize(minivec) RETURNS minivec
|
||||
AS 'MODULE_PATHNAME', 'minivec_l2_normalize' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION binary_quantize(minivec) RETURNS bit
|
||||
AS 'MODULE_PATHNAME', 'minivec_binary_quantize' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION subvector(minivec, int, int) RETURNS minivec
|
||||
AS 'MODULE_PATHNAME', 'minivec_subvector' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
-- minivec private functions
|
||||
|
||||
CREATE FUNCTION minivec_add(minivec, minivec) RETURNS minivec
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION minivec_sub(minivec, minivec) RETURNS minivec
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION minivec_mul(minivec, minivec) RETURNS minivec
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION minivec_concat(minivec, minivec) RETURNS minivec
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION minivec_lt(minivec, minivec) RETURNS bool
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION minivec_le(minivec, minivec) RETURNS bool
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION minivec_eq(minivec, minivec) RETURNS bool
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION minivec_ne(minivec, minivec) RETURNS bool
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION minivec_ge(minivec, minivec) RETURNS bool
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION minivec_gt(minivec, minivec) RETURNS bool
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION minivec_cmp(minivec, minivec) RETURNS int4
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION minivec_l2_squared_distance(minivec, minivec) RETURNS float8
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION minivec_negative_inner_product(minivec, minivec) RETURNS float8
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
-- minivec cast functions
|
||||
|
||||
CREATE FUNCTION minivec(minivec, integer, boolean) RETURNS minivec
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION minivec_to_vector(minivec, integer, boolean) RETURNS vector
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION vector_to_minivec(vector, integer, boolean) RETURNS minivec
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION array_to_minivec(integer[], integer, boolean) RETURNS minivec
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION array_to_minivec(real[], integer, boolean) RETURNS minivec
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION array_to_minivec(double precision[], integer, boolean) RETURNS minivec
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION array_to_minivec(numeric[], integer, boolean) RETURNS minivec
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
CREATE FUNCTION minivec_to_float4(minivec, integer, boolean) RETURNS real[]
|
||||
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
|
||||
|
||||
-- minivec casts
|
||||
|
||||
CREATE CAST (minivec AS minivec)
|
||||
WITH FUNCTION minivec(minivec, integer, boolean) AS IMPLICIT;
|
||||
|
||||
CREATE CAST (minivec AS vector)
|
||||
WITH FUNCTION minivec_to_vector(minivec, integer, boolean) AS ASSIGNMENT;
|
||||
|
||||
CREATE CAST (vector AS minivec)
|
||||
WITH FUNCTION vector_to_minivec(vector, integer, boolean) AS IMPLICIT;
|
||||
|
||||
CREATE CAST (minivec AS real[])
|
||||
WITH FUNCTION minivec_to_float4(minivec, integer, boolean) AS ASSIGNMENT;
|
||||
|
||||
CREATE CAST (integer[] AS minivec)
|
||||
WITH FUNCTION array_to_minivec(integer[], integer, boolean) AS ASSIGNMENT;
|
||||
|
||||
CREATE CAST (real[] AS minivec)
|
||||
WITH FUNCTION array_to_minivec(real[], integer, boolean) AS ASSIGNMENT;
|
||||
|
||||
CREATE CAST (double precision[] AS minivec)
|
||||
WITH FUNCTION array_to_minivec(double precision[], integer, boolean) AS ASSIGNMENT;
|
||||
|
||||
CREATE CAST (numeric[] AS minivec)
|
||||
WITH FUNCTION array_to_minivec(numeric[], integer, boolean) AS ASSIGNMENT;
|
||||
|
||||
-- minivec operators
|
||||
|
||||
CREATE OPERATOR <-> (
|
||||
LEFTARG = minivec, RIGHTARG = minivec, PROCEDURE = l2_distance,
|
||||
COMMUTATOR = '<->'
|
||||
);
|
||||
|
||||
CREATE OPERATOR <#> (
|
||||
LEFTARG = minivec, RIGHTARG = minivec, PROCEDURE = minivec_negative_inner_product,
|
||||
COMMUTATOR = '<#>'
|
||||
);
|
||||
|
||||
CREATE OPERATOR <=> (
|
||||
LEFTARG = minivec, RIGHTARG = minivec, PROCEDURE = cosine_distance,
|
||||
COMMUTATOR = '<=>'
|
||||
);
|
||||
|
||||
CREATE OPERATOR <+> (
|
||||
LEFTARG = minivec, RIGHTARG = minivec, PROCEDURE = l1_distance,
|
||||
COMMUTATOR = '<+>'
|
||||
);
|
||||
|
||||
CREATE OPERATOR + (
|
||||
LEFTARG = minivec, RIGHTARG = minivec, PROCEDURE = minivec_add,
|
||||
COMMUTATOR = +
|
||||
);
|
||||
|
||||
CREATE OPERATOR - (
|
||||
LEFTARG = minivec, RIGHTARG = minivec, PROCEDURE = minivec_sub
|
||||
);
|
||||
|
||||
CREATE OPERATOR * (
|
||||
LEFTARG = minivec, RIGHTARG = minivec, PROCEDURE = minivec_mul,
|
||||
COMMUTATOR = *
|
||||
);
|
||||
|
||||
CREATE OPERATOR || (
|
||||
LEFTARG = minivec, RIGHTARG = minivec, PROCEDURE = minivec_concat
|
||||
);
|
||||
|
||||
CREATE OPERATOR < (
|
||||
LEFTARG = minivec, RIGHTARG = minivec, PROCEDURE = minivec_lt,
|
||||
COMMUTATOR = > , NEGATOR = >= ,
|
||||
RESTRICT = scalarltsel, JOIN = scalarltjoinsel
|
||||
);
|
||||
|
||||
CREATE OPERATOR <= (
|
||||
LEFTARG = minivec, RIGHTARG = minivec, PROCEDURE = minivec_le,
|
||||
COMMUTATOR = >= , NEGATOR = > ,
|
||||
RESTRICT = scalarlesel, JOIN = scalarlejoinsel
|
||||
);
|
||||
|
||||
CREATE OPERATOR = (
|
||||
LEFTARG = minivec, RIGHTARG = minivec, PROCEDURE = minivec_eq,
|
||||
COMMUTATOR = = , NEGATOR = <> ,
|
||||
RESTRICT = eqsel, JOIN = eqjoinsel
|
||||
);
|
||||
|
||||
-- Inequality operator for minivec. Uses the "not equal" selectivity
-- estimators (neqsel / neqjoinsel) so the planner does not estimate
-- val <> x as if it were val = x.
CREATE OPERATOR <> (
	LEFTARG = minivec, RIGHTARG = minivec, PROCEDURE = minivec_ne,
	COMMUTATOR = <> , NEGATOR = = ,
	RESTRICT = neqsel, JOIN = neqjoinsel
);
|
||||
|
||||
CREATE OPERATOR >= (
|
||||
LEFTARG = minivec, RIGHTARG = minivec, PROCEDURE = minivec_ge,
|
||||
COMMUTATOR = <= , NEGATOR = < ,
|
||||
RESTRICT = scalargesel, JOIN = scalargejoinsel
|
||||
);
|
||||
|
||||
CREATE OPERATOR > (
|
||||
LEFTARG = minivec, RIGHTARG = minivec, PROCEDURE = minivec_gt,
|
||||
COMMUTATOR = < , NEGATOR = <= ,
|
||||
RESTRICT = scalargtsel, JOIN = scalargtjoinsel
|
||||
);
|
||||
|
||||
-- minivec op classes
|
||||
|
||||
CREATE OPERATOR CLASS minivec_ops
|
||||
DEFAULT FOR TYPE minivec USING btree AS
|
||||
OPERATOR 1 < ,
|
||||
OPERATOR 2 <= ,
|
||||
OPERATOR 3 = ,
|
||||
OPERATOR 4 >= ,
|
||||
OPERATOR 5 > ,
|
||||
FUNCTION 1 minivec_cmp(minivec, minivec);
|
||||
|
||||
CREATE OPERATOR CLASS minivec_l2_ops
|
||||
FOR TYPE minivec USING hnsw AS
|
||||
OPERATOR 1 <-> (minivec, minivec) FOR ORDER BY float_ops,
|
||||
FUNCTION 1 minivec_l2_squared_distance(minivec, minivec),
|
||||
FUNCTION 3 hnsw_minivec_support(internal);
|
||||
|
||||
CREATE OPERATOR CLASS minivec_ip_ops
|
||||
FOR TYPE minivec USING hnsw AS
|
||||
OPERATOR 1 <#> (minivec, minivec) FOR ORDER BY float_ops,
|
||||
FUNCTION 1 minivec_negative_inner_product(minivec, minivec),
|
||||
FUNCTION 3 hnsw_minivec_support(internal);
|
||||
|
||||
CREATE OPERATOR CLASS minivec_cosine_ops
|
||||
FOR TYPE minivec USING hnsw AS
|
||||
OPERATOR 1 <=> (minivec, minivec) FOR ORDER BY float_ops,
|
||||
FUNCTION 1 minivec_negative_inner_product(minivec, minivec),
|
||||
FUNCTION 2 l2_norm(minivec),
|
||||
FUNCTION 3 hnsw_minivec_support(internal);
|
||||
|
||||
CREATE OPERATOR CLASS minivec_l1_ops
|
||||
FOR TYPE minivec USING hnsw AS
|
||||
OPERATOR 1 <+> (minivec, minivec) FOR ORDER BY float_ops,
|
||||
FUNCTION 1 l1_distance(minivec, minivec),
|
||||
FUNCTION 3 hnsw_minivec_support(internal);
|
||||
|
||||
-- bit functions
|
||||
|
||||
CREATE FUNCTION hamming_distance(bit, bit) RETURNS float8
|
||||
|
||||
@@ -1327,6 +1327,7 @@ HnswFindElementNeighbors(char *base, HnswElement element, HnswElement entryPoint
|
||||
|
||||
PGDLLEXPORT Datum l2_normalize(PG_FUNCTION_ARGS);
|
||||
PGDLLEXPORT Datum halfvec_l2_normalize(PG_FUNCTION_ARGS);
|
||||
PGDLLEXPORT Datum minivec_l2_normalize(PG_FUNCTION_ARGS);
|
||||
PGDLLEXPORT Datum sparsevec_l2_normalize(PG_FUNCTION_ARGS);
|
||||
|
||||
static void
|
||||
@@ -1375,6 +1376,19 @@ hnsw_halfvec_support(PG_FUNCTION_ARGS)
|
||||
PG_RETURN_POINTER(&typeInfo);
|
||||
};
|
||||
|
||||
FUNCTION_PREFIX PG_FUNCTION_INFO_V1(hnsw_minivec_support);
|
||||
Datum
|
||||
hnsw_minivec_support(PG_FUNCTION_ARGS)
|
||||
{
|
||||
static const HnswTypeInfo typeInfo = {
|
||||
.maxDimensions = HNSW_MAX_DIM * 4,
|
||||
.normalize = minivec_l2_normalize,
|
||||
.checkValue = NULL
|
||||
};
|
||||
|
||||
PG_RETURN_POINTER(&typeInfo);
|
||||
};
|
||||
|
||||
FUNCTION_PREFIX PG_FUNCTION_INFO_V1(hnsw_bit_support);
|
||||
Datum
|
||||
hnsw_bit_support(PG_FUNCTION_ARGS)
|
||||
|
||||
1075
src/minivec.c
Normal file
1075
src/minivec.c
Normal file
File diff suppressed because it is too large
Load Diff
156
src/minivec.h
Normal file
156
src/minivec.h
Normal file
@@ -0,0 +1,156 @@
|
||||
#ifndef MINIVEC_H
|
||||
#define MINIVEC_H
|
||||
|
||||
#include <float.h>
|
||||
|
||||
#define MINIVEC_MAX_DIM 16000
|
||||
|
||||
#define fp8 uint8
|
||||
|
||||
#define MINIVEC_SIZE(_dim) (offsetof(MiniVector, x) + sizeof(fp8)*(_dim))
|
||||
#define DatumGetMiniVector(x) ((MiniVector *) PG_DETOAST_DATUM(x))
|
||||
#define PG_GETARG_MINIVEC_P(x) DatumGetMiniVector(PG_GETARG_DATUM(x))
|
||||
#define PG_RETURN_MINIVEC_P(x) PG_RETURN_POINTER(x)
|
||||
|
||||
/*
 * On-disk / in-memory representation of a minivec value.
 *
 * The fixed part (vl_len_, dim, unused) is followed by one fp8 element per
 * dimension; MINIVEC_SIZE(dim) gives the total allocation size.
 */
typedef struct MiniVector
|
||||
{
|
||||
int32 vl_len_; /* varlena header (do not touch directly!) */
|
||||
int16 dim; /* number of dimensions */
|
||||
int16 unused; /* reserved for future use, always zero */
|
||||
fp8 x[FLEXIBLE_ARRAY_MEMBER]; /* elements, one fp8 per dimension */
|
||||
} MiniVector;
|
||||
|
||||
MiniVector *InitMiniVector(int dim);
|
||||
|
||||
/*
|
||||
* Check if fp8 is NaN
|
||||
*/
|
||||
static inline bool
|
||||
Fp8IsNan(fp8 num)
|
||||
{
|
||||
return (num & 0x7F) == 0x7F;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if fp8 is zero
|
||||
*/
|
||||
static inline bool
|
||||
Fp8IsZero(fp8 num)
|
||||
{
|
||||
return num == 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert a fp8 to a float4
|
||||
*/
|
||||
static inline float
|
||||
Fp8ToFloat4(fp8 num)
|
||||
{
|
||||
float lookup[128] = {0, 0.00195312, 0.00390625, 0.00585938, 0.0078125, 0.00976562, 0.0117188, 0.0136719, 0.015625, 0.0175781, 0.0195312, 0.0214844, 0.0234375, 0.0253906, 0.0273438, 0.0292969, 0.03125, 0.0351562, 0.0390625, 0.0429688, 0.046875, 0.0507812, 0.0546875, 0.0585938, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.101562, 0.109375, 0.117188, 0.125, 0.140625, 0.15625, 0.171875, 0.1875, 0.203125, 0.21875, 0.234375, 0.25, 0.28125, 0.3125, 0.34375, 0.375, 0.40625, 0.4375, 0.46875, 0.5, 0.5625, 0.625, 0.6875, 0.75, 0.8125, 0.875, 0.9375, 1, 1.125, 1.25, 1.375, 1.5, 1.625, 1.75, 1.875, 2, 2.25, 2.5, 2.75, 3, 3.25, 3.5, 3.75, 4, 4.5, 5, 5.5, 6, 6.5, 7, 7.5, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 22, 24, 26, 28, 30, 32, 36, 40, 44, 48, 52, 56, 60, 64, 72, 80, 88, 96, 104, 112, 120, 128, 144, 160, 176, 192, 208, 224, 240, 256, 288, 320, 352, 384, 416, 448, NAN};
|
||||
float v = lookup[num & 0x7F];
|
||||
|
||||
return (num & 0x80) == 0x80 ? -v : v;
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert a float4 to a fp8
|
||||
*/
|
||||
static inline fp8
|
||||
Float4ToFp8Unchecked(float num)
|
||||
{
|
||||
union
|
||||
{
|
||||
float f;
|
||||
uint32 i;
|
||||
} swapfloat;
|
||||
|
||||
uint32 bin;
|
||||
int exponent;
|
||||
int mantissa;
|
||||
uint8 result;
|
||||
|
||||
swapfloat.f = num;
|
||||
bin = swapfloat.i;
|
||||
exponent = (bin & 0x7F800000) >> 23;
|
||||
mantissa = bin & 0x007FFFFF;
|
||||
|
||||
/* Sign */
|
||||
result = (bin & 0x80000000) >> 24;
|
||||
|
||||
if (isinf(num) || isnan(num))
|
||||
{
|
||||
/* NaN */
|
||||
result |= 0x7F;
|
||||
}
|
||||
else if (exponent > 116)
|
||||
{
|
||||
int m;
|
||||
int gr;
|
||||
int s;
|
||||
|
||||
exponent -= 127;
|
||||
s = mantissa & 0x000FFFFF;
|
||||
|
||||
/* Subnormal */
|
||||
if (exponent < -6)
|
||||
{
|
||||
int diff = -exponent - 6;
|
||||
|
||||
mantissa >>= diff;
|
||||
mantissa += 1 << (23 - diff);
|
||||
s |= mantissa & 0x000FFFFF;
|
||||
}
|
||||
|
||||
m = mantissa >> 20;
|
||||
|
||||
/* Round */
|
||||
gr = (mantissa >> 19) % 4;
|
||||
if (gr == 3 || (gr == 1 && s != 0))
|
||||
m += 1;
|
||||
|
||||
if (m == 8)
|
||||
{
|
||||
m = 0;
|
||||
exponent += 1;
|
||||
}
|
||||
|
||||
if (exponent > 8)
|
||||
{
|
||||
/* Infinite, which is NaN */
|
||||
result |= 0x7F;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (exponent >= -7)
|
||||
result |= (exponent + 7) << 3;
|
||||
|
||||
result |= m;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert a float4 to a fp8
|
||||
*/
|
||||
static inline fp8
|
||||
Float4ToFp8(float num)
|
||||
{
|
||||
fp8 result = Float4ToFp8Unchecked(num);
|
||||
|
||||
if (unlikely(Fp8IsNan(result)) && !isnan(num))
|
||||
{
|
||||
char *buf = palloc(FLOAT_SHORTEST_DECIMAL_LEN);
|
||||
|
||||
float_to_shortest_decimal_buf(num, buf);
|
||||
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
|
||||
errmsg("\"%s\" is out of range for type minivec", buf)));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
#endif
|
||||
23
src/vector.c
23
src/vector.c
@@ -13,6 +13,7 @@
|
||||
#include "ivfflat.h"
|
||||
#include "lib/stringinfo.h"
|
||||
#include "libpq/pqformat.h"
|
||||
#include "minivec.h"
|
||||
#include "port.h" /* for strtof() */
|
||||
#include "sparsevec.h"
|
||||
#include "utils/array.h"
|
||||
@@ -542,6 +543,28 @@ halfvec_to_vector(PG_FUNCTION_ARGS)
|
||||
PG_RETURN_POINTER(result);
|
||||
}
|
||||
|
||||
/*
|
||||
* Convert fp8 vector to vector
|
||||
*/
|
||||
FUNCTION_PREFIX PG_FUNCTION_INFO_V1(minivec_to_vector);
|
||||
Datum
|
||||
minivec_to_vector(PG_FUNCTION_ARGS)
|
||||
{
|
||||
MiniVector *vec = PG_GETARG_MINIVEC_P(0);
|
||||
int32 typmod = PG_GETARG_INT32(1);
|
||||
Vector *result;
|
||||
|
||||
CheckDim(vec->dim);
|
||||
CheckExpectedDim(typmod, vec->dim);
|
||||
|
||||
result = InitVector(vec->dim);
|
||||
|
||||
for (int i = 0; i < vec->dim; i++)
|
||||
result->x[i] = Fp8ToFloat4(vec->x[i]);
|
||||
|
||||
PG_RETURN_POINTER(result);
|
||||
}
|
||||
|
||||
VECTOR_TARGET_CLONES static float
|
||||
VectorL2SquaredDistance(int dim, float *ax, float *bx)
|
||||
{
|
||||
|
||||
@@ -38,6 +38,26 @@ SELECT * FROM t ORDER BY val;
|
||||
|
||||
(4 rows)
|
||||
|
||||
DROP TABLE t;
|
||||
-- minivec
|
||||
CREATE TABLE t (val minivec(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE INDEX ON t (val);
|
||||
SELECT * FROM t WHERE val = '[1,2,3]';
|
||||
val
|
||||
---------
|
||||
[1,2,3]
|
||||
(1 row)
|
||||
|
||||
SELECT * FROM t ORDER BY val;
|
||||
val
|
||||
---------
|
||||
[0,0,0]
|
||||
[1,1,1]
|
||||
[1,2,3]
|
||||
|
||||
(4 rows)
|
||||
|
||||
DROP TABLE t;
|
||||
-- sparsevec
|
||||
CREATE TABLE t (val sparsevec(3));
|
||||
|
||||
@@ -140,6 +140,64 @@ SELECT '{1e-8,-1e-8}'::real[]::halfvec;
|
||||
[0,-0]
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2,3]'::vector::minivec;
|
||||
minivec
|
||||
---------
|
||||
[1,2,3]
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2,3]'::vector::minivec(3);
|
||||
minivec
|
||||
---------
|
||||
[1,2,3]
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2,3]'::vector::minivec(2);
|
||||
ERROR: expected 2 dimensions, not 3
|
||||
SELECT '[465]'::vector::minivec;
|
||||
ERROR: "465" is out of range for type minivec
|
||||
SELECT '[1e-8]'::vector::minivec;
|
||||
minivec
|
||||
---------
|
||||
[0]
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2,3]'::minivec::vector;
|
||||
vector
|
||||
---------
|
||||
[1,2,3]
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2,3]'::minivec::vector(3);
|
||||
vector
|
||||
---------
|
||||
[1,2,3]
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2,3]'::minivec::vector(2);
|
||||
ERROR: expected 2 dimensions, not 3
|
||||
SELECT '{1,2,3}'::real[]::minivec;
|
||||
minivec
|
||||
---------
|
||||
[1,2,3]
|
||||
(1 row)
|
||||
|
||||
SELECT '{1,2,3}'::real[]::minivec(3);
|
||||
minivec
|
||||
---------
|
||||
[1,2,3]
|
||||
(1 row)
|
||||
|
||||
SELECT '{1,2,3}'::real[]::minivec(2);
|
||||
ERROR: expected 2 dimensions, not 3
|
||||
SELECT '{465,-465}'::real[]::minivec;
|
||||
ERROR: "465" is out of range for type minivec
|
||||
SELECT '{1e-8,-1e-8}'::real[]::minivec;
|
||||
minivec
|
||||
---------
|
||||
[0,-0]
|
||||
(1 row)
|
||||
|
||||
SELECT '[0,1.5,0,3.5,0]'::vector::sparsevec;
|
||||
sparsevec
|
||||
-----------------
|
||||
|
||||
@@ -30,6 +30,23 @@ SELECT * FROM t2 ORDER BY val;
|
||||
|
||||
(4 rows)
|
||||
|
||||
DROP TABLE t;
|
||||
DROP TABLE t2;
|
||||
-- minivec
|
||||
CREATE TABLE t (val minivec(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE TABLE t2 (val minivec(3));
|
||||
\copy t TO 'results/minivec.bin' WITH (FORMAT binary)
|
||||
\copy t2 FROM 'results/minivec.bin' WITH (FORMAT binary)
|
||||
SELECT * FROM t2 ORDER BY val;
|
||||
val
|
||||
---------
|
||||
[0,0,0]
|
||||
[1,1,1]
|
||||
[1,2,3]
|
||||
|
||||
(4 rows)
|
||||
|
||||
DROP TABLE t;
|
||||
DROP TABLE t2;
|
||||
-- sparsevec
|
||||
|
||||
588
test/expected/minivec.out
Normal file
588
test/expected/minivec.out
Normal file
@@ -0,0 +1,588 @@
|
||||
SELECT '[1,2,3]'::minivec;
|
||||
minivec
|
||||
---------
|
||||
[1,2,3]
|
||||
(1 row)
|
||||
|
||||
SELECT '[-1,-2,-3]'::minivec;
|
||||
minivec
|
||||
------------
|
||||
[-1,-2,-3]
|
||||
(1 row)
|
||||
|
||||
SELECT '[1.,2.,3.]'::minivec;
|
||||
minivec
|
||||
---------
|
||||
[1,2,3]
|
||||
(1 row)
|
||||
|
||||
SELECT ' [ 1, 2 , 3 ] '::minivec;
|
||||
minivec
|
||||
---------
|
||||
[1,2,3]
|
||||
(1 row)
|
||||
|
||||
SELECT '[1.23456]'::minivec;
|
||||
minivec
|
||||
---------
|
||||
[1.25]
|
||||
(1 row)
|
||||
|
||||
SELECT '[hello,1]'::minivec;
|
||||
ERROR: invalid input syntax for type minivec: "[hello,1]"
|
||||
LINE 1: SELECT '[hello,1]'::minivec;
|
||||
^
|
||||
SELECT '[NaN,1]'::minivec;
|
||||
ERROR: NaN not allowed in minivec
|
||||
LINE 1: SELECT '[NaN,1]'::minivec;
|
||||
^
|
||||
SELECT '[Infinity,1]'::minivec;
|
||||
ERROR: "Infinity" is out of range for type minivec
|
||||
LINE 1: SELECT '[Infinity,1]'::minivec;
|
||||
^
|
||||
SELECT '[-Infinity,1]'::minivec;
|
||||
ERROR: "-Infinity" is out of range for type minivec
|
||||
LINE 1: SELECT '[-Infinity,1]'::minivec;
|
||||
^
|
||||
SELECT '[65519,-65519]'::minivec;
|
||||
ERROR: "65519" is out of range for type minivec
|
||||
LINE 1: SELECT '[65519,-65519]'::minivec;
|
||||
^
|
||||
SELECT '[65520,-65520]'::minivec;
|
||||
ERROR: "65520" is out of range for type minivec
|
||||
LINE 1: SELECT '[65520,-65520]'::minivec;
|
||||
^
|
||||
SELECT '[1e-8,-1e-8]'::minivec;
|
||||
minivec
|
||||
---------
|
||||
[0,-0]
|
||||
(1 row)
|
||||
|
||||
SELECT '[4e38,1]'::minivec;
|
||||
ERROR: "4e38" is out of range for type minivec
|
||||
LINE 1: SELECT '[4e38,1]'::minivec;
|
||||
^
|
||||
SELECT '[1e-46,1]'::minivec;
|
||||
minivec
|
||||
---------
|
||||
[0,1]
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2,3'::minivec;
|
||||
ERROR: invalid input syntax for type minivec: "[1,2,3"
|
||||
LINE 1: SELECT '[1,2,3'::minivec;
|
||||
^
|
||||
SELECT '[1,2,3]9'::minivec;
|
||||
ERROR: invalid input syntax for type minivec: "[1,2,3]9"
|
||||
LINE 1: SELECT '[1,2,3]9'::minivec;
|
||||
^
|
||||
DETAIL: Junk after closing right brace.
|
||||
SELECT '1,2,3'::minivec;
|
||||
ERROR: invalid input syntax for type minivec: "1,2,3"
|
||||
LINE 1: SELECT '1,2,3'::minivec;
|
||||
^
|
||||
DETAIL: Vector contents must start with "[".
|
||||
SELECT ''::minivec;
|
||||
ERROR: invalid input syntax for type minivec: ""
|
||||
LINE 1: SELECT ''::minivec;
|
||||
^
|
||||
DETAIL: Vector contents must start with "[".
|
||||
SELECT '['::minivec;
|
||||
ERROR: invalid input syntax for type minivec: "["
|
||||
LINE 1: SELECT '['::minivec;
|
||||
^
|
||||
SELECT '[ '::minivec;
|
||||
ERROR: invalid input syntax for type minivec: "[ "
|
||||
LINE 1: SELECT '[ '::minivec;
|
||||
^
|
||||
SELECT '[,'::minivec;
|
||||
ERROR: invalid input syntax for type minivec: "[,"
|
||||
LINE 1: SELECT '[,'::minivec;
|
||||
^
|
||||
SELECT '[]'::minivec;
|
||||
ERROR: minivec must have at least 1 dimension
|
||||
LINE 1: SELECT '[]'::minivec;
|
||||
^
|
||||
SELECT '[ ]'::minivec;
|
||||
ERROR: minivec must have at least 1 dimension
|
||||
LINE 1: SELECT '[ ]'::minivec;
|
||||
^
|
||||
SELECT '[,]'::minivec;
|
||||
ERROR: invalid input syntax for type minivec: "[,]"
|
||||
LINE 1: SELECT '[,]'::minivec;
|
||||
^
|
||||
SELECT '[1,]'::minivec;
|
||||
ERROR: invalid input syntax for type minivec: "[1,]"
|
||||
LINE 1: SELECT '[1,]'::minivec;
|
||||
^
|
||||
SELECT '[1a]'::minivec;
|
||||
ERROR: invalid input syntax for type minivec: "[1a]"
|
||||
LINE 1: SELECT '[1a]'::minivec;
|
||||
^
|
||||
SELECT '[1,,3]'::minivec;
|
||||
ERROR: invalid input syntax for type minivec: "[1,,3]"
|
||||
LINE 1: SELECT '[1,,3]'::minivec;
|
||||
^
|
||||
SELECT '[1, ,3]'::minivec;
|
||||
ERROR: invalid input syntax for type minivec: "[1, ,3]"
|
||||
LINE 1: SELECT '[1, ,3]'::minivec;
|
||||
^
|
||||
SELECT '[1,2,3]'::minivec(3);
|
||||
minivec
|
||||
---------
|
||||
[1,2,3]
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2,3]'::minivec(2);
|
||||
ERROR: expected 2 dimensions, not 3
|
||||
SELECT '[1,2,3]'::minivec(3, 2);
|
||||
ERROR: invalid type modifier
|
||||
LINE 1: SELECT '[1,2,3]'::minivec(3, 2);
|
||||
^
|
||||
SELECT '[1,2,3]'::minivec('a');
|
||||
ERROR: invalid input syntax for type integer: "a"
|
||||
LINE 1: SELECT '[1,2,3]'::minivec('a');
|
||||
^
|
||||
SELECT '[1,2,3]'::minivec(0);
|
||||
ERROR: dimensions for type minivec must be at least 1
|
||||
LINE 1: SELECT '[1,2,3]'::minivec(0);
|
||||
^
|
||||
SELECT '[1,2,3]'::minivec(16001);
|
||||
ERROR: dimensions for type minivec cannot exceed 16000
|
||||
LINE 1: SELECT '[1,2,3]'::minivec(16001);
|
||||
^
|
||||
SELECT unnest('{"[1,2,3]", "[4,5,6]"}'::minivec[]);
|
||||
unnest
|
||||
---------
|
||||
[1,2,3]
|
||||
[4,5,6]
|
||||
(2 rows)
|
||||
|
||||
SELECT '{"[1,2,3]"}'::minivec(2)[];
|
||||
ERROR: expected 2 dimensions, not 3
|
||||
SELECT '[1,2,3]'::minivec + '[4,5,6]';
|
||||
?column?
|
||||
----------
|
||||
[5,7,9]
|
||||
(1 row)
|
||||
|
||||
SELECT '[448]'::minivec + '[448]';
|
||||
ERROR: value out of range: overflow
|
||||
SELECT '[1,2]'::minivec + '[3]';
|
||||
ERROR: different minivec dimensions 2 and 1
|
||||
SELECT '[1,2,3]'::minivec - '[4,5,6]';
|
||||
?column?
|
||||
------------
|
||||
[-3,-3,-3]
|
||||
(1 row)
|
||||
|
||||
SELECT '[-448]'::minivec - '[448]';
|
||||
ERROR: value out of range: overflow
|
||||
SELECT '[1,2]'::minivec - '[3]';
|
||||
ERROR: different minivec dimensions 2 and 1
|
||||
SELECT '[1,2,3]'::minivec * '[4,5,6]';
|
||||
?column?
|
||||
-----------
|
||||
[4,10,18]
|
||||
(1 row)
|
||||
|
||||
SELECT '[448]'::minivec * '[448]';
|
||||
ERROR: value out of range: overflow
|
||||
SELECT '[1e-7]'::minivec * '[1e-7]';
|
||||
?column?
|
||||
----------
|
||||
[0]
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2]'::minivec * '[3]';
|
||||
ERROR: different minivec dimensions 2 and 1
|
||||
SELECT '[1,2,3]'::minivec || '[4,5]';
|
||||
?column?
|
||||
-------------
|
||||
[1,2,3,4,5]
|
||||
(1 row)
|
||||
|
||||
SELECT array_fill(0, ARRAY[16000])::minivec || '[1]';
|
||||
ERROR: minivec cannot have more than 16000 dimensions
|
||||
SELECT '[1,2,3]'::minivec < '[1,2,3]';
|
||||
?column?
|
||||
----------
|
||||
f
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2,3]'::minivec < '[1,2]';
|
||||
?column?
|
||||
----------
|
||||
f
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2,3]'::minivec <= '[1,2,3]';
|
||||
?column?
|
||||
----------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2,3]'::minivec <= '[1,2]';
|
||||
?column?
|
||||
----------
|
||||
f
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2,3]'::minivec = '[1,2,3]';
|
||||
?column?
|
||||
----------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2,3]'::minivec = '[1,2]';
|
||||
?column?
|
||||
----------
|
||||
f
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2,3]'::minivec != '[1,2,3]';
|
||||
?column?
|
||||
----------
|
||||
f
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2,3]'::minivec != '[1,2]';
|
||||
?column?
|
||||
----------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2,3]'::minivec >= '[1,2,3]';
|
||||
?column?
|
||||
----------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2,3]'::minivec >= '[1,2]';
|
||||
?column?
|
||||
----------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2,3]'::minivec > '[1,2,3]';
|
||||
?column?
|
||||
----------
|
||||
f
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2,3]'::minivec > '[1,2]';
|
||||
?column?
|
||||
----------
|
||||
t
|
||||
(1 row)
|
||||
|
||||
SELECT minivec_cmp('[1,2,3]', '[1,2,3]');
|
||||
minivec_cmp
|
||||
-------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT minivec_cmp('[1,2,3]', '[0,0,0]');
|
||||
minivec_cmp
|
||||
-------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT minivec_cmp('[0,0,0]', '[1,2,3]');
|
||||
minivec_cmp
|
||||
-------------
|
||||
-1
|
||||
(1 row)
|
||||
|
||||
SELECT minivec_cmp('[1,2]', '[1,2,3]');
|
||||
minivec_cmp
|
||||
-------------
|
||||
-1
|
||||
(1 row)
|
||||
|
||||
SELECT minivec_cmp('[1,2,3]', '[1,2]');
|
||||
minivec_cmp
|
||||
-------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT minivec_cmp('[1,2]', '[2,3,4]');
|
||||
minivec_cmp
|
||||
-------------
|
||||
-1
|
||||
(1 row)
|
||||
|
||||
SELECT minivec_cmp('[2,3]', '[1,2,3]');
|
||||
minivec_cmp
|
||||
-------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT vector_dims('[1,2,3]'::minivec);
|
||||
vector_dims
|
||||
-------------
|
||||
3
|
||||
(1 row)
|
||||
|
||||
SELECT round(l2_norm('[1,1]'::minivec)::numeric, 5);
|
||||
round
|
||||
---------
|
||||
1.41421
|
||||
(1 row)
|
||||
|
||||
SELECT l2_norm('[3,4]'::minivec);
|
||||
l2_norm
|
||||
---------
|
||||
5
|
||||
(1 row)
|
||||
|
||||
SELECT l2_norm('[0,1]'::minivec);
|
||||
l2_norm
|
||||
---------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT l2_norm('[0,0]'::minivec);
|
||||
l2_norm
|
||||
---------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT l2_norm('[2]'::minivec);
|
||||
l2_norm
|
||||
---------
|
||||
2
|
||||
(1 row)
|
||||
|
||||
SELECT l2_distance('[0,0]'::minivec, '[3,4]');
|
||||
l2_distance
|
||||
-------------
|
||||
5
|
||||
(1 row)
|
||||
|
||||
SELECT l2_distance('[0,0]'::minivec, '[0,1]');
|
||||
l2_distance
|
||||
-------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT l2_distance('[1,2]'::minivec, '[3]');
|
||||
ERROR: different minivec dimensions 2 and 1
|
||||
SELECT l2_distance('[1,1,1,1,1,1,1,1,1]'::minivec, '[1,1,1,1,1,1,1,4,5]');
|
||||
l2_distance
|
||||
-------------
|
||||
5
|
||||
(1 row)
|
||||
|
||||
SELECT '[0,0]'::minivec <-> '[3,4]';
|
||||
?column?
|
||||
----------
|
||||
5
|
||||
(1 row)
|
||||
|
||||
SELECT inner_product('[1,2]'::minivec, '[3,4]');
|
||||
inner_product
|
||||
---------------
|
||||
11
|
||||
(1 row)
|
||||
|
||||
SELECT inner_product('[1,2]'::minivec, '[3]');
|
||||
ERROR: different minivec dimensions 2 and 1
|
||||
SELECT inner_product('[448]'::minivec, '[448]');
|
||||
inner_product
|
||||
---------------
|
||||
200704
|
||||
(1 row)
|
||||
|
||||
SELECT inner_product('[1,1,1,1,1,1,1,1,1]'::minivec, '[1,2,3,4,5,6,7,8,9]');
|
||||
inner_product
|
||||
---------------
|
||||
45
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2]'::minivec <#> '[3,4]';
|
||||
?column?
|
||||
----------
|
||||
-11
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,2]'::minivec, '[2,4]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,2]'::minivec, '[0,0]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
NaN
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,1]'::minivec, '[1,1]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,0]'::minivec, '[0,2]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,1]'::minivec, '[-1,-1]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
2
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,2]'::minivec, '[3]');
|
||||
ERROR: different minivec dimensions 2 and 1
|
||||
SELECT cosine_distance('[1,1]'::minivec, '[1.1,1.1]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,1]'::minivec, '[-1.1,-1.1]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
2
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,2,3,4,5,6,7,8,9]'::minivec, '[1,2,3,4,5,6,7,8,9]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT cosine_distance('[1,2,3,4,5,6,7,8,9]'::minivec, '[-1,-2,-3,-4,-5,-6,-7,-8,-9]');
|
||||
cosine_distance
|
||||
-----------------
|
||||
2
|
||||
(1 row)
|
||||
|
||||
SELECT '[1,2]'::minivec <=> '[2,4]';
|
||||
?column?
|
||||
----------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT l1_distance('[0,0]'::minivec, '[3,4]');
|
||||
l1_distance
|
||||
-------------
|
||||
7
|
||||
(1 row)
|
||||
|
||||
SELECT l1_distance('[0,0]'::minivec, '[0,1]');
|
||||
l1_distance
|
||||
-------------
|
||||
1
|
||||
(1 row)
|
||||
|
||||
SELECT l1_distance('[1,2]'::minivec, '[3]');
|
||||
ERROR: different minivec dimensions 2 and 1
|
||||
SELECT l1_distance('[1,2,3,4,5,6,7,8,9]'::minivec, '[1,2,3,4,5,6,7,8,9]');
|
||||
l1_distance
|
||||
-------------
|
||||
0
|
||||
(1 row)
|
||||
|
||||
SELECT l1_distance('[1,2,3,4,5,6,7,8,9]'::minivec, '[0,3,2,5,4,7,6,9,8]');
|
||||
l1_distance
|
||||
-------------
|
||||
9
|
||||
(1 row)
|
||||
|
||||
SELECT '[0,0]'::minivec <+> '[3,4]';
|
||||
?column?
|
||||
----------
|
||||
7
|
||||
(1 row)
|
||||
|
||||
SELECT l2_normalize('[3,4]'::minivec);
|
||||
l2_normalize
|
||||
----------------
|
||||
[0.625,0.8125]
|
||||
(1 row)
|
||||
|
||||
SELECT l2_normalize('[3,0]'::minivec);
|
||||
l2_normalize
|
||||
--------------
|
||||
[1,0]
|
||||
(1 row)
|
||||
|
||||
SELECT l2_normalize('[0,0.1]'::minivec);
|
||||
l2_normalize
|
||||
--------------
|
||||
[0,1]
|
||||
(1 row)
|
||||
|
||||
SELECT l2_normalize('[0,0]'::minivec);
|
||||
l2_normalize
|
||||
--------------
|
||||
[0,0]
|
||||
(1 row)
|
||||
|
||||
SELECT l2_normalize('[448]'::minivec);
|
||||
l2_normalize
|
||||
--------------
|
||||
[1]
|
||||
(1 row)
|
||||
|
||||
SELECT binary_quantize('[1,0,-1]'::minivec);
|
||||
binary_quantize
|
||||
-----------------
|
||||
100
|
||||
(1 row)
|
||||
|
||||
SELECT binary_quantize('[0,0.1,-0.2,-0.3,0.4,0.5,0.6,-0.7,0.8,-0.9,1]'::minivec);
|
||||
binary_quantize
|
||||
-----------------
|
||||
01001110101
|
||||
(1 row)
|
||||
|
||||
SELECT subvector('[1,2,3,4,5]'::minivec, 1, 3);
|
||||
subvector
|
||||
-----------
|
||||
[1,2,3]
|
||||
(1 row)
|
||||
|
||||
SELECT subvector('[1,2,3,4,5]'::minivec, 3, 2);
|
||||
subvector
|
||||
-----------
|
||||
[3,4]
|
||||
(1 row)
|
||||
|
||||
SELECT subvector('[1,2,3,4,5]'::minivec, -1, 3);
|
||||
subvector
|
||||
-----------
|
||||
[1]
|
||||
(1 row)
|
||||
|
||||
SELECT subvector('[1,2,3,4,5]'::minivec, 3, 9);
|
||||
subvector
|
||||
-----------
|
||||
[3,4,5]
|
||||
(1 row)
|
||||
|
||||
SELECT subvector('[1,2,3,4,5]'::minivec, 1, 0);
|
||||
ERROR: minivec must have at least 1 dimension
|
||||
SELECT subvector('[1,2,3,4,5]'::minivec, 3, -1);
|
||||
ERROR: minivec must have at least 1 dimension
|
||||
SELECT subvector('[1,2,3,4,5]'::minivec, -1, 2);
|
||||
ERROR: minivec must have at least 1 dimension
|
||||
SELECT subvector('[1,2,3,4,5]'::minivec, 2147483647, 10);
|
||||
ERROR: minivec must have at least 1 dimension
|
||||
SELECT subvector('[1,2,3,4,5]'::minivec, 3, 2147483647);
|
||||
subvector
|
||||
-----------
|
||||
[3,4,5]
|
||||
(1 row)
|
||||
|
||||
SELECT subvector('[1,2,3,4,5]'::minivec, -2147483644, 2147483647);
|
||||
subvector
|
||||
-----------
|
||||
[1,2]
|
||||
(1 row)
|
||||
|
||||
@@ -22,6 +22,17 @@ SELECT * FROM t ORDER BY val;
|
||||
|
||||
DROP TABLE t;
|
||||
|
||||
-- minivec
|
||||
|
||||
CREATE TABLE t (val minivec(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
CREATE INDEX ON t (val);
|
||||
|
||||
SELECT * FROM t WHERE val = '[1,2,3]';
|
||||
SELECT * FROM t ORDER BY val;
|
||||
|
||||
DROP TABLE t;
|
||||
|
||||
-- sparsevec
|
||||
|
||||
CREATE TABLE t (val sparsevec(3));
|
||||
|
||||
@@ -38,6 +38,22 @@ SELECT '{1,2,3}'::real[]::halfvec(2);
|
||||
SELECT '{65520,-65520}'::real[]::halfvec;
|
||||
SELECT '{1e-8,-1e-8}'::real[]::halfvec;
|
||||
|
||||
SELECT '[1,2,3]'::vector::minivec;
|
||||
SELECT '[1,2,3]'::vector::minivec(3);
|
||||
SELECT '[1,2,3]'::vector::minivec(2);
|
||||
SELECT '[465]'::vector::minivec;
|
||||
SELECT '[1e-8]'::vector::minivec;
|
||||
|
||||
SELECT '[1,2,3]'::minivec::vector;
|
||||
SELECT '[1,2,3]'::minivec::vector(3);
|
||||
SELECT '[1,2,3]'::minivec::vector(2);
|
||||
|
||||
SELECT '{1,2,3}'::real[]::minivec;
|
||||
SELECT '{1,2,3}'::real[]::minivec(3);
|
||||
SELECT '{1,2,3}'::real[]::minivec(2);
|
||||
SELECT '{465,-465}'::real[]::minivec;
|
||||
SELECT '{1e-8,-1e-8}'::real[]::minivec;
|
||||
|
||||
SELECT '[0,1.5,0,3.5,0]'::vector::sparsevec;
|
||||
SELECT '[0,1.5,0,3.5,0]'::vector::sparsevec(5);
|
||||
SELECT '[0,1.5,0,3.5,0]'::vector::sparsevec(4);
|
||||
|
||||
@@ -28,6 +28,21 @@ SELECT * FROM t2 ORDER BY val;
|
||||
DROP TABLE t;
|
||||
DROP TABLE t2;
|
||||
|
||||
-- minivec
|
||||
|
||||
CREATE TABLE t (val minivec(3));
|
||||
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
|
||||
|
||||
CREATE TABLE t2 (val minivec(3));
|
||||
|
||||
\copy t TO 'results/minivec.bin' WITH (FORMAT binary)
|
||||
\copy t2 FROM 'results/minivec.bin' WITH (FORMAT binary)
|
||||
|
||||
SELECT * FROM t2 ORDER BY val;
|
||||
|
||||
DROP TABLE t;
|
||||
DROP TABLE t2;
|
||||
|
||||
-- sparsevec
|
||||
|
||||
CREATE TABLE t (val sparsevec(3));
|
||||
|
||||
134
test/sql/minivec.sql
Normal file
134
test/sql/minivec.sql
Normal file
@@ -0,0 +1,134 @@
|
||||
SELECT '[1,2,3]'::minivec;
|
||||
SELECT '[-1,-2,-3]'::minivec;
|
||||
SELECT '[1.,2.,3.]'::minivec;
|
||||
SELECT ' [ 1, 2 , 3 ] '::minivec;
|
||||
SELECT '[1.23456]'::minivec;
|
||||
SELECT '[hello,1]'::minivec;
|
||||
SELECT '[NaN,1]'::minivec;
|
||||
SELECT '[Infinity,1]'::minivec;
|
||||
SELECT '[-Infinity,1]'::minivec;
|
||||
SELECT '[65519,-65519]'::minivec;
|
||||
SELECT '[65520,-65520]'::minivec;
|
||||
SELECT '[1e-8,-1e-8]'::minivec;
|
||||
SELECT '[4e38,1]'::minivec;
|
||||
SELECT '[1e-46,1]'::minivec;
|
||||
SELECT '[1,2,3'::minivec;
|
||||
SELECT '[1,2,3]9'::minivec;
|
||||
SELECT '1,2,3'::minivec;
|
||||
SELECT ''::minivec;
|
||||
SELECT '['::minivec;
|
||||
SELECT '[ '::minivec;
|
||||
SELECT '[,'::minivec;
|
||||
SELECT '[]'::minivec;
|
||||
SELECT '[ ]'::minivec;
|
||||
SELECT '[,]'::minivec;
|
||||
SELECT '[1,]'::minivec;
|
||||
SELECT '[1a]'::minivec;
|
||||
SELECT '[1,,3]'::minivec;
|
||||
SELECT '[1, ,3]'::minivec;
|
||||
|
||||
SELECT '[1,2,3]'::minivec(3);
|
||||
SELECT '[1,2,3]'::minivec(2);
|
||||
SELECT '[1,2,3]'::minivec(3, 2);
|
||||
SELECT '[1,2,3]'::minivec('a');
|
||||
SELECT '[1,2,3]'::minivec(0);
|
||||
SELECT '[1,2,3]'::minivec(16001);
|
||||
|
||||
SELECT unnest('{"[1,2,3]", "[4,5,6]"}'::minivec[]);
|
||||
SELECT '{"[1,2,3]"}'::minivec(2)[];
|
||||
|
||||
SELECT '[1,2,3]'::minivec + '[4,5,6]';
|
||||
SELECT '[448]'::minivec + '[448]';
|
||||
SELECT '[1,2]'::minivec + '[3]';
|
||||
|
||||
SELECT '[1,2,3]'::minivec - '[4,5,6]';
|
||||
SELECT '[-448]'::minivec - '[448]';
|
||||
SELECT '[1,2]'::minivec - '[3]';
|
||||
|
||||
SELECT '[1,2,3]'::minivec * '[4,5,6]';
|
||||
SELECT '[448]'::minivec * '[448]';
|
||||
SELECT '[1e-7]'::minivec * '[1e-7]';
|
||||
SELECT '[1,2]'::minivec * '[3]';
|
||||
|
||||
SELECT '[1,2,3]'::minivec || '[4,5]';
|
||||
SELECT array_fill(0, ARRAY[16000])::minivec || '[1]';
|
||||
|
||||
SELECT '[1,2,3]'::minivec < '[1,2,3]';
|
||||
SELECT '[1,2,3]'::minivec < '[1,2]';
|
||||
SELECT '[1,2,3]'::minivec <= '[1,2,3]';
|
||||
SELECT '[1,2,3]'::minivec <= '[1,2]';
|
||||
SELECT '[1,2,3]'::minivec = '[1,2,3]';
|
||||
SELECT '[1,2,3]'::minivec = '[1,2]';
|
||||
SELECT '[1,2,3]'::minivec != '[1,2,3]';
|
||||
SELECT '[1,2,3]'::minivec != '[1,2]';
|
||||
SELECT '[1,2,3]'::minivec >= '[1,2,3]';
|
||||
SELECT '[1,2,3]'::minivec >= '[1,2]';
|
||||
SELECT '[1,2,3]'::minivec > '[1,2,3]';
|
||||
SELECT '[1,2,3]'::minivec > '[1,2]';
|
||||
|
||||
SELECT minivec_cmp('[1,2,3]', '[1,2,3]');
|
||||
SELECT minivec_cmp('[1,2,3]', '[0,0,0]');
|
||||
SELECT minivec_cmp('[0,0,0]', '[1,2,3]');
|
||||
SELECT minivec_cmp('[1,2]', '[1,2,3]');
|
||||
SELECT minivec_cmp('[1,2,3]', '[1,2]');
|
||||
SELECT minivec_cmp('[1,2]', '[2,3,4]');
|
||||
SELECT minivec_cmp('[2,3]', '[1,2,3]');
|
||||
|
||||
SELECT vector_dims('[1,2,3]'::minivec);
|
||||
|
||||
SELECT round(l2_norm('[1,1]'::minivec)::numeric, 5);
|
||||
SELECT l2_norm('[3,4]'::minivec);
|
||||
SELECT l2_norm('[0,1]'::minivec);
|
||||
SELECT l2_norm('[0,0]'::minivec);
|
||||
SELECT l2_norm('[2]'::minivec);
|
||||
|
||||
SELECT l2_distance('[0,0]'::minivec, '[3,4]');
|
||||
SELECT l2_distance('[0,0]'::minivec, '[0,1]');
|
||||
SELECT l2_distance('[1,2]'::minivec, '[3]');
|
||||
SELECT l2_distance('[1,1,1,1,1,1,1,1,1]'::minivec, '[1,1,1,1,1,1,1,4,5]');
|
||||
SELECT '[0,0]'::minivec <-> '[3,4]';
|
||||
|
||||
SELECT inner_product('[1,2]'::minivec, '[3,4]');
|
||||
SELECT inner_product('[1,2]'::minivec, '[3]');
|
||||
SELECT inner_product('[448]'::minivec, '[448]');
|
||||
SELECT inner_product('[1,1,1,1,1,1,1,1,1]'::minivec, '[1,2,3,4,5,6,7,8,9]');
|
||||
SELECT '[1,2]'::minivec <#> '[3,4]';
|
||||
|
||||
SELECT cosine_distance('[1,2]'::minivec, '[2,4]');
|
||||
SELECT cosine_distance('[1,2]'::minivec, '[0,0]');
|
||||
SELECT cosine_distance('[1,1]'::minivec, '[1,1]');
|
||||
SELECT cosine_distance('[1,0]'::minivec, '[0,2]');
|
||||
SELECT cosine_distance('[1,1]'::minivec, '[-1,-1]');
|
||||
SELECT cosine_distance('[1,2]'::minivec, '[3]');
|
||||
SELECT cosine_distance('[1,1]'::minivec, '[1.1,1.1]');
|
||||
SELECT cosine_distance('[1,1]'::minivec, '[-1.1,-1.1]');
|
||||
SELECT cosine_distance('[1,2,3,4,5,6,7,8,9]'::minivec, '[1,2,3,4,5,6,7,8,9]');
|
||||
SELECT cosine_distance('[1,2,3,4,5,6,7,8,9]'::minivec, '[-1,-2,-3,-4,-5,-6,-7,-8,-9]');
|
||||
SELECT '[1,2]'::minivec <=> '[2,4]';
|
||||
|
||||
SELECT l1_distance('[0,0]'::minivec, '[3,4]');
|
||||
SELECT l1_distance('[0,0]'::minivec, '[0,1]');
|
||||
SELECT l1_distance('[1,2]'::minivec, '[3]');
|
||||
SELECT l1_distance('[1,2,3,4,5,6,7,8,9]'::minivec, '[1,2,3,4,5,6,7,8,9]');
|
||||
SELECT l1_distance('[1,2,3,4,5,6,7,8,9]'::minivec, '[0,3,2,5,4,7,6,9,8]');
|
||||
SELECT '[0,0]'::minivec <+> '[3,4]';
|
||||
|
||||
SELECT l2_normalize('[3,4]'::minivec);
|
||||
SELECT l2_normalize('[3,0]'::minivec);
|
||||
SELECT l2_normalize('[0,0.1]'::minivec);
|
||||
SELECT l2_normalize('[0,0]'::minivec);
|
||||
SELECT l2_normalize('[448]'::minivec);
|
||||
|
||||
SELECT binary_quantize('[1,0,-1]'::minivec);
|
||||
SELECT binary_quantize('[0,0.1,-0.2,-0.3,0.4,0.5,0.6,-0.7,0.8,-0.9,1]'::minivec);
|
||||
|
||||
SELECT subvector('[1,2,3,4,5]'::minivec, 1, 3);
|
||||
SELECT subvector('[1,2,3,4,5]'::minivec, 3, 2);
|
||||
SELECT subvector('[1,2,3,4,5]'::minivec, -1, 3);
|
||||
SELECT subvector('[1,2,3,4,5]'::minivec, 3, 9);
|
||||
SELECT subvector('[1,2,3,4,5]'::minivec, 1, 0);
|
||||
SELECT subvector('[1,2,3,4,5]'::minivec, 3, -1);
|
||||
SELECT subvector('[1,2,3,4,5]'::minivec, -1, 2);
|
||||
SELECT subvector('[1,2,3,4,5]'::minivec, 2147483647, 10);
|
||||
SELECT subvector('[1,2,3,4,5]'::minivec, 3, 2147483647);
|
||||
SELECT subvector('[1,2,3,4,5]'::minivec, -2147483644, 2147483647);
|
||||
136
test/t/039_hnsw_minivec_build_recall.pl
Normal file
136
test/t/039_hnsw_minivec_build_recall.pl
Normal file
@@ -0,0 +1,136 @@
|
||||
use strict;
|
||||
use warnings FATAL => 'all';
|
||||
use PostgreSQL::Test::Cluster;
|
||||
use PostgreSQL::Test::Utils;
|
||||
use Test::More;
|
||||
|
||||
# Cluster handle; assigned when the node is created further down.
my $node;

# Query vectors as '[...]' literals, and the exact-search output for each one.
my @queries;
my @expected;

# LIMIT applied to every search, and the dimension of the minivec column.
my ($limit, $dim) = (20, 10);

# Comma-separated list of $dim random() expressions, used inside ARRAY[...].
my $array_sql = join(",", ('2 * random() * random()') x $dim);
|
||||
|
||||
# Compare index-scan results against the exact results stored in @expected.
#
# Arguments:
#   $min      - lowest acceptable recall, as a fraction of expected rows found
#   $operator - distance operator interpolated into the ORDER BY clause; also
#               used to label the emitted test results
#
# Reads the file-level $node, @queries, @expected and $limit. Emits two test
# results: one checking that the plan for the first query mentions
# "Index Scan", and one checking that the overall recall is at least $min.
sub test_recall
{
    my ($min, $operator) = @_;
    my $correct = 0;
    my $total = 0;

    my $explain = $node->safe_psql("postgres", qq(
        SET enable_seqscan = off;
        EXPLAIN ANALYZE SELECT i FROM tst ORDER BY v $operator '$queries[0]' LIMIT $limit;
    ));
    like($explain, qr/Index Scan/, "$operator uses index scan");

    for my $i (0 .. $#queries)
    {
        my $actual = $node->safe_psql("postgres", qq(
            SET enable_seqscan = off;
            SELECT i FROM tst ORDER BY v $operator '$queries[$i]' LIMIT $limit;
        ));
        my %actual_set = map { $_ => 1 } split("\n", $actual);

        my @expected_ids = split("\n", $expected[$i]);

        # grep in scalar context yields the number of expected ids that the
        # index scan also returned.
        $correct += grep { exists($actual_set{$_}) } @expected_ids;
        $total += scalar(@expected_ids);
    }

    # With no expected rows the ratio is undefined. Report a failed test
    # rather than aborting the script with a division by zero.
    if ($total == 0)
    {
        return fail("$operator: no expected rows to compare against");
    }

    return cmp_ok($correct / $total, ">=", $min, $operator);
}
|
||||
|
||||
# Initialize node
$node = PostgreSQL::Test::Cluster->new('node');
$node->init;
$node->start;

# Create table: install the extension, create tst and fill it with
# 10000 rows of random $dim-element vectors.
my @setup_statements = (
    "CREATE EXTENSION vector;",
    "CREATE TABLE tst (i int4, v minivec($dim));",
    "INSERT INTO tst SELECT i, ARRAY[$array_sql] FROM generate_series(1, 10000) i;",
);
for my $statement (@setup_statements)
{
    $node->safe_psql("postgres", $statement);
}
|
||||
|
||||
# Generate queries: 20 vector literals of $dim random components each,
# appended to @queries in '[a,b,...]' form.
for my $query_number (1 .. 20)
{
    my @components = map { rand() } 1 .. $dim;
    push(@queries, "[" . join(",", @components) . "]");
}
|
||||
|
||||
# Check each index type. Every entry pairs a distance operator with the
# operator class used to build the HNSW index for it.
my @index_types = (
    [ "<->", "minivec_l2_ops" ],
    [ "<#>", "minivec_ip_ops" ],
    [ "<=>", "minivec_cosine_ops" ],
    [ "<+>", "minivec_l1_ops" ],
);

for my $index_type (@index_types)
{
    my ($operator, $opclass) = @{$index_type};

    # Get exact results (no index exists on tst at this point)
    @expected = ();
    for my $query (@queries)
    {
        push(@expected, scalar $node->safe_psql("postgres", "SELECT i FROM tst ORDER BY v $operator '$query' LIMIT $limit;"));
    }

    # Build index serially
    $node->safe_psql("postgres", qq(
        SET max_parallel_maintenance_workers = 0;
        CREATE INDEX idx ON tst USING hnsw (v $opclass);
    ));

    # Test approximate results; the cosine operator gets a lower threshold
    my $min = ($operator eq '<=>') ? 0.65 : 0.98;
    test_recall($min, $operator);

    $node->safe_psql("postgres", "DROP INDEX idx;");

    # Build index in parallel in memory
    my ($memory_ret, $memory_stdout, $memory_stderr) = $node->psql("postgres", qq(
        SET client_min_messages = DEBUG;
        SET min_parallel_table_scan_size = 1;
        CREATE INDEX idx ON tst USING hnsw (v $opclass);
    ));
    is($memory_ret, 0, $memory_stderr);
    like($memory_stderr, qr/using \d+ parallel workers/);

    # Test approximate results
    test_recall($min, $operator);

    $node->safe_psql("postgres", "DROP INDEX idx;");

    # Build index in parallel on disk
    # Set parallel_workers on table to use workers with low maintenance_work_mem
    my ($disk_ret, $disk_stdout, $disk_stderr) = $node->psql("postgres", qq(
        ALTER TABLE tst SET (parallel_workers = 2);
        SET client_min_messages = DEBUG;
        SET maintenance_work_mem = '4MB';
        CREATE INDEX idx ON tst USING hnsw (v $opclass);
        ALTER TABLE tst RESET (parallel_workers);
    ));
    is($disk_ret, 0, $disk_stderr);
    like($disk_stderr, qr/using \d+ parallel workers/);
    like($disk_stderr, qr/hnsw graph no longer fits into maintenance_work_mem/);

    $node->safe_psql("postgres", "DROP INDEX idx;");
}

done_testing();
|
||||
Reference in New Issue
Block a user