Added intvec type

This commit is contained in:
Andrew Kane
2024-03-29 17:19:07 -07:00
parent 2c48e3edc2
commit 0d82124bca
27 changed files with 1425 additions and 58 deletions

View File

@@ -1,3 +1,7 @@
## 0.7.0 (unreleased)
- Added `intvec` type
## 0.6.2 (2024-03-18)
- Reduced lock contention with parallel HNSW index builds

View File

@@ -3,8 +3,8 @@ EXTVERSION = 0.6.2
MODULE_big = vector
DATA = $(wildcard sql/*--*.sql)
OBJS = src/hnsw.o src/hnswbuild.o src/hnswinsert.o src/hnswscan.o src/hnswutils.o src/hnswvacuum.o src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/vector.o
HEADERS = src/vector.h
OBJS = src/hnsw.o src/hnswbuild.o src/hnswinsert.o src/hnswscan.o src/hnswutils.o src/hnswvacuum.o src/intvec.o src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/vector.o
HEADERS = src/intvec.h src/vector.h
TESTS = $(wildcard test/sql/*.sql)
REGRESS = $(patsubst test/sql/%.sql,%,$(TESTS))

View File

@@ -1,8 +1,8 @@
EXTENSION = vector
EXTVERSION = 0.6.2
OBJS = src\hnsw.obj src\hnswbuild.obj src\hnswinsert.obj src\hnswscan.obj src\hnswutils.obj src\hnswvacuum.obj src\ivfbuild.obj src\ivfflat.obj src\ivfinsert.obj src\ivfkmeans.obj src\ivfscan.obj src\ivfutils.obj src\ivfvacuum.obj src\vector.obj
HEADERS = src\vector.h
OBJS = src\hnsw.obj src\hnswbuild.obj src\hnswinsert.obj src\hnswscan.obj src\hnswutils.obj src\hnswvacuum.obj src\intvec.obj src\ivfbuild.obj src\ivfflat.obj src\ivfinsert.obj src\ivfkmeans.obj src\ivfscan.obj src\ivfutils.obj src\ivfvacuum.obj src\vector.obj
HEADERS = src\intvec.h src\vector.h
REGRESS = btree cast copy functions input ivfflat_cosine ivfflat_ip ivfflat_l2 ivfflat_options ivfflat_unlogged
REGRESS_OPTS = --inputdir=test --load-extension=$(EXTENSION)

View File

@@ -732,6 +732,27 @@ Function | Description | Added
avg(vector) → vector | average |
sum(vector) → vector | sum | 0.5.0
### Intvec Type
Each int vector takes `dimensions + 8` bytes of storage. Each element is a single-byte signed integer (-128 to 127). Int vectors can have up to 16,000 dimensions.
### Intvec Operators
Operator | Description | Added
--- | --- | ---
<-> | Euclidean distance | 0.7.0
<#> | negative inner product | 0.7.0
<=> | cosine distance | 0.7.0
### Intvec Functions
Function | Description | Added
--- | --- | ---
cosine_distance(intvec, intvec) → double precision | cosine distance | 0.7.0
inner_product(intvec, intvec) → double precision | inner product | 0.7.0
l2_distance(intvec, intvec) → double precision | Euclidean distance | 0.7.0
l1_distance(intvec, intvec) → double precision | taxicab distance | 0.7.0
## Installation Notes - Linux and Mac
### Postgres Location

View File

@@ -0,0 +1,88 @@
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "ALTER EXTENSION vector UPDATE TO '0.7.0'" to load this file. \quit
-- intvec type
-- shell type first, so the I/O functions below can reference intvec
CREATE TYPE intvec;
CREATE FUNCTION intvec_in(cstring, oid, integer) RETURNS intvec
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION intvec_out(intvec) RETURNS cstring
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION intvec_typmod_in(cstring[]) RETURNS integer
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION intvec_recv(internal, oid, integer) RETURNS intvec
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION intvec_send(intvec) RETURNS bytea
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- complete the type definition with its text/binary I/O and typmod functions
CREATE TYPE intvec (
INPUT = intvec_in,
OUTPUT = intvec_out,
TYPMOD_IN = intvec_typmod_in,
RECEIVE = intvec_recv,
SEND = intvec_send,
STORAGE = external
);
-- intvec functions
CREATE FUNCTION l2_distance(intvec, intvec) RETURNS float8
AS 'MODULE_PATHNAME', 'intvec_l2_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION inner_product(intvec, intvec) RETURNS float8
AS 'MODULE_PATHNAME', 'intvec_inner_product' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION cosine_distance(intvec, intvec) RETURNS float8
AS 'MODULE_PATHNAME', 'intvec_cosine_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION l1_distance(intvec, intvec) RETURNS float8
AS 'MODULE_PATHNAME', 'intvec_l1_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- intvec private functions
CREATE FUNCTION intvec_l2_squared_distance(intvec, intvec) RETURNS float8
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION intvec_negative_inner_product(intvec, intvec) RETURNS float8
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- intvec cast functions
CREATE FUNCTION intvec(intvec, integer, boolean) RETURNS intvec
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION array_to_intvec(integer[], integer, boolean) RETURNS intvec
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- intvec casts
CREATE CAST (intvec AS intvec)
WITH FUNCTION intvec(intvec, integer, boolean) AS IMPLICIT;
CREATE CAST (integer[] AS intvec)
WITH FUNCTION array_to_intvec(integer[], integer, boolean) AS ASSIGNMENT;
-- intvec operators
CREATE OPERATOR <-> (
LEFTARG = intvec, RIGHTARG = intvec, PROCEDURE = l2_distance,
COMMUTATOR = '<->'
);
CREATE OPERATOR <#> (
LEFTARG = intvec, RIGHTARG = intvec, PROCEDURE = intvec_negative_inner_product,
COMMUTATOR = '<#>'
);
CREATE OPERATOR <=> (
LEFTARG = intvec, RIGHTARG = intvec, PROCEDURE = cosine_distance,
COMMUTATOR = '<=>'
);
-- intvec opclasses (hnsw only)
CREATE OPERATOR CLASS intvec_l2_ops
FOR TYPE intvec USING hnsw AS
OPERATOR 1 <-> (intvec, intvec) FOR ORDER BY float_ops,
FUNCTION 1 intvec_l2_squared_distance(intvec, intvec);
CREATE OPERATOR CLASS intvec_ip_ops
FOR TYPE intvec USING hnsw AS
OPERATOR 1 <#> (intvec, intvec) FOR ORDER BY float_ops,
FUNCTION 1 intvec_negative_inner_product(intvec, intvec);
CREATE OPERATOR CLASS intvec_cosine_ops
FOR TYPE intvec USING hnsw AS
OPERATOR 1 <=> (intvec, intvec) FOR ORDER BY float_ops,
FUNCTION 1 cosine_distance(intvec, intvec);

View File

@@ -287,3 +287,103 @@ CREATE OPERATOR CLASS vector_cosine_ops
OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops,
FUNCTION 1 vector_negative_inner_product(vector, vector),
FUNCTION 2 vector_norm(vector);
-- intvec type
CREATE TYPE intvec;
CREATE FUNCTION intvec_in(cstring, oid, integer) RETURNS intvec
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION intvec_out(intvec) RETURNS cstring
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION intvec_typmod_in(cstring[]) RETURNS integer
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION intvec_recv(internal, oid, integer) RETURNS intvec
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION intvec_send(intvec) RETURNS bytea
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE TYPE intvec (
INPUT = intvec_in,
OUTPUT = intvec_out,
TYPMOD_IN = intvec_typmod_in,
RECEIVE = intvec_recv,
SEND = intvec_send,
STORAGE = external
);
-- intvec functions
CREATE FUNCTION l2_distance(intvec, intvec) RETURNS float8
AS 'MODULE_PATHNAME', 'intvec_l2_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION inner_product(intvec, intvec) RETURNS float8
AS 'MODULE_PATHNAME', 'intvec_inner_product' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION cosine_distance(intvec, intvec) RETURNS float8
AS 'MODULE_PATHNAME', 'intvec_cosine_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION l1_distance(intvec, intvec) RETURNS float8
AS 'MODULE_PATHNAME', 'intvec_l1_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- intvec private functions
CREATE FUNCTION intvec_l2_squared_distance(intvec, intvec) RETURNS float8
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION intvec_negative_inner_product(intvec, intvec) RETURNS float8
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- intvec cast functions
CREATE FUNCTION intvec(intvec, integer, boolean) RETURNS intvec
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION array_to_intvec(integer[], integer, boolean) RETURNS intvec
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- intvec casts
CREATE CAST (intvec AS intvec)
WITH FUNCTION intvec(intvec, integer, boolean) AS IMPLICIT;
CREATE CAST (integer[] AS intvec)
WITH FUNCTION array_to_intvec(integer[], integer, boolean) AS ASSIGNMENT;
-- intvec operators
CREATE OPERATOR <-> (
LEFTARG = intvec, RIGHTARG = intvec, PROCEDURE = l2_distance,
COMMUTATOR = '<->'
);
CREATE OPERATOR <#> (
LEFTARG = intvec, RIGHTARG = intvec, PROCEDURE = intvec_negative_inner_product,
COMMUTATOR = '<#>'
);
CREATE OPERATOR <=> (
LEFTARG = intvec, RIGHTARG = intvec, PROCEDURE = cosine_distance,
COMMUTATOR = '<=>'
);
-- intvec opclasses
CREATE OPERATOR CLASS intvec_l2_ops
FOR TYPE intvec USING hnsw AS
OPERATOR 1 <-> (intvec, intvec) FOR ORDER BY float_ops,
FUNCTION 1 intvec_l2_squared_distance(intvec, intvec);
CREATE OPERATOR CLASS intvec_ip_ops
FOR TYPE intvec USING hnsw AS
OPERATOR 1 <#> (intvec, intvec) FOR ORDER BY float_ops,
FUNCTION 1 intvec_negative_inner_product(intvec, intvec);
CREATE OPERATOR CLASS intvec_cosine_ops
FOR TYPE intvec USING hnsw AS
OPERATOR 1 <=> (intvec, intvec) FOR ORDER BY float_ops,
FUNCTION 1 cosine_distance(intvec, intvec);

View File

@@ -57,7 +57,8 @@
typedef enum HnswType
{
HNSW_TYPE_VECTOR
HNSW_TYPE_VECTOR,
HNSW_TYPE_INTVEC
} HnswType;
/* Build phases */

View File

@@ -683,6 +683,9 @@ InitBuildState(HnswBuildState * buildstate, Relation heap, Relation index, Index
buildstate->efConstruction = HnswGetEfConstruction(index);
buildstate->dimensions = TupleDescAttr(index->rd_att, 0)->atttypmod;
if (buildstate->type == HNSW_TYPE_INTVEC)
maxDimensions *= 4;
/* Require column to have dimensions to be indexed */
if (buildstate->dimensions < 0)
elog(ERROR, "column does not have dimensions");

View File

@@ -1,14 +1,17 @@
#include "postgres.h"
#include <float.h>
#include <math.h>
#include "access/generic_xlog.h"
#include "catalog/pg_type.h"
#include "hnsw.h"
#include "lib/pairingheap.h"
#include "storage/bufmgr.h"
#include "utils/datum.h"
#include "utils/memdebug.h"
#include "utils/rel.h"
#include "utils/syscache.h"
#include "vector.h"
#if PG_VERSION_NUM >= 130000
@@ -155,7 +158,24 @@ HnswOptionalProcInfo(Relation index, uint16 procnum)
HnswType
HnswGetType(Relation index)
{
return HNSW_TYPE_VECTOR;
Oid typeOid = TupleDescAttr(index->rd_att, 0)->atttypid;
HeapTuple tuple;
Form_pg_type type;
int result;
tuple = SearchSysCache1(TYPEOID, ObjectIdGetDatum(typeOid));
if (!HeapTupleIsValid(tuple))
elog(ERROR, "cache lookup failed for type %u", typeOid);
type = (Form_pg_type) GETSTRUCT(tuple);
if (strcmp(NameStr(type->typname), "intvec") == 0)
result = HNSW_TYPE_INTVEC;
else
result = HNSW_TYPE_VECTOR;
ReleaseSysCache(tuple);
return result;
}
/*
@@ -592,7 +612,14 @@ HnswLoadElement(HnswElement element, float *distance, Datum *q, Relation index,
if (DatumGetPointer(*q) == NULL)
*distance = 0;
else
{
*distance = (float) DatumGetFloat8(FunctionCall2Coll(procinfo, collation, *q, PointerGetDatum(&etup->data)));
/* Needed for intvec cosine distance */
/* TODO Improve */
if (isnan(*distance))
*distance = FLT_MAX;
}
}
UnlockReleaseBuffer(buf);

586
src/intvec.c Normal file
View File

@@ -0,0 +1,586 @@
#include "postgres.h"
#include <limits.h>
#include <math.h>
#include "catalog/pg_type.h"
#include "fmgr.h"
#include "intvec.h"
#include "lib/stringinfo.h"
#include "libpq/pqformat.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
/*
 * Raise an error unless both int vectors have the same number of dimensions
 */
static inline void
CheckDims(IntVector * a, IntVector * b)
{
	/* Nothing to report when the dimensions agree */
	if (a->dim == b->dim)
		return;

	ereport(ERROR,
			(errcode(ERRCODE_DATA_EXCEPTION),
			 errmsg("different intvec dimensions %d and %d", a->dim, b->dim)));
}
/*
 * Raise an error when a type modifier is given (anything other than -1)
 * and the actual number of dimensions does not equal it
 */
static inline void
CheckExpectedDim(int32 typmod, int dim)
{
	/* -1 means no type modifier, so any dimension count is accepted */
	if (typmod == -1 || typmod == dim)
		return;

	ereport(ERROR,
			(errcode(ERRCODE_DATA_EXCEPTION),
			 errmsg("expected %d dimensions, not %d", typmod, dim)));
}
/*
 * Raise an error unless the dimension count is between 1 and INTVEC_MAX_DIM
 */
static inline void
CheckDim(int dim)
{
	if (dim < 1)
	{
		ereport(ERROR,
				(errcode(ERRCODE_DATA_EXCEPTION),
				 errmsg("intvec must have at least 1 dimension")));
	}
	else if (dim > INTVEC_MAX_DIM)
	{
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("intvec cannot have more than %d dimensions", INTVEC_MAX_DIM)));
	}
}
/*
* Ensure element in range
*/
static inline void
CheckElement(long value)
{
if (value < SCHAR_MIN || value > SCHAR_MAX)
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("value \"%ld\" is out of range for type intvec", value)));
}
/*
 * Allocate a zero-filled int vector with the given number of dimensions
 *
 * The varlena size and the dim field are set; the caller fills in x[].
 */
IntVector *
InitIntVector(int dim)
{
	int			nbytes = INTVEC_SIZE(dim);
	IntVector  *vec = (IntVector *) palloc0(nbytes);

	SET_VARSIZE(vec, nbytes);
	vec->dim = dim;

	return vec;
}
/*
 * Report whether a character is whitespace (space, tab, newline, carriage
 * return, vertical tab, or form feed).
 *
 * Defined locally since array_isspace() is static.
 */
static inline bool
intvec_isspace(char ch)
{
	switch (ch)
	{
		case ' ':
		case '\t':
		case '\n':
		case '\r':
		case '\v':
		case '\f':
			return true;
		default:
			return false;
	}
}
/*
 * Convert textual representation to internal representation
 *
 * Accepts a literal such as "[1,2,3]": optional leading whitespace, "[",
 * comma-separated base-10 integers (whitespace allowed around each one),
 * "]", and optional trailing whitespace. Every element must fit in a signed
 * byte (SCHAR_MIN to SCHAR_MAX).
 *
 * Argument 0 is the literal and argument 2 is the type modifier; when the
 * type modifier is not -1 the number of elements must equal it.
 *
 * Raises an error for a malformed literal, an out-of-range element, more
 * than INTVEC_MAX_DIM elements, or no elements at all.
 */
PGDLLEXPORT PG_FUNCTION_INFO_V1(intvec_in);
Datum
intvec_in(PG_FUNCTION_ARGS)
{
	char	   *lit = PG_GETARG_CSTRING(0);
	int32		typmod = PG_GETARG_INT32(2);
	int8		x[INTVEC_MAX_DIM];
	int			dim = 0;
	char	   *pt;
	char	   *stringEnd;
	IntVector  *result;

	/* strtok modifies its input, so tokenize a copy and keep lit for messages */
	char	   *litcopy = pstrdup(lit);
	char	   *str = litcopy;

	while (intvec_isspace(*str))
		str++;

	if (*str != '[')
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
				 errmsg("malformed intvec literal: \"%s\"", lit),
				 errdetail("Vector contents must start with \"[\".")));

	str++;
	pt = strtok(str, ",");

	/*
	 * Start stringEnd at the first token so a literal whose first token
	 * begins with "]" (such as "[]") skips the loop entirely.
	 */
	stringEnd = pt;

	while (pt != NULL && *stringEnd != ']')
	{
		long		l;

		if (dim == INTVEC_MAX_DIM)
			ereport(ERROR,
					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
					 errmsg("intvec cannot have more than %d dimensions", INTVEC_MAX_DIM)));

		while (intvec_isspace(*pt))
			pt++;

		/* Check for empty string like float4in */
		if (*pt == '\0')
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
					 errmsg("invalid input syntax for type intvec: \"%s\"", lit)));

		/* Use similar logic as int2vectorin */
		errno = 0;
		l = strtol(pt, &stringEnd, 10);

		/* No digits were consumed */
		if (stringEnd == pt)
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
					 errmsg("invalid input syntax for type intvec: \"%s\"", lit)));

		/*
		 * Report only the numeric text consumed by strtol. The token itself
		 * may still carry trailing whitespace or the closing "]".
		 */
		if (errno == ERANGE || l < SCHAR_MIN || l > SCHAR_MAX)
			ereport(ERROR,
					(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
					 errmsg("value \"%s\" is out of range for type intvec",
							pnstrdup(pt, stringEnd - pt))));

		x[dim++] = l;

		while (intvec_isspace(*stringEnd))
			stringEnd++;

		/* After the number, only the end of the token or "]" may follow */
		if (*stringEnd != '\0' && *stringEnd != ']')
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
					 errmsg("invalid input syntax for type intvec: \"%s\"", lit)));

		pt = strtok(NULL, ",");
	}

	/* stringEnd is NULL when strtok found no token at all (e.g. "[" or "[,") */
	if (stringEnd == NULL || *stringEnd != ']')
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
				 errmsg("malformed intvec literal: \"%s\"", lit),
				 errdetail("Unexpected end of input.")));

	stringEnd++;

	/* Only whitespace is allowed after the closing bracket */
	while (intvec_isspace(*stringEnd))
		stringEnd++;

	if (*stringEnd != '\0')
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
				 errmsg("malformed intvec literal: \"%s\"", lit),
				 errdetail("Junk after closing right brace.")));

	/* Ensure no consecutive delimiters since strtok skips */
	for (pt = lit + 1; *pt != '\0'; pt++)
	{
		if (pt[-1] == ',' && *pt == ',')
			ereport(ERROR,
					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
					 errmsg("malformed intvec literal: \"%s\"", lit)));
	}

	if (dim < 1)
		ereport(ERROR,
				(errcode(ERRCODE_DATA_EXCEPTION),
				 errmsg("intvec must have at least 1 dimension")));

	pfree(litcopy);

	CheckExpectedDim(typmod, dim);

	/* Copy the parsed elements from the stack buffer into the result */
	result = InitIntVector(dim);
	for (int i = 0; i < dim; i++)
		result->x[i] = x[i];

	PG_RETURN_POINTER(result);
}
/*
 * Convert internal representation to textual representation
 *
 * Produces "[e1,e2,...]" with no whitespace.
 */
PGDLLEXPORT PG_FUNCTION_INFO_V1(intvec_out);
Datum
intvec_out(PG_FUNCTION_ARGS)
{
	IntVector  *vector = PG_GETARG_INTVEC_P(0);
	int			dim = vector->dim;
	char	   *buf;
	char	   *cursor;

	/*
	 * Worst-case size:
	 *
	 * dim * 4 bytes for elements (-128 to 127)
	 *
	 * dim - 1 bytes for separators
	 *
	 * 3 bytes for [, ], and \0
	 *
	 * which totals 5 * dim + 2
	 */
	buf = (char *) palloc(5 * dim + 2);
	cursor = buf;

	*cursor++ = '[';

	for (int i = 0; i < dim; i++)
	{
		if (i > 0)
			*cursor++ = ',';

		/* pg_ltoa returns the number of characters it wrote */
		cursor += pg_ltoa(vector->x[i], cursor);
	}

	*cursor++ = ']';
	*cursor = '\0';

	PG_FREE_IF_COPY(vector, 0);
	PG_RETURN_CSTRING(buf);
}
/*
 * Convert type modifier
 *
 * Expects exactly one modifier, the number of dimensions, which must be
 * between 1 and INTVEC_MAX_DIM. Returns it unchanged.
 */
PGDLLEXPORT PG_FUNCTION_INFO_V1(intvec_typmod_in);
Datum
intvec_typmod_in(PG_FUNCTION_ARGS)
{
	ArrayType  *modifiers = PG_GETARG_ARRAYTYPE_P(0);
	int			count;
	int32	   *values = ArrayGetIntegerTypmods(modifiers, &count);
	int32		dimensions;

	if (count != 1)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid type modifier")));

	/* Safe to read now that exactly one modifier is known to exist */
	dimensions = values[0];

	if (dimensions < 1)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("dimensions for type intvec must be at least 1")));

	if (dimensions > INTVEC_MAX_DIM)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("dimensions for type intvec cannot exceed %d", INTVEC_MAX_DIM)));

	PG_RETURN_INT32(dimensions);
}
/*
* Convert external binary representation to internal representation
*/
PGDLLEXPORT PG_FUNCTION_INFO_V1(intvec_recv);
Datum
intvec_recv(PG_FUNCTION_ARGS)
{
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
int32 typmod = PG_GETARG_INT32(2);
IntVector *result;
int16 dim;
int16 unused;
dim = pq_getmsgint(buf, sizeof(int16));
unused = pq_getmsgint(buf, sizeof(int16));
CheckDim(dim);
CheckExpectedDim(typmod, dim);
if (unused != 0)
ereport(ERROR,
(errcode(ERRCODE_DATA_EXCEPTION),
errmsg("expected unused to be 0, not %d", unused)));
result = InitIntVector(dim);
for (int i = 0; i < dim; i++)
result->x[i] = pq_getmsgint(buf, sizeof(int8));
PG_RETURN_POINTER(result);
}
/*
 * Convert internal representation to the external binary representation
 *
 * Writes int16 dim, int16 unused, then one byte per element.
 */
PGDLLEXPORT PG_FUNCTION_INFO_V1(intvec_send);
Datum
intvec_send(PG_FUNCTION_ARGS)
{
	IntVector  *vec = PG_GETARG_INTVEC_P(0);
	int			dim = vec->dim;
	StringInfoData out;

	pq_begintypsend(&out);

	/* Header */
	pq_sendint(&out, vec->dim, sizeof(int16));
	pq_sendint(&out, vec->unused, sizeof(int16));

	/* Elements */
	for (int i = 0; i < dim; i++)
		pq_sendint8(&out, vec->x[i]);

	PG_RETURN_BYTEA_P(pq_endtypsend(&out));
}
/*
 * Convert int vector to int vector
 *
 * The value is returned unchanged; the only work done is checking its
 * dimensions against the type modifier (argument 1).
 */
PGDLLEXPORT PG_FUNCTION_INFO_V1(intvec);
Datum
intvec(PG_FUNCTION_ARGS)
{
	IntVector  *input = PG_GETARG_INTVEC_P(0);
	int32		expected = PG_GETARG_INT32(1);

	CheckExpectedDim(expected, input->dim);

	PG_RETURN_POINTER(input);
}
/*
* Convert array to intvec vector
*/
PGDLLEXPORT PG_FUNCTION_INFO_V1(array_to_intvec);
Datum
array_to_intvec(PG_FUNCTION_ARGS)
{
ArrayType *array = PG_GETARG_ARRAYTYPE_P(0);
int32 typmod = PG_GETARG_INT32(1);
Vector *result;
int16 typlen;
bool typbyval;
char typalign;
Datum *elemsp;
int nelemsp;
if (ARR_NDIM(array) > 1)
ereport(ERROR,
(errcode(ERRCODE_DATA_EXCEPTION),
errmsg("array must be 1-D")));
if (ARR_HASNULL(array) && array_contains_nulls(array))
ereport(ERROR,
(errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
errmsg("array must not contain nulls")));
get_typlenbyvalalign(ARR_ELEMTYPE(array), &typlen, &typbyval, &typalign);
deconstruct_array(array, ARR_ELEMTYPE(array), typlen, typbyval, typalign, &elemsp, NULL, &nelemsp);
CheckDim(nelemsp);
CheckExpectedDim(typmod, nelemsp);
result = InitVector(nelemsp);
if (ARR_ELEMTYPE(array) == INT4OID)
{
for (int i = 0; i < nelemsp; i++)
{
long l = DatumGetInt32(elemsp[i]);
CheckElement(l);
result->x[i] = l;
}
}
else
{
ereport(ERROR,
(errcode(ERRCODE_DATA_EXCEPTION),
errmsg("unsupported array type")));
}
/*
* Free allocation from deconstruct_array. Do not free individual elements
* when pass-by-reference since they point to original array.
*/
pfree(elemsp);
PG_RETURN_POINTER(result);
}
/*
 * Get the L2 (Euclidean) distance between int vectors
 *
 * Squared differences are accumulated in an int and the square root is
 * taken at the end. Errors if the dimensions differ.
 */
PGDLLEXPORT PG_FUNCTION_INFO_V1(intvec_l2_distance);
Datum
intvec_l2_distance(PG_FUNCTION_ARGS)
{
	IntVector  *a = PG_GETARG_INTVEC_P(0);
	IntVector  *b = PG_GETARG_INTVEC_P(1);
	int8	   *lhs;
	int8	   *rhs;
	int			dim;
	int			sumSquares = 0;

	CheckDims(a, b);

	lhs = a->x;
	rhs = b->x;
	dim = a->dim;

	/* Auto-vectorized */
	for (int i = 0; i < dim; i++)
	{
		int			delta = lhs[i] - rhs[i];

		sumSquares += delta * delta;
	}

	PG_RETURN_FLOAT8(sqrt((double) sumSquares));
}
/*
 * Get the L2 squared distance between int vectors
 *
 * Same accumulation as the L2 distance, without the final square root.
 * Errors if the dimensions differ.
 */
PGDLLEXPORT PG_FUNCTION_INFO_V1(intvec_l2_squared_distance);
Datum
intvec_l2_squared_distance(PG_FUNCTION_ARGS)
{
	IntVector  *a = PG_GETARG_INTVEC_P(0);
	IntVector  *b = PG_GETARG_INTVEC_P(1);
	int8	   *lhs;
	int8	   *rhs;
	int			dim;
	int			sumSquares = 0;

	CheckDims(a, b);

	lhs = a->x;
	rhs = b->x;
	dim = a->dim;

	/* Auto-vectorized */
	for (int i = 0; i < dim; i++)
	{
		int			delta = lhs[i] - rhs[i];

		sumSquares += delta * delta;
	}

	PG_RETURN_FLOAT8((double) sumSquares);
}
/*
 * Get the inner product of two int vectors
 *
 * Element products are accumulated in an int. Errors if the dimensions
 * differ.
 */
PGDLLEXPORT PG_FUNCTION_INFO_V1(intvec_inner_product);
Datum
intvec_inner_product(PG_FUNCTION_ARGS)
{
	IntVector  *a = PG_GETARG_INTVEC_P(0);
	IntVector  *b = PG_GETARG_INTVEC_P(1);
	int8	   *lhs;
	int8	   *rhs;
	int			dim;
	int			dot = 0;

	CheckDims(a, b);

	lhs = a->x;
	rhs = b->x;
	dim = a->dim;

	/* Auto-vectorized */
	for (int i = 0; i < dim; i++)
		dot += lhs[i] * rhs[i];

	PG_RETURN_FLOAT8((double) dot);
}
/*
 * Get the negative inner product of two int vectors
 *
 * Computes the inner product in an int and returns it negated. Errors if
 * the dimensions differ.
 */
PGDLLEXPORT PG_FUNCTION_INFO_V1(intvec_negative_inner_product);
Datum
intvec_negative_inner_product(PG_FUNCTION_ARGS)
{
	IntVector  *a = PG_GETARG_INTVEC_P(0);
	IntVector  *b = PG_GETARG_INTVEC_P(1);
	int8	   *lhs;
	int8	   *rhs;
	int			dim;
	int			dot = 0;

	CheckDims(a, b);

	lhs = a->x;
	rhs = b->x;
	dim = a->dim;

	/* Auto-vectorized */
	for (int i = 0; i < dim; i++)
		dot += lhs[i] * rhs[i];

	PG_RETURN_FLOAT8((double) -dot);
}
/*
 * Get the cosine distance between two int vectors
 *
 * Returns 1 - similarity, with similarity clamped to [-1, 1]. When either
 * vector is all zeros the division is 0 / 0, so the result is NaN. Errors
 * if the dimensions differ.
 */
PGDLLEXPORT PG_FUNCTION_INFO_V1(intvec_cosine_distance);
Datum
intvec_cosine_distance(PG_FUNCTION_ARGS)
{
	IntVector  *a = PG_GETARG_INTVEC_P(0);
	IntVector  *b = PG_GETARG_INTVEC_P(1);
	int8	   *lhs;
	int8	   *rhs;
	int			dim;
	int			dot = 0;
	int			sqNormA = 0;
	int			sqNormB = 0;
	double		similarity;

	CheckDims(a, b);

	lhs = a->x;
	rhs = b->x;
	dim = a->dim;

	/* Auto-vectorized */
	for (int i = 0; i < dim; i++)
	{
		int8		ai = lhs[i];
		int8		bi = rhs[i];

		dot += ai * bi;
		sqNormA += ai * ai;
		sqNormB += bi * bi;
	}

	/* Use sqrt(a * b) over sqrt(a) * sqrt(b) */
	similarity = (double) dot / sqrt((double) sqNormA * (double) sqNormB);

#ifdef _MSC_VER
	/* /fp:fast may not propagate NaN */
	if (isnan(similarity))
		PG_RETURN_FLOAT8(NAN);
#endif

	/* Keep in range; NaN fails both comparisons and passes through */
	if (similarity > 1)
		similarity = 1;
	else if (similarity < -1)
		similarity = -1;

	PG_RETURN_FLOAT8(1 - similarity);
}
/*
 * Get the L1 (taxicab) distance between two int vectors
 *
 * Absolute differences are accumulated in an int. Errors if the dimensions
 * differ.
 */
PGDLLEXPORT PG_FUNCTION_INFO_V1(intvec_l1_distance);
Datum
intvec_l1_distance(PG_FUNCTION_ARGS)
{
	IntVector  *a = PG_GETARG_INTVEC_P(0);
	IntVector  *b = PG_GETARG_INTVEC_P(1);
	int8	   *lhs;
	int8	   *rhs;
	int			dim;
	int			total = 0;

	CheckDims(a, b);

	lhs = a->x;
	rhs = b->x;
	dim = a->dim;

	/* Auto-vectorized */
	for (int i = 0; i < dim; i++)
		total += abs(lhs[i] - rhs[i]);

	PG_RETURN_FLOAT8((double) total);
}

23
src/intvec.h Normal file
View File

@@ -0,0 +1,23 @@
#ifndef INTVEC_H
#define INTVEC_H
#include "vector.h"
/* Maximum number of dimensions; same limit as the vector type */
#define INTVEC_MAX_DIM VECTOR_MAX_DIM
/* Total size in bytes of an int vector with _dim elements (header plus one byte per element) */
#define INTVEC_SIZE(_dim) (offsetof(IntVector, x) + sizeof(int8)*(_dim))
/* Detoast a datum and view it as an IntVector */
#define DatumGetIntVector(x) ((IntVector *) PG_DETOAST_DATUM(x))
/* Fetch function argument number x as an IntVector */
#define PG_GETARG_INTVEC_P(x) DatumGetIntVector(PG_GETARG_DATUM(x))
/* Return an IntVector pointer from a function */
#define PG_RETURN_INTVEC_P(x) PG_RETURN_POINTER(x)
/*
 * Variable-length vector of single-byte signed integers.
 *
 * The fields before x[] are an int32 and two int16s, followed by one int8
 * per dimension.
 */
typedef struct IntVector
{
int32 vl_len_; /* varlena header (do not touch directly!) */
int16 dim; /* number of dimensions */
int16 unused; /* intvec_recv rejects any value other than 0 */
int8 x[FLEXIBLE_ARRAY_MEMBER]; /* elements, dim of them */
} IntVector;
/* Allocate a zero-filled int vector with dim dimensions */
IntVector *InitIntVector(int dim);
#endif

View File

@@ -1,15 +1,15 @@
CREATE TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE TABLE t2 (val vector(3));
CREATE TABLE t (val vector(3), val2 intvec(3));
INSERT INTO t (val, val2) VALUES ('[0,0,0]', '[0,0,0]'), ('[1,2,3]', '[1,2,3]'), ('[1,1,1]', '[1,1,1]'), (NULL, NULL);
CREATE TABLE t2 (val vector(3), val2 intvec(3));
\copy t TO 'results/data.bin' WITH (FORMAT binary)
\copy t2 FROM 'results/data.bin' WITH (FORMAT binary)
SELECT * FROM t2 ORDER BY val;
val
---------
[0,0,0]
[1,1,1]
[1,2,3]
val | val2
---------+---------
[0,0,0] | [0,0,0]
[1,1,1] | [1,1,1]
[1,2,3] | [1,2,3]
|
(4 rows)
DROP TABLE t;

View File

@@ -104,105 +104,105 @@ SELECT vector_norm('[3e37,4e37]')::real;
5e+37
(1 row)
SELECT l2_distance('[0,0]', '[3,4]');
SELECT l2_distance('[0,0]'::vector, '[3,4]');
l2_distance
-------------
5
(1 row)
SELECT l2_distance('[0,0]', '[0,1]');
SELECT l2_distance('[0,0]'::vector, '[0,1]');
l2_distance
-------------
1
(1 row)
SELECT l2_distance('[1,2]', '[3]');
SELECT l2_distance('[1,2]'::vector, '[3]');
ERROR: different vector dimensions 2 and 1
SELECT l2_distance('[3e38]', '[-3e38]');
SELECT l2_distance('[3e38]'::vector, '[-3e38]');
l2_distance
-------------
Infinity
(1 row)
SELECT inner_product('[1,2]', '[3,4]');
SELECT inner_product('[1,2]'::vector, '[3,4]');
inner_product
---------------
11
(1 row)
SELECT inner_product('[1,2]', '[3]');
SELECT inner_product('[1,2]'::vector, '[3]');
ERROR: different vector dimensions 2 and 1
SELECT inner_product('[3e38]', '[3e38]');
SELECT inner_product('[3e38]'::vector, '[3e38]');
inner_product
---------------
Infinity
(1 row)
SELECT cosine_distance('[1,2]', '[2,4]');
SELECT cosine_distance('[1,2]'::vector, '[2,4]');
cosine_distance
-----------------
0
(1 row)
SELECT cosine_distance('[1,2]', '[0,0]');
SELECT cosine_distance('[1,2]'::vector, '[0,0]');
cosine_distance
-----------------
NaN
(1 row)
SELECT cosine_distance('[1,1]', '[1,1]');
SELECT cosine_distance('[1,1]'::vector, '[1,1]');
cosine_distance
-----------------
0
(1 row)
SELECT cosine_distance('[1,0]', '[0,2]');
SELECT cosine_distance('[1,0]'::vector, '[0,2]');
cosine_distance
-----------------
1
(1 row)
SELECT cosine_distance('[1,1]', '[-1,-1]');
SELECT cosine_distance('[1,1]'::vector, '[-1,-1]');
cosine_distance
-----------------
2
(1 row)
SELECT cosine_distance('[1,2]', '[3]');
SELECT cosine_distance('[1,2]'::vector, '[3]');
ERROR: different vector dimensions 2 and 1
SELECT cosine_distance('[1,1]', '[1.1,1.1]');
SELECT cosine_distance('[1,1]'::vector, '[1.1,1.1]');
cosine_distance
-----------------
0
(1 row)
SELECT cosine_distance('[1,1]', '[-1.1,-1.1]');
SELECT cosine_distance('[1,1]'::vector, '[-1.1,-1.1]');
cosine_distance
-----------------
2
(1 row)
SELECT cosine_distance('[3e38]', '[3e38]');
SELECT cosine_distance('[3e38]'::vector, '[3e38]');
cosine_distance
-----------------
NaN
(1 row)
SELECT l1_distance('[0,0]', '[3,4]');
SELECT l1_distance('[0,0]'::vector, '[3,4]');
l1_distance
-------------
7
(1 row)
SELECT l1_distance('[0,0]', '[0,1]');
SELECT l1_distance('[0,0]'::vector, '[0,1]');
l1_distance
-------------
1
(1 row)
SELECT l1_distance('[1,2]', '[3]');
SELECT l1_distance('[1,2]'::vector, '[3]');
ERROR: different vector dimensions 2 and 1
SELECT l1_distance('[3e38]', '[-3e38]');
SELECT l1_distance('[3e38]'::vector, '[-3e38]');
l1_distance
-------------
Infinity

View File

@@ -0,0 +1,27 @@
SET enable_seqscan = off;
CREATE TABLE t (val intvec(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING hnsw (val intvec_cosine_ops);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <=> '[3,3,3]';
val
---------
[1,1,1]
[1,2,3]
[1,2,4]
[0,0,0]
(4 rows)
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '[0,0,0]') t2;
count
-------
4
(1 row)
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::intvec)) t2;
count
-------
4
(1 row)
DROP TABLE t;

View File

@@ -0,0 +1,21 @@
SET enable_seqscan = off;
CREATE TABLE t (val intvec(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING hnsw (val intvec_ip_ops);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <#> '[3,3,3]';
val
---------
[1,2,4]
[1,2,3]
[1,1,1]
[0,0,0]
(4 rows)
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::intvec)) t2;
count
-------
4
(1 row)
DROP TABLE t;

View File

@@ -0,0 +1,33 @@
SET enable_seqscan = off;
CREATE TABLE t (val intvec(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING hnsw (val intvec_l2_ops);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
val
---------
[1,2,3]
[1,2,4]
[1,1,1]
[0,0,0]
(4 rows)
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <-> (SELECT NULL::intvec)) t2;
count
-------
4
(1 row)
SELECT COUNT(*) FROM t;
count
-------
5
(1 row)
TRUNCATE t;
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
val
-----
(0 rows)
DROP TABLE t;

View File

@@ -0,0 +1,92 @@
SELECT l2_distance('[0,0]'::intvec, '[3,4]');
l2_distance
-------------
5
(1 row)
SELECT l2_distance('[0,0]'::intvec, '[0,1]');
l2_distance
-------------
1
(1 row)
SELECT l2_distance('[1,2]'::intvec, '[3]');
ERROR: different intvec dimensions 2 and 1
SELECT '[0,0]'::intvec <-> '[3,4]';
?column?
----------
5
(1 row)
SELECT inner_product('[1,2]'::intvec, '[3,4]');
inner_product
---------------
11
(1 row)
SELECT inner_product('[1,2]'::intvec, '[3]');
ERROR: different intvec dimensions 2 and 1
SELECT inner_product('[127]'::intvec, '[127]');
inner_product
---------------
16129
(1 row)
SELECT '[1,2]'::intvec <#> '[3,4]';
?column?
----------
-11
(1 row)
SELECT cosine_distance('[1,2]'::intvec, '[2,4]');
cosine_distance
-----------------
0
(1 row)
SELECT cosine_distance('[1,2]'::intvec, '[0,0]');
cosine_distance
-----------------
NaN
(1 row)
SELECT cosine_distance('[1,1]'::intvec, '[1,1]');
cosine_distance
-----------------
0
(1 row)
SELECT cosine_distance('[1,0]'::intvec, '[0,2]');
cosine_distance
-----------------
1
(1 row)
SELECT cosine_distance('[1,1]'::intvec, '[-1,-1]');
cosine_distance
-----------------
2
(1 row)
SELECT cosine_distance('[1,2]'::intvec, '[3]');
ERROR: different intvec dimensions 2 and 1
SELECT '[1,2]'::intvec <=> '[2,4]';
?column?
----------
0
(1 row)
SELECT l1_distance('[0,0]'::intvec, '[3,4]');
l1_distance
-------------
7
(1 row)
SELECT l1_distance('[0,0]'::intvec, '[0,1]');
l1_distance
-------------
1
(1 row)
SELECT l1_distance('[1,2]'::intvec, '[3]');
ERROR: different intvec dimensions 2 and 1

View File

@@ -0,0 +1,119 @@
SELECT '[1,2,3]'::intvec;
intvec
---------
[1,2,3]
(1 row)
SELECT '[-1,-2,-3]'::intvec;
intvec
------------
[-1,-2,-3]
(1 row)
SELECT ' [ 1, 2 , 3 ] '::intvec;
intvec
---------
[1,2,3]
(1 row)
SELECT '[1.23456]'::intvec;
ERROR: invalid input syntax for type intvec: "[1.23456]"
LINE 1: SELECT '[1.23456]'::intvec;
^
SELECT '[hello,1]'::intvec;
ERROR: invalid input syntax for type intvec: "[hello,1]"
LINE 1: SELECT '[hello,1]'::intvec;
^
SELECT '[127,-128]'::intvec;
intvec
------------
[127,-128]
(1 row)
SELECT '[128,-129]'::intvec;
ERROR: value "128" is out of range for type intvec
LINE 1: SELECT '[128,-129]'::intvec;
^
SELECT '[1,2,3'::intvec;
ERROR: malformed intvec literal: "[1,2,3"
LINE 1: SELECT '[1,2,3'::intvec;
^
DETAIL: Unexpected end of input.
SELECT '[1,2,3]9'::intvec;
ERROR: malformed intvec literal: "[1,2,3]9"
LINE 1: SELECT '[1,2,3]9'::intvec;
^
DETAIL: Junk after closing right brace.
SELECT '1,2,3'::intvec;
ERROR: malformed intvec literal: "1,2,3"
LINE 1: SELECT '1,2,3'::intvec;
^
DETAIL: Vector contents must start with "[".
SELECT ''::intvec;
ERROR: malformed intvec literal: ""
LINE 1: SELECT ''::intvec;
^
DETAIL: Vector contents must start with "[".
SELECT '['::intvec;
ERROR: malformed intvec literal: "["
LINE 1: SELECT '['::intvec;
^
DETAIL: Unexpected end of input.
SELECT '[,'::intvec;
ERROR: malformed intvec literal: "[,"
LINE 1: SELECT '[,'::intvec;
^
DETAIL: Unexpected end of input.
SELECT '[]'::intvec;
ERROR: intvec must have at least 1 dimension
LINE 1: SELECT '[]'::intvec;
^
SELECT '[1,]'::intvec;
ERROR: invalid input syntax for type intvec: "[1,]"
LINE 1: SELECT '[1,]'::intvec;
^
SELECT '[1a]'::intvec;
ERROR: invalid input syntax for type intvec: "[1a]"
LINE 1: SELECT '[1a]'::intvec;
^
SELECT '[1,,3]'::intvec;
ERROR: malformed intvec literal: "[1,,3]"
LINE 1: SELECT '[1,,3]'::intvec;
^
SELECT '[1, ,3]'::intvec;
ERROR: invalid input syntax for type intvec: "[1, ,3]"
LINE 1: SELECT '[1, ,3]'::intvec;
^
SELECT '[1,2,3]'::intvec(3);
intvec
---------
[1,2,3]
(1 row)
SELECT '[1,2,3]'::intvec(2);
ERROR: expected 2 dimensions, not 3
SELECT '[1,2,3]'::intvec(3, 2);
ERROR: invalid type modifier
LINE 1: SELECT '[1,2,3]'::intvec(3, 2);
^
SELECT '[1,2,3]'::intvec('a');
ERROR: invalid input syntax for type integer: "a"
LINE 1: SELECT '[1,2,3]'::intvec('a');
^
SELECT '[1,2,3]'::intvec(0);
ERROR: dimensions for type intvec must be at least 1
LINE 1: SELECT '[1,2,3]'::intvec(0);
^
SELECT '[1,2,3]'::intvec(16001);
ERROR: dimensions for type intvec cannot exceed 16000
LINE 1: SELECT '[1,2,3]'::intvec(16001);
^
SELECT unnest('{"[1,2,3]", "[4,5,6]"}'::intvec[]);
unnest
---------
[1,2,3]
[4,5,6]
(2 rows)
SELECT '{"[1,2,3]"}'::intvec(2)[];
ERROR: expected 2 dimensions, not 3

View File

@@ -1,7 +1,7 @@
CREATE TABLE t (val vector(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE TABLE t (val vector(3), val2 intvec(3));
INSERT INTO t (val, val2) VALUES ('[0,0,0]', '[0,0,0]'), ('[1,2,3]', '[1,2,3]'), ('[1,1,1]', '[1,1,1]'), (NULL, NULL);
CREATE TABLE t2 (val vector(3));
CREATE TABLE t2 (val vector(3), val2 intvec(3));
\copy t TO 'results/data.bin' WITH (FORMAT binary)
\copy t2 FROM 'results/data.bin' WITH (FORMAT binary)

View File

@@ -24,29 +24,29 @@ SELECT vector_norm('[3,4]');
SELECT vector_norm('[0,1]');
SELECT vector_norm('[3e37,4e37]')::real;
SELECT l2_distance('[0,0]', '[3,4]');
SELECT l2_distance('[0,0]', '[0,1]');
SELECT l2_distance('[1,2]', '[3]');
SELECT l2_distance('[3e38]', '[-3e38]');
SELECT l2_distance('[0,0]'::vector, '[3,4]');
SELECT l2_distance('[0,0]'::vector, '[0,1]');
SELECT l2_distance('[1,2]'::vector, '[3]');
SELECT l2_distance('[3e38]'::vector, '[-3e38]');
SELECT inner_product('[1,2]', '[3,4]');
SELECT inner_product('[1,2]', '[3]');
SELECT inner_product('[3e38]', '[3e38]');
SELECT inner_product('[1,2]'::vector, '[3,4]');
SELECT inner_product('[1,2]'::vector, '[3]');
SELECT inner_product('[3e38]'::vector, '[3e38]');
SELECT cosine_distance('[1,2]', '[2,4]');
SELECT cosine_distance('[1,2]', '[0,0]');
SELECT cosine_distance('[1,1]', '[1,1]');
SELECT cosine_distance('[1,0]', '[0,2]');
SELECT cosine_distance('[1,1]', '[-1,-1]');
SELECT cosine_distance('[1,2]', '[3]');
SELECT cosine_distance('[1,1]', '[1.1,1.1]');
SELECT cosine_distance('[1,1]', '[-1.1,-1.1]');
SELECT cosine_distance('[3e38]', '[3e38]');
SELECT cosine_distance('[1,2]'::vector, '[2,4]');
SELECT cosine_distance('[1,2]'::vector, '[0,0]');
SELECT cosine_distance('[1,1]'::vector, '[1,1]');
SELECT cosine_distance('[1,0]'::vector, '[0,2]');
SELECT cosine_distance('[1,1]'::vector, '[-1,-1]');
SELECT cosine_distance('[1,2]'::vector, '[3]');
SELECT cosine_distance('[1,1]'::vector, '[1.1,1.1]');
SELECT cosine_distance('[1,1]'::vector, '[-1.1,-1.1]');
SELECT cosine_distance('[3e38]'::vector, '[3e38]');
SELECT l1_distance('[0,0]', '[3,4]');
SELECT l1_distance('[0,0]', '[0,1]');
SELECT l1_distance('[1,2]', '[3]');
SELECT l1_distance('[3e38]', '[-3e38]');
SELECT l1_distance('[0,0]'::vector, '[3,4]');
SELECT l1_distance('[0,0]'::vector, '[0,1]');
SELECT l1_distance('[1,2]'::vector, '[3]');
SELECT l1_distance('[3e38]'::vector, '[-3e38]');
SELECT avg(v) FROM unnest(ARRAY['[1,2,3]'::vector, '[3,5,7]']) v;
SELECT avg(v) FROM unnest(ARRAY['[1,2,3]'::vector, '[3,5,7]', NULL]) v;

View File

@@ -0,0 +1,13 @@
SET enable_seqscan = off;
CREATE TABLE t (val intvec(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING hnsw (val intvec_cosine_ops);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <=> '[3,3,3]';
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> '[0,0,0]') t2;
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <=> (SELECT NULL::intvec)) t2;
DROP TABLE t;

View File

@@ -0,0 +1,12 @@
SET enable_seqscan = off;
CREATE TABLE t (val intvec(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING hnsw (val intvec_ip_ops);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <#> '[3,3,3]';
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <#> (SELECT NULL::intvec)) t2;
DROP TABLE t;

View File

@@ -0,0 +1,16 @@
SET enable_seqscan = off;
CREATE TABLE t (val intvec(3));
INSERT INTO t (val) VALUES ('[0,0,0]'), ('[1,2,3]'), ('[1,1,1]'), (NULL);
CREATE INDEX ON t USING hnsw (val intvec_l2_ops);
INSERT INTO t (val) VALUES ('[1,2,4]');
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
SELECT COUNT(*) FROM (SELECT * FROM t ORDER BY val <-> (SELECT NULL::intvec)) t2;
SELECT COUNT(*) FROM t;
TRUNCATE t;
SELECT * FROM t ORDER BY val <-> '[3,3,3]';
DROP TABLE t;

View File

@@ -0,0 +1,21 @@
SELECT l2_distance('[0,0]'::intvec, '[3,4]');
SELECT l2_distance('[0,0]'::intvec, '[0,1]');
SELECT l2_distance('[1,2]'::intvec, '[3]');
SELECT '[0,0]'::intvec <-> '[3,4]';
SELECT inner_product('[1,2]'::intvec, '[3,4]');
SELECT inner_product('[1,2]'::intvec, '[3]');
SELECT inner_product('[127]'::intvec, '[127]');
SELECT '[1,2]'::intvec <#> '[3,4]';
SELECT cosine_distance('[1,2]'::intvec, '[2,4]');
SELECT cosine_distance('[1,2]'::intvec, '[0,0]');
SELECT cosine_distance('[1,1]'::intvec, '[1,1]');
SELECT cosine_distance('[1,0]'::intvec, '[0,2]');
SELECT cosine_distance('[1,1]'::intvec, '[-1,-1]');
SELECT cosine_distance('[1,2]'::intvec, '[3]');
SELECT '[1,2]'::intvec <=> '[2,4]';
SELECT l1_distance('[0,0]'::intvec, '[3,4]');
SELECT l1_distance('[0,0]'::intvec, '[0,1]');
SELECT l1_distance('[1,2]'::intvec, '[3]');

28
test/sql/intvec_input.sql Normal file
View File

@@ -0,0 +1,28 @@
SELECT '[1,2,3]'::intvec;
SELECT '[-1,-2,-3]'::intvec;
SELECT ' [ 1, 2 , 3 ] '::intvec;
SELECT '[1.23456]'::intvec;
SELECT '[hello,1]'::intvec;
SELECT '[127,-128]'::intvec;
SELECT '[128,-129]'::intvec;
SELECT '[1,2,3'::intvec;
SELECT '[1,2,3]9'::intvec;
SELECT '1,2,3'::intvec;
SELECT ''::intvec;
SELECT '['::intvec;
SELECT '[,'::intvec;
SELECT '[]'::intvec;
SELECT '[1,]'::intvec;
SELECT '[1a]'::intvec;
SELECT '[1,,3]'::intvec;
SELECT '[1, ,3]'::intvec;
SELECT '[1,2,3]'::intvec(3);
SELECT '[1,2,3]'::intvec(2);
SELECT '[1,2,3]'::intvec(3, 2);
SELECT '[1,2,3]'::intvec('a');
SELECT '[1,2,3]'::intvec(0);
SELECT '[1,2,3]'::intvec(16001);
SELECT unnest('{"[1,2,3]", "[4,5,6]"}'::intvec[]);
SELECT '{"[1,2,3]"}'::intvec(2)[];

View File

@@ -0,0 +1,132 @@
use strict;
use warnings;
use PostgresNode;
use TestLib;
use Test::More;
# Test node handle; assigned when the node is created further down.
my $node;

# Query vectors as intvec literals, and the exact result recorded for each.
my @queries;
my @expected;

# Number of rows requested per query.
my $limit = 20;

# Number of dimensions in the table column and in every query vector.
my $dim = 20;

# One random-integer SQL expression per dimension, comma separated, ready to
# be placed inside ARRAY[...].
my $array_sql = join(',', map { '(random() * 255)::int - 128' } 1 .. $dim);
# Check that index-backed search for one operator reaches a minimum recall.
#
# Arguments:
#   $min      - lowest acceptable ratio of matched ids to expected ids
#   $operator - distance operator interpolated into ORDER BY
#
# Uses the file-level $node, @queries, @expected and $limit. Emits one test
# for the plan check and one for the recall comparison (or a single failure
# when there is nothing to compare against).
sub test_recall
{
	my ($min, $operator) = @_;
	my $correct = 0;
	my $total = 0;

	# Confirm the plan for the first query reports an index scan
	my $explain = $node->safe_psql("postgres", qq(
SET enable_seqscan = off;
EXPLAIN ANALYZE SELECT i FROM tst ORDER BY v $operator '$queries[0]' LIMIT $limit;
));
	like($explain, qr/Index Scan/, "$operator uses index scan");

	for my $i (0 .. $#queries)
	{
		my $actual = $node->safe_psql("postgres", qq(
SET enable_seqscan = off;
SELECT i FROM tst ORDER BY v $operator '$queries[$i]' LIMIT $limit;
));

		# One id per output line
		my %actual_set = map { $_ => 1 } split("\n", $actual);
		my @expected_ids = split("\n", $expected[$i]);

		# grep in scalar context yields the number of expected ids found
		$correct += grep { exists($actual_set{$_}) } @expected_ids;
		$total += @expected_ids;
	}

	# With no expected ids the ratio is undefined; report a named failure
	# instead of dying on division by zero
	if ($total == 0)
	{
		fail("$operator: no expected results to compare");
		return;
	}

	cmp_ok($correct / $total, ">=", $min, $operator);
}
# Bring up a fresh node for this test
$node = get_new_node('node');
$node->init;
$node->start;

# Install the extension and load 10000 rows with random array values
$node->safe_psql("postgres", "CREATE EXTENSION vector;");
$node->safe_psql("postgres", "CREATE TABLE tst (i int4, v intvec($dim));");
$node->safe_psql("postgres",
	"INSERT INTO tst SELECT i, ARRAY[$array_sql] FROM generate_series(1, 10000) i;"
);

# Build 20 random query vectors, one element per dimension
foreach my $query_number (1 .. 20)
{
	my @elements = map { int(rand(256)) - 128 } 1 .. $dim;
	push(@queries, "[" . join(",", @elements) . "]");
}

# Each distance operator paired with the opclass used to index it
my @index_cases = (
	["<->", "intvec_l2_ops"],
	["<#>", "intvec_ip_ops"],
	["<=>", "intvec_cosine_ops"],
);

foreach my $index_case (@index_cases)
{
	my ($operator, $opclass) = @{$index_case};

	# Recall threshold applied to every index build below
	my $min = 0.99;

	# Record the exact result of every query before any index is built
	@expected = ();
	foreach my $query (@queries)
	{
		my $exact = $node->safe_psql("postgres", "SELECT i FROM tst ORDER BY v $operator '$query' LIMIT $limit;");
		push(@expected, $exact);
	}

	# Serial build: parallel maintenance workers disabled
	$node->safe_psql("postgres", qq(
SET max_parallel_maintenance_workers = 0;
CREATE INDEX idx ON tst USING hnsw (v $opclass);
));
	test_recall($min, $operator);
	$node->safe_psql("postgres", "DROP INDEX idx;");

	# Parallel build in memory; DEBUG messages are captured on stderr
	my ($ret, $stdout, $stderr) = $node->psql("postgres", qq(
SET client_min_messages = DEBUG;
SET min_parallel_table_scan_size = 1;
CREATE INDEX idx ON tst USING hnsw (v $opclass);
));
	is($ret, 0, $stderr);
	like($stderr, qr/using \d+ parallel workers/);
	test_recall($min, $operator);
	$node->safe_psql("postgres", "DROP INDEX idx;");

	# Parallel build on disk
	# parallel_workers is set on the table so workers are used with low maintenance_work_mem
	($ret, $stdout, $stderr) = $node->psql("postgres", qq(
ALTER TABLE tst SET (parallel_workers = 2);
SET client_min_messages = DEBUG;
SET maintenance_work_mem = '4MB';
CREATE INDEX idx ON tst USING hnsw (v $opclass);
ALTER TABLE tst RESET (parallel_workers);
));
	is($ret, 0, $stderr);
	like($stderr, qr/using \d+ parallel workers/);
	like($stderr, qr/hnsw graph no longer fits into maintenance_work_mem/);
	$node->safe_psql("postgres", "DROP INDEX idx;");
}

done_testing();