Added tinyint type

This commit is contained in:
Andrew Kane
2023-10-05 00:42:52 -07:00
parent 6e1312ddbe
commit 4914511cf6
8 changed files with 622 additions and 4 deletions

View File

@@ -3,8 +3,8 @@ EXTVERSION = 0.5.0
MODULE_big = vector
DATA = $(wildcard sql/*--*.sql)
OBJS = src/hnsw.o src/hnswbuild.o src/hnswinsert.o src/hnswscan.o src/hnswutils.o src/hnswvacuum.o src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/vector.o
HEADERS = src/vector.h
OBJS = src/hnsw.o src/hnswbuild.o src/hnswinsert.o src/hnswscan.o src/hnswutils.o src/hnswvacuum.o src/ivfbuild.o src/ivfflat.o src/ivfinsert.o src/ivfkmeans.o src/ivfscan.o src/ivfutils.o src/ivfvacuum.o src/tinyint.o src/vector.o
HEADERS = src/tinyint.h src/vector.h
TESTS = $(wildcard test/sql/*.sql)
REGRESS = $(patsubst test/sql/%.sql,%,$(TESTS))

View File

@@ -1,8 +1,8 @@
EXTENSION = vector
EXTVERSION = 0.5.0
OBJS = src\hnsw.obj src\hnswbuild.obj src\hnswinsert.obj src\hnswscan.obj src\hnswutils.obj src\hnswvacuum.obj src\ivfbuild.obj src\ivfflat.obj src\ivfinsert.obj src\ivfkmeans.obj src\ivfscan.obj src\ivfutils.obj src\ivfvacuum.obj src\vector.obj
HEADERS = src\vector.h
OBJS = src\hnsw.obj src\hnswbuild.obj src\hnswinsert.obj src\hnswscan.obj src\hnswutils.obj src\hnswvacuum.obj src\ivfbuild.obj src\ivfflat.obj src\ivfinsert.obj src\ivfkmeans.obj src\ivfscan.obj src\ivfutils.obj src\ivfvacuum.obj src\tinyint.obj src\vector.obj
HEADERS = src\tinyint.h src\vector.h
REGRESS = btree cast copy functions input ivfflat_cosine ivfflat_ip ivfflat_l2 ivfflat_options ivfflat_unlogged
REGRESS_OPTS = --inputdir=test --load-extension=$(EXTENSION)

View File

@@ -0,0 +1,67 @@
-- complain if script is sourced in psql, rather than via CREATE EXTENSION
\echo Use "ALTER EXTENSION vector UPDATE TO '0.5.1'" to load this file. \quit
-- tinyint
-- Shell type: declared first so the I/O functions below can name tinyint
-- in their signatures before the full definition exists.
CREATE TYPE tinyint;
-- Text input: parses a cstring into a tinyint.
CREATE FUNCTION tinyint_in(cstring, oid, integer) RETURNS tinyint
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- Text output: formats a tinyint as a cstring.
CREATE FUNCTION tinyint_out(tinyint) RETURNS cstring
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- Binary input (wire format).
CREATE FUNCTION tinyint_recv(internal, oid, integer) RETURNS tinyint
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- Binary output (wire format).
CREATE FUNCTION tinyint_send(tinyint) RETURNS bytea
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- Full definition: a fixed-length, one-byte, pass-by-value type with
-- char alignment, wired to the four I/O functions above.
CREATE TYPE tinyint (
INPUT = tinyint_in,
OUTPUT = tinyint_out,
RECEIVE = tinyint_recv,
SEND = tinyint_send,
INTERNALLENGTH = 1,
PASSEDBYVALUE,
ALIGNMENT = char
);
-- Cast support functions. They use the three-argument cast-function
-- signature (source value, integer, boolean).
CREATE FUNCTION integer_to_tinyint(integer, integer, boolean) RETURNS tinyint
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION numeric_to_tinyint(numeric, integer, boolean) RETURNS tinyint
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- NOTE(review): both casts narrow the source type yet are declared
-- AS IMPLICIT, so integer and numeric expressions can be converted to
-- tinyint without an explicit cast. Confirm this is intended; AS ASSIGNMENT
-- is the more conservative choice for a narrowing conversion. Any change
-- must be mirrored in the other script that defines these same casts.
CREATE CAST (integer AS tinyint)
WITH FUNCTION integer_to_tinyint(integer, integer, boolean) AS IMPLICIT;
CREATE CAST (numeric AS tinyint)
WITH FUNCTION numeric_to_tinyint(numeric, integer, boolean) AS IMPLICIT;
-- Distance functions over tinyint[]; all return float8.
-- l2_distance, inner_product and cosine_distance are bound to the C symbols
-- named explicitly after 'MODULE_PATHNAME'.
CREATE FUNCTION l2_distance(tinyint[], tinyint[]) RETURNS float8
AS 'MODULE_PATHNAME', 'tinyint_l2_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION inner_product(tinyint[], tinyint[]) RETURNS float8
AS 'MODULE_PATHNAME', 'tinyint_inner_product' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION cosine_distance(tinyint[], tinyint[]) RETURNS float8
AS 'MODULE_PATHNAME', 'tinyint_cosine_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- No link symbol is given here, so the C symbol defaults to the SQL name.
-- This function backs the <#> operator.
CREATE FUNCTION tinyint_negative_inner_product(tinyint[], tinyint[]) RETURNS float8
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- Distance operators on tinyint[]. Each operator is declared as its own
-- commutator.
-- <-> : L2 distance
CREATE OPERATOR <-> (
LEFTARG = tinyint[], RIGHTARG = tinyint[], PROCEDURE = l2_distance,
COMMUTATOR = '<->'
);
-- <#> : negative inner product
CREATE OPERATOR <#> (
LEFTARG = tinyint[], RIGHTARG = tinyint[], PROCEDURE = tinyint_negative_inner_product,
COMMUTATOR = '<#>'
);
-- <=> : cosine distance
CREATE OPERATOR <=> (
LEFTARG = tinyint[], RIGHTARG = tinyint[], PROCEDURE = cosine_distance,
COMMUTATOR = '<=>'
);

View File

@@ -290,3 +290,68 @@ CREATE OPERATOR CLASS vector_cosine_ops
OPERATOR 1 <=> (vector, vector) FOR ORDER BY float_ops,
FUNCTION 1 vector_negative_inner_product(vector, vector),
FUNCTION 2 vector_norm(vector);
-- tinyint
-- Shell type: declared first so the I/O functions below can name tinyint
-- in their signatures before the full definition exists.
CREATE TYPE tinyint;
-- Text input: parses a cstring into a tinyint.
CREATE FUNCTION tinyint_in(cstring, oid, integer) RETURNS tinyint
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- Text output: formats a tinyint as a cstring.
CREATE FUNCTION tinyint_out(tinyint) RETURNS cstring
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- Binary input (wire format).
CREATE FUNCTION tinyint_recv(internal, oid, integer) RETURNS tinyint
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- Binary output (wire format).
CREATE FUNCTION tinyint_send(tinyint) RETURNS bytea
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- Full definition: a fixed-length, one-byte, pass-by-value type with
-- char alignment, wired to the four I/O functions above.
CREATE TYPE tinyint (
INPUT = tinyint_in,
OUTPUT = tinyint_out,
RECEIVE = tinyint_recv,
SEND = tinyint_send,
INTERNALLENGTH = 1,
PASSEDBYVALUE,
ALIGNMENT = char
);
-- Cast support functions. They use the three-argument cast-function
-- signature (source value, integer, boolean).
CREATE FUNCTION integer_to_tinyint(integer, integer, boolean) RETURNS tinyint
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION numeric_to_tinyint(numeric, integer, boolean) RETURNS tinyint
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- NOTE(review): both casts narrow the source type yet are declared
-- AS IMPLICIT, so integer and numeric expressions can be converted to
-- tinyint without an explicit cast. Confirm this is intended; AS ASSIGNMENT
-- is the more conservative choice for a narrowing conversion. Any change
-- must be mirrored in the other script that defines these same casts.
CREATE CAST (integer AS tinyint)
WITH FUNCTION integer_to_tinyint(integer, integer, boolean) AS IMPLICIT;
CREATE CAST (numeric AS tinyint)
WITH FUNCTION numeric_to_tinyint(numeric, integer, boolean) AS IMPLICIT;
-- Distance functions over tinyint[]; all return float8.
-- l2_distance, inner_product and cosine_distance are bound to the C symbols
-- named explicitly after 'MODULE_PATHNAME'.
CREATE FUNCTION l2_distance(tinyint[], tinyint[]) RETURNS float8
AS 'MODULE_PATHNAME', 'tinyint_l2_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION inner_product(tinyint[], tinyint[]) RETURNS float8
AS 'MODULE_PATHNAME', 'tinyint_inner_product' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
CREATE FUNCTION cosine_distance(tinyint[], tinyint[]) RETURNS float8
AS 'MODULE_PATHNAME', 'tinyint_cosine_distance' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- No link symbol is given here, so the C symbol defaults to the SQL name.
-- This function backs the <#> operator.
CREATE FUNCTION tinyint_negative_inner_product(tinyint[], tinyint[]) RETURNS float8
AS 'MODULE_PATHNAME' LANGUAGE C IMMUTABLE STRICT PARALLEL SAFE;
-- Distance operators on tinyint[]. Each operator is declared as its own
-- commutator.
-- <-> : L2 distance
CREATE OPERATOR <-> (
LEFTARG = tinyint[], RIGHTARG = tinyint[], PROCEDURE = l2_distance,
COMMUTATOR = '<->'
);
-- <#> : negative inner product
CREATE OPERATOR <#> (
LEFTARG = tinyint[], RIGHTARG = tinyint[], PROCEDURE = tinyint_negative_inner_product,
COMMUTATOR = '<#>'
);
-- <=> : cosine distance
CREATE OPERATOR <=> (
LEFTARG = tinyint[], RIGHTARG = tinyint[], PROCEDURE = cosine_distance,
COMMUTATOR = '<=>'
);

293
src/tinyint.c Normal file
View File

@@ -0,0 +1,293 @@
#include "postgres.h"
#include <math.h>
#include <stdint.h>
#include "fmgr.h"
#include "lib/stringinfo.h"
#include "libpq/pqformat.h"
#include "tinyint.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/numeric.h"
/*
 * Report whether an array can be treated as a vector: it must have
 * exactly one dimension and contain no NULL elements.
 */
static bool
ArrayIsVector(ArrayType *a)
{
	if (ARR_NDIM(a) != 1)
		return false;

	return !array_contains_nulls(a);
}
/*
 * Return the common length of two arrays.
 *
 * Yields 0 when either array fails ArrayIsVector() or when the two
 * lengths differ; callers treat 0 as "not comparable".
 */
static int
CheckDims(ArrayType *a, ArrayType *b)
{
	int			lena;

	if (!(ArrayIsVector(a) && ArrayIsVector(b)))
		return 0;

	lena = ARR_DIMS(a)[0];

	return (lena == ARR_DIMS(b)[0]) ? lena : 0;
}
/*
 * Raise ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE unless the value fits in the
 * tinyint range [INT8_MIN, INT8_MAX].
 */
static void
CheckRange(long i)
{
	bool		fits = (i >= INT8_MIN && i <= INT8_MAX);

	if (fits)
		return;

	ereport(ERROR,
			(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
			 errmsg("value \"%ld\" is out of range for type tinyint", i)));
}
/*
 * Text input function: parse a base-10 integer, optionally surrounded by
 * whitespace, into a tinyint.
 *
 * Errors:
 *   - invalid input syntax when the string is empty or all whitespace, or
 *     when anything other than whitespace follows the number;
 *   - out of range when the parsed value does not fit in
 *     [INT8_MIN, INT8_MAX].
 */
PGDLLEXPORT PG_FUNCTION_INFO_V1(tinyint_in);
Datum
tinyint_in(PG_FUNCTION_ARGS)
{
	char	   *input = PG_GETARG_CSTRING(0);
	const char *cur;
	char	   *stop;
	long		value;

	/* step over leading whitespace */
	for (cur = input; *cur != '\0' && isspace((unsigned char) *cur); cur++)
		;

	/* nothing left to parse */
	if (*cur == '\0')
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
				 errmsg("invalid input syntax for type tinyint: \"%s\"", s_unused_guard(input))));

	value = strtol(cur, &stop, 10);

	if (value > INT8_MAX || value < INT8_MIN)
		ereport(ERROR,
				(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
				 errmsg("value \"%s\" is out of range for type tinyint", input)));

	/*
	 * Trailing whitespace is accepted, anything else is rejected. This also
	 * catches the case where strtol consumed no characters, because "stop"
	 * then still points at the first non-space character.
	 */
	for (cur = stop; *cur != '\0' && isspace((unsigned char) *cur); cur++)
		;

	if (*cur != '\0')
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
				 errmsg("invalid input syntax for type tinyint: \"%s\"", input)));

	PG_RETURN_INT8(value);
}
/*
* Convert internal representation to textual representation
*/
PGDLLEXPORT PG_FUNCTION_INFO_V1(tinyint_out);
Datum
tinyint_out(PG_FUNCTION_ARGS)
{
int8 num = PG_GETARG_INT8(0);
char *result = (char *) palloc(5); /* sign, 3 digits, '\0' */
pg_ltoa((int32) num, result);
PG_RETURN_CSTRING(result);
}
/*
* Convert external binary representation to internal representation
*/
PGDLLEXPORT PG_FUNCTION_INFO_V1(tinyint_recv);
Datum
tinyint_recv(PG_FUNCTION_ARGS)
{
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
PG_RETURN_INT8((int8) pq_getmsgint(buf, sizeof(int8)));
}
/*
 * Binary output function: serialize a tinyint as a single int8 in a
 * bytea message.
 */
PGDLLEXPORT PG_FUNCTION_INFO_V1(tinyint_send);
Datum
tinyint_send(PG_FUNCTION_ARGS)
{
	StringInfoData out;
	int8		value = PG_GETARG_INT8(0);

	pq_begintypsend(&out);
	pq_sendint8(&out, value);

	PG_RETURN_BYTEA_P(pq_endtypsend(&out));
}
/*
 * Cast function: integer -> tinyint.
 *
 * Values outside the tinyint range are rejected by CheckRange().
 */
PGDLLEXPORT PG_FUNCTION_INFO_V1(integer_to_tinyint);
Datum
integer_to_tinyint(PG_FUNCTION_ARGS)
{
	int32		value = PG_GETARG_INT32(0);

	CheckRange((long) value);

	PG_RETURN_INT8(value);
}
/*
 * Cast function: numeric -> tinyint.
 *
 * The numeric is first converted to int32 by numeric_int4_opt_error()
 * (called with a NULL error flag), then range-checked by CheckRange().
 */
PGDLLEXPORT PG_FUNCTION_INFO_V1(numeric_to_tinyint);
Datum
numeric_to_tinyint(PG_FUNCTION_ARGS)
{
	int32		value = numeric_int4_opt_error(PG_GETARG_NUMERIC(0), NULL);

	CheckRange((long) value);

	PG_RETURN_INT8(value);
}
/*
* Get the L2 distance between tinyint arrays
*/
PGDLLEXPORT PG_FUNCTION_INFO_V1(tinyint_l2_distance);
Datum
tinyint_l2_distance(PG_FUNCTION_ARGS)
{
ArrayType *a = PG_GETARG_ARRAYTYPE_P(0);
ArrayType *b = PG_GETARG_ARRAYTYPE_P(1);
int8 *ax = (int8 *) ARR_DATA_PTR(a);
int8 *bx = (int8 *) ARR_DATA_PTR(b);
double distance = 0.0;
int dim = CheckDims(a, b);
/* TODO Decide on error or NULL */
if (!dim)
PG_RETURN_NULL();
/* Auto-vectorized */
for (int i = 0; i < dim; i++)
{
double diff = ax[i] - bx[i];
distance += diff * diff;
}
PG_RETURN_FLOAT8(sqrt(distance));
}
/*
* Get the inner product of two tinyint arrays
*/
PGDLLEXPORT PG_FUNCTION_INFO_V1(tinyint_inner_product);
Datum
tinyint_inner_product(PG_FUNCTION_ARGS)
{
ArrayType *a = PG_GETARG_ARRAYTYPE_P(0);
ArrayType *b = PG_GETARG_ARRAYTYPE_P(1);
int8 *ax = (int8 *) ARR_DATA_PTR(a);
int8 *bx = (int8 *) ARR_DATA_PTR(b);
double distance = 0.0;
int dim = CheckDims(a, b);
/* TODO Decide on error or NULL */
if (!dim)
PG_RETURN_NULL();
/* Auto-vectorized */
for (int i = 0; i < dim; i++)
distance += ax[i] * bx[i];
PG_RETURN_FLOAT8(distance);
}
/*
* Get the negative inner product of two tinyint arrays
*/
PGDLLEXPORT PG_FUNCTION_INFO_V1(tinyint_negative_inner_product);
Datum
tinyint_negative_inner_product(PG_FUNCTION_ARGS)
{
ArrayType *a = PG_GETARG_ARRAYTYPE_P(0);
ArrayType *b = PG_GETARG_ARRAYTYPE_P(1);
int8 *ax = (int8 *) ARR_DATA_PTR(a);
int8 *bx = (int8 *) ARR_DATA_PTR(b);
double distance = 0.0;
int dim = CheckDims(a, b);
/* TODO Decide on error or NULL */
if (!dim)
PG_RETURN_NULL();
/* Auto-vectorized */
for (int i = 0; i < dim; i++)
distance += ax[i] * bx[i];
PG_RETURN_FLOAT8(distance * -1);
}
/*
* Get the cosine distance between two float2 arrays
*/
PGDLLEXPORT PG_FUNCTION_INFO_V1(tinyint_cosine_distance);
Datum
tinyint_cosine_distance(PG_FUNCTION_ARGS)
{
ArrayType *a = PG_GETARG_ARRAYTYPE_P(0);
ArrayType *b = PG_GETARG_ARRAYTYPE_P(1);
int8 *ax = (int8 *) ARR_DATA_PTR(a);
int8 *bx = (int8 *) ARR_DATA_PTR(b);
double distance = 0.0;
double norma = 0.0;
double normb = 0.0;
double similarity;
int dim = CheckDims(a, b);
/* TODO Decide on error or NULL */
if (!dim)
PG_RETURN_NULL();
/* Auto-vectorized */
for (int i = 0; i < dim; i++)
{
float axi = ax[i];
float bxi = bx[i];
distance += axi * bxi;
norma += axi * axi;
normb += bxi * bxi;
}
/* Use sqrt(a * b) over sqrt(a) * sqrt(b) */
similarity = distance / sqrt(norma * normb);
#ifdef _MSC_VER
/* /fp:fast may not propagate NaN */
if (isnan(similarity))
PG_RETURN_FLOAT8(NAN);
#endif
/* Keep in range */
if (similarity > 1)
similarity = 1;
else if (similarity < -1)
similarity = -1;
PG_RETURN_FLOAT8(1 - similarity);
}

8
src/tinyint.h Normal file
View File

@@ -0,0 +1,8 @@
/*
 * Datum helpers for the one-byte tinyint type.
 *
 * This header includes nothing itself; the macros below use int8,
 * PG_GETARG_DATUM and Int8GetDatum, so the including file must already
 * have pulled in the PostgreSQL headers that provide them.
 */
#ifndef TINYINT_H
#define TINYINT_H
/* Convert a Datum to int8 by a plain cast */
#define DatumGetInt8(X) ((int8) (X))
/* Fetch function argument n as an int8 */
#define PG_GETARG_INT8(n) DatumGetInt8(PG_GETARG_DATUM(n))
/* Return x from the enclosing function, wrapped with Int8GetDatum */
#define PG_RETURN_INT8(x) return Int8GetDatum(x)
#endif

150
test/expected/tinyint.out Normal file
View File

@@ -0,0 +1,150 @@
SELECT '127'::tinyint;
tinyint
---------
127
(1 row)
SELECT '128'::tinyint;
ERROR: value "128" is out of range for type tinyint
LINE 1: SELECT '128'::tinyint;
^
SELECT '-128'::tinyint;
tinyint
---------
-128
(1 row)
SELECT '-129'::tinyint;
ERROR: value "-129" is out of range for type tinyint
LINE 1: SELECT '-129'::tinyint;
^
SELECT ''::tinyint;
ERROR: invalid input syntax for type tinyint: ""
LINE 1: SELECT ''::tinyint;
^
SELECT ' 1'::tinyint;
tinyint
---------
1
(1 row)
SELECT '1 '::tinyint;
tinyint
---------
1
(1 row)
SELECT '1a'::tinyint;
ERROR: invalid input syntax for type tinyint: "1a"
LINE 1: SELECT '1a'::tinyint;
^
SELECT '{1,2,3}'::tinyint[];
tinyint
---------
{1,2,3}
(1 row)
SELECT '128'::numeric::tinyint;
ERROR: value "128" is out of range for type tinyint
SELECT 'NaN'::numeric::tinyint;
ERROR: cannot convert NaN to integer
SELECT 'Infinity'::numeric::tinyint;
ERROR: cannot convert infinity to integer
SELECT l2_distance('{0,0}'::tinyint[], '{3,4}'::tinyint[]);
l2_distance
-------------
5
(1 row)
SELECT l2_distance('{0,0}'::tinyint[], '{0,1}'::tinyint[]);
l2_distance
-------------
1
(1 row)
SELECT l2_distance('{1,2}'::tinyint[], '{3}'::tinyint[]);
l2_distance
-------------
(1 row)
SELECT l2_distance('{3e38}'::tinyint[], '{-3e38}'::tinyint[]);
ERROR: invalid input syntax for type tinyint: "3e38"
LINE 1: SELECT l2_distance('{3e38}'::tinyint[], '{-3e38}'::tinyint[]...
^
SELECT '{0,0}'::tinyint[] <-> '{3,4}'::tinyint[];
?column?
----------
5
(1 row)
SELECT inner_product('{1,2}'::tinyint[], '{3,4}'::tinyint[]);
inner_product
---------------
11
(1 row)
SELECT inner_product('{1,2}'::tinyint[], '{3}'::tinyint[]);
inner_product
---------------
(1 row)
SELECT inner_product('{127}'::tinyint[], '{127}'::tinyint[]);
inner_product
---------------
16129
(1 row)
SELECT '{1,2}'::tinyint[] <#> '{3,4}'::tinyint[];
?column?
----------
-11
(1 row)
SELECT cosine_distance('{1,2}'::tinyint[], '{2,4}'::tinyint[]);
cosine_distance
-----------------
0
(1 row)
SELECT cosine_distance('{1,2}'::tinyint[], '{0,0}'::tinyint[]);
cosine_distance
-----------------
NaN
(1 row)
SELECT cosine_distance('{1,1}'::tinyint[], '{1,1}'::tinyint[]);
cosine_distance
-----------------
0
(1 row)
SELECT cosine_distance('{1,0}'::tinyint[], '{0,2}'::tinyint[]);
cosine_distance
-----------------
1
(1 row)
SELECT cosine_distance('{1,1}'::tinyint[], '{-1,-1}'::tinyint[]);
cosine_distance
-----------------
2
(1 row)
SELECT cosine_distance('{1,2}'::tinyint[], '{3}'::tinyint[]);
cosine_distance
-----------------
(1 row)
SELECT cosine_distance('{3e38}'::tinyint[], '{3e38}'::tinyint[]);
ERROR: invalid input syntax for type tinyint: "3e38"
LINE 1: SELECT cosine_distance('{3e38}'::tinyint[], '{3e38}'::tinyin...
^
SELECT '{1,2}'::tinyint[] <=> '{2,4}'::tinyint[];
?column?
----------
0
(1 row)

35
test/sql/tinyint.sql Normal file
View File

@@ -0,0 +1,35 @@
SELECT '127'::tinyint;
SELECT '128'::tinyint;
SELECT '-128'::tinyint;
SELECT '-129'::tinyint;
SELECT ''::tinyint;
SELECT ' 1'::tinyint;
SELECT '1 '::tinyint;
SELECT '1a'::tinyint;
SELECT '{1,2,3}'::tinyint[];
SELECT '128'::numeric::tinyint;
SELECT 'NaN'::numeric::tinyint;
SELECT 'Infinity'::numeric::tinyint;
SELECT l2_distance('{0,0}'::tinyint[], '{3,4}'::tinyint[]);
SELECT l2_distance('{0,0}'::tinyint[], '{0,1}'::tinyint[]);
SELECT l2_distance('{1,2}'::tinyint[], '{3}'::tinyint[]);
SELECT l2_distance('{3e38}'::tinyint[], '{-3e38}'::tinyint[]);
SELECT '{0,0}'::tinyint[] <-> '{3,4}'::tinyint[];
SELECT inner_product('{1,2}'::tinyint[], '{3,4}'::tinyint[]);
SELECT inner_product('{1,2}'::tinyint[], '{3}'::tinyint[]);
SELECT inner_product('{127}'::tinyint[], '{127}'::tinyint[]);
SELECT '{1,2}'::tinyint[] <#> '{3,4}'::tinyint[];
SELECT cosine_distance('{1,2}'::tinyint[], '{2,4}'::tinyint[]);
SELECT cosine_distance('{1,2}'::tinyint[], '{0,0}'::tinyint[]);
SELECT cosine_distance('{1,1}'::tinyint[], '{1,1}'::tinyint[]);
SELECT cosine_distance('{1,0}'::tinyint[], '{0,2}'::tinyint[]);
SELECT cosine_distance('{1,1}'::tinyint[], '{-1,-1}'::tinyint[]);
SELECT cosine_distance('{1,2}'::tinyint[], '{3}'::tinyint[]);
SELECT cosine_distance('{3e38}'::tinyint[], '{3e38}'::tinyint[]);
SELECT '{1,2}'::tinyint[] <=> '{2,4}'::tinyint[];