Added minivec type

This commit is contained in:
Andrew Kane
2024-09-23 16:45:19 -07:00
parent 87ac108bf7
commit 035a31ac91
10 changed files with 753 additions and 5 deletions

336
src/minivec.c Normal file
View File

@@ -0,0 +1,336 @@
#include "postgres.h"
#include <math.h>
#include "catalog/pg_type.h"
#include "common/shortest_dec.h"
#include "fmgr.h"
#include "minivec.h"
#include "lib/stringinfo.h"
#include "libpq/pqformat.h"
#include "port.h" /* for strtof() */
#include "sparsevec.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/float.h"
#include "utils/lsyscache.h"
#include "utils/numeric.h"
#include "vector.h"
/*
 * Verify that a vector's dimension count matches its type modifier.
 *
 * A typmod of -1 disables the check. Any other typmod that differs from
 * dim raises ERRCODE_DATA_EXCEPTION.
 */
static inline void
CheckExpectedDim(int32 typmod, int dim)
{
	/* Nothing to enforce, or the count already agrees */
	if (typmod == -1 || typmod == dim)
		return;

	ereport(ERROR,
			(errcode(ERRCODE_DATA_EXCEPTION),
			 errmsg("expected %d dimensions, not %d", typmod, dim)));
}
/*
 * Verify that a dimension count lies in [1, MINIVEC_MAX_DIM].
 *
 * Too few dimensions raises ERRCODE_DATA_EXCEPTION; too many raises
 * ERRCODE_PROGRAM_LIMIT_EXCEEDED.
 */
static inline void
CheckDim(int dim)
{
	if (dim < 1)
	{
		ereport(ERROR,
				(errcode(ERRCODE_DATA_EXCEPTION),
				 errmsg("minivec must have at least 1 dimension")));
	}
	else if (dim > MINIVEC_MAX_DIM)
	{
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("minivec cannot have more than %d dimensions", MINIVEC_MAX_DIM)));
	}
}
/*
 * Reject an element whose fp8 encoding is NaN.
 *
 * Raises ERRCODE_DATA_EXCEPTION when Fp8IsNan() reports true for value.
 */
static inline void
CheckElement(fp8 value)
{
	/* Every non-NaN encoding is accepted */
	if (!Fp8IsNan(value))
		return;

	ereport(ERROR,
			(errcode(ERRCODE_DATA_EXCEPTION),
			 errmsg("NaN not allowed in minivec")));
}
/*
 * Allocate and initialize a new mini vector.
 *
 * The result is zero-filled (palloc0), has its varlena size set to
 * MINIVEC_SIZE(dim) and its dim field set to dim.
 */
MiniVector *
InitMiniVector(int dim)
{
	const int	nbytes = MINIVEC_SIZE(dim);
	MiniVector *vec = (MiniVector *) palloc0(nbytes);

	SET_VARSIZE(vec, nbytes);
	vec->dim = dim;

	return vec;
}
/*
 * Local whitespace test, needed because array_isspace() is static.
 *
 * Returns true for space, tab, newline, carriage return, vertical tab and
 * form feed; false for every other character.
 */
static inline bool
minivec_isspace(char ch)
{
	switch (ch)
	{
		case ' ':
		case '\t':
		case '\n':
		case '\r':
		case '\v':
		case '\f':
			return true;
		default:
			return false;
	}
}
/*
 * Convert textual representation to internal representation
 *
 * Argument 0 is the input C string and argument 2 is the type modifier.
 * The accepted syntax is optional whitespace, '[', one or more numbers
 * separated by ',', ']' and optional trailing whitespace. Whitespace is
 * also allowed around each number.
 *
 * Each number is parsed with strtof() and converted to fp8 with
 * Float4ToFp8Unchecked(). Errors are raised for malformed syntax, for more
 * than MINIVEC_MAX_DIM elements, for values that overflow float or fp8, for
 * NaN elements, and for a dimension count that disagrees with the typmod.
 */
FUNCTION_PREFIX PG_FUNCTION_INFO_V1(minivec_in);
Datum
minivec_in(PG_FUNCTION_ARGS)
{
char *lit = PG_GETARG_CSTRING(0);
int32 typmod = PG_GETARG_INT32(2);
/* Scratch buffer for parsed elements, sized for the largest allowed vector */
fp8 x[MINIVEC_MAX_DIM];
int dim = 0;
char *pt = lit;
MiniVector *result;
/* Skip leading whitespace; the first other character must be '[' */
while (minivec_isspace(*pt))
pt++;
if (*pt != '[')
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type minivec: \"%s\"", lit),
errdetail("Vector contents must start with \"[\".")));
pt++;
while (minivec_isspace(*pt))
pt++;
/* "[]" (possibly with inner whitespace) is an empty vector, which is rejected */
if (*pt == ']')
ereport(ERROR,
(errcode(ERRCODE_DATA_EXCEPTION),
errmsg("minivec must have at least 1 dimension")));
/* Parse one element per iteration until the closing ']' */
for (;;)
{
float val;
char *stringEnd;
/* Refuse to write past the end of x[] */
if (dim == MINIVEC_MAX_DIM)
ereport(ERROR,
(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
errmsg("minivec cannot have more than %d dimensions", MINIVEC_MAX_DIM)));
while (minivec_isspace(*pt))
pt++;
/* Check for empty string like float4in */
if (*pt == '\0')
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type minivec: \"%s\"", lit)));
/* Clear errno so that ERANGE below can only come from this strtof() call */
errno = 0;
/* Postgres sets LC_NUMERIC to C on startup */
val = strtof(pt, &stringEnd);
/* No characters consumed means there was no number at this position */
if (stringEnd == pt)
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type minivec: \"%s\"", lit)));
x[dim] = Float4ToFp8Unchecked(val);
/*
 * Check for range error like float4in. The second condition catches a
 * value that was not NaN as a float but became NaN as fp8, which is how
 * Float4ToFp8Unchecked() encodes infinities and overflow.
 */
if ((errno == ERANGE && isinf(val)) || (Fp8IsNan(x[dim]) && !isnan(val)))
ereport(ERROR,
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
errmsg("\"%s\" is out of range for type minivec", pnstrdup(pt, stringEnd - pt))));
/* Reject a NaN that was written explicitly in the input */
CheckElement(x[dim]);
dim++;
/* Continue scanning right after the number just parsed */
pt = stringEnd;
while (minivec_isspace(*pt))
pt++;
/* ',' introduces another element, ']' ends the list, anything else is an error */
if (*pt == ',')
pt++;
else if (*pt == ']')
{
pt++;
break;
}
else
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type minivec: \"%s\"", lit)));
}
/* Only whitespace is allowed after the closing bracket */
while (minivec_isspace(*pt))
pt++;
if (*pt != '\0')
ereport(ERROR,
(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
errmsg("invalid input syntax for type minivec: \"%s\"", lit),
errdetail("Junk after closing right brace.")));
CheckDim(dim);
CheckExpectedDim(typmod, dim);
/* Copy the parsed elements out of the scratch buffer into the result */
result = InitMiniVector(dim);
for (int i = 0; i < dim; i++)
result->x[i] = x[i];
PG_RETURN_POINTER(result);
}
/* Write one character at ptr and advance ptr past it */
#define AppendChar(ptr, c) (*(ptr)++ = (c))
/* Write the shortest decimal form of f at ptr and advance ptr past it */
#define AppendFloat(ptr, f) ((ptr) += float_to_shortest_decimal_bufn((f), (ptr)))

/*
 * Convert internal representation to textual representation
 *
 * Produces "[v1,v2,...]" where each element is decoded to a float with
 * Fp8ToFloat4() and printed in its shortest decimal form.
 */
FUNCTION_PREFIX PG_FUNCTION_INFO_V1(minivec_out);
Datum
minivec_out(PG_FUNCTION_ARGS)
{
	MiniVector *vec = PG_GETARG_MINIVEC_P(0);
	const int	ndims = vec->dim;
	char	   *text;
	char	   *cursor;

	/*
	 * Space budget:
	 *
	 * ndims * (FLOAT_SHORTEST_DECIMAL_LEN - 1) bytes written by
	 * float_to_shortest_decimal_bufn
	 *
	 * ndims - 1 bytes of separators
	 *
	 * 3 bytes for [, ], and \0
	 *
	 * which sums to FLOAT_SHORTEST_DECIMAL_LEN * ndims + 2.
	 */
	text = (char *) palloc(FLOAT_SHORTEST_DECIMAL_LEN * ndims + 2);
	cursor = text;

	AppendChar(cursor, '[');

	for (int i = 0; i < ndims; i++)
	{
		/*
		 * Elements are printed as single-precision floats for simplicity
		 */
		const float elem = Fp8ToFloat4(vec->x[i]);

		/* A separator precedes every element except the first */
		if (i != 0)
			AppendChar(cursor, ',');

		AppendFloat(cursor, elem);
	}

	AppendChar(cursor, ']');
	*cursor = '\0';

	PG_FREE_IF_COPY(vec, 0);
	PG_RETURN_CSTRING(text);
}
/*
 * Convert type modifier
 *
 * Expects exactly one integer modifier, the dimension count, in the range
 * [1, MINIVEC_MAX_DIM], and returns it unchanged. Anything else raises
 * ERRCODE_INVALID_PARAMETER_VALUE.
 */
FUNCTION_PREFIX PG_FUNCTION_INFO_V1(minivec_typmod_in);
Datum
minivec_typmod_in(PG_FUNCTION_ARGS)
{
	ArrayType  *modifiers = PG_GETARG_ARRAYTYPE_P(0);
	int			count;
	int32	   *values = ArrayGetIntegerTypmods(modifiers, &count);
	int32		dims;

	if (count != 1)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("invalid type modifier")));

	/* Safe to read only once we know exactly one modifier is present */
	dims = values[0];

	if (dims < 1)
	{
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("dimensions for type minivec must be at least 1")));
	}
	else if (dims > MINIVEC_MAX_DIM)
	{
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("dimensions for type minivec cannot exceed %d", MINIVEC_MAX_DIM)));
	}

	PG_RETURN_INT32(dims);
}
/*
* Convert external binary representation to internal representation
*/
FUNCTION_PREFIX PG_FUNCTION_INFO_V1(minivec_recv);
Datum
minivec_recv(PG_FUNCTION_ARGS)
{
StringInfo buf = (StringInfo) PG_GETARG_POINTER(0);
int32 typmod = PG_GETARG_INT32(2);
MiniVector *result;
int16 dim;
int16 unused;
dim = pq_getmsgint(buf, sizeof(int16));
unused = pq_getmsgint(buf, sizeof(int16));
CheckDim(dim);
CheckExpectedDim(typmod, dim);
if (unused != 0)
ereport(ERROR,
(errcode(ERRCODE_DATA_EXCEPTION),
errmsg("expected unused to be 0, not %d", unused)));
result = InitMiniVector(dim);
for (int i = 0; i < dim; i++)
{
result->x[i] = pq_getmsgint(buf, sizeof(uint8));
CheckElement(result->x[i]);
}
PG_RETURN_POINTER(result);
}
/*
 * Convert internal representation to the external binary representation
 *
 * Emits the 16-bit dimension count, the 16-bit unused field, and then one
 * byte per element.
 */
FUNCTION_PREFIX PG_FUNCTION_INFO_V1(minivec_send);
Datum
minivec_send(PG_FUNCTION_ARGS)
{
	MiniVector *vector = PG_GETARG_MINIVEC_P(0);
	StringInfoData out;
	int			i = 0;

	pq_begintypsend(&out);

	/* Header */
	pq_sendint(&out, vector->dim, sizeof(int16));
	pq_sendint(&out, vector->unused, sizeof(int16));

	/* Payload: raw fp8 bytes */
	while (i < vector->dim)
	{
		pq_sendint8(&out, vector->x[i]);
		i++;
	}

	PG_RETURN_BYTEA_P(pq_endtypsend(&out));
}

126
src/minivec.h Normal file
View File

@@ -0,0 +1,126 @@
#ifndef MINIVEC_H
#define MINIVEC_H
#include <float.h>
/* Largest number of dimensions a minivec may have */
#define MINIVEC_MAX_DIM 16000
/* Storage type of one element: an 8-bit float held in a uint8 */
#define fp8 uint8
/* Total size in bytes of a MiniVector with _dim elements (header plus data) */
#define MINIVEC_SIZE(_dim) (offsetof(MiniVector, x) + sizeof(fp8)*(_dim))
/* Detoast a Datum and treat the result as a MiniVector pointer */
#define DatumGetMiniVector(x) ((MiniVector *) PG_DETOAST_DATUM(x))
/* Fetch function argument number x as a MiniVector pointer */
#define PG_GETARG_MINIVEC_P(x) DatumGetMiniVector(PG_GETARG_DATUM(x))
/* Return a MiniVector pointer from a function */
#define PG_RETURN_MINIVEC_P(x) PG_RETURN_POINTER(x)
/*
 * Variable-length vector of fp8 elements.
 *
 * The fixed header is followed by dim elements; MINIVEC_SIZE() gives the
 * total size in bytes for a given dimension count.
 */
typedef struct MiniVector
{
int32 vl_len_; /* varlena header (do not touch directly!) */
int16 dim; /* number of dimensions */
int16 unused; /* reserved for future use, always zero */
fp8 x[FLEXIBLE_ARRAY_MEMBER]; /* the elements, one fp8 each */
} MiniVector;
/* Allocate a zero-filled MiniVector with room for dim elements */
MiniVector *InitMiniVector(int dim);
/*
 * Report whether an fp8 is NaN.
 *
 * NaN is the encoding whose seven non-sign bits are all set; the sign bit
 * is ignored.
 */
static inline bool
Fp8IsNan(fp8 num)
{
	const fp8	magnitude = num & 0x7F;

	return magnitude == 0x7F;
}
/*
 * Magnitude of every fp8 encoding, indexed by the low seven bits (the sign
 * bit is handled by the caller).
 *
 * With e = index >> 3 and m = index & 7:
 *   e == 0: m * 2^-9 (subnormal)
 *   e >= 1: (8 + m) * 2^(e - 10)
 * Index 127 is NaN.
 *
 * Every entry is written out exactly. Rounded decimal literals such as
 * 0.00195312 do not convert to the exact power-of-two fractions, which
 * would make decoded values differ from the true fp8 value.
 *
 * The table is static const because this header can be included from
 * several translation units: a plain global definition here would be
 * emitted by each of them and clash at link time.
 */
static const float lookup[128] = {
	/* e = 0 (subnormal) */
	0.0f, 0.001953125f, 0.00390625f, 0.005859375f, 0.0078125f, 0.009765625f, 0.01171875f, 0.013671875f,
	/* e = 1 */
	0.015625f, 0.017578125f, 0.01953125f, 0.021484375f, 0.0234375f, 0.025390625f, 0.02734375f, 0.029296875f,
	/* e = 2 */
	0.03125f, 0.03515625f, 0.0390625f, 0.04296875f, 0.046875f, 0.05078125f, 0.0546875f, 0.05859375f,
	/* e = 3 */
	0.0625f, 0.0703125f, 0.078125f, 0.0859375f, 0.09375f, 0.1015625f, 0.109375f, 0.1171875f,
	/* e = 4 */
	0.125f, 0.140625f, 0.15625f, 0.171875f, 0.1875f, 0.203125f, 0.21875f, 0.234375f,
	/* e = 5 */
	0.25f, 0.28125f, 0.3125f, 0.34375f, 0.375f, 0.40625f, 0.4375f, 0.46875f,
	/* e = 6 */
	0.5f, 0.5625f, 0.625f, 0.6875f, 0.75f, 0.8125f, 0.875f, 0.9375f,
	/* e = 7 */
	1.0f, 1.125f, 1.25f, 1.375f, 1.5f, 1.625f, 1.75f, 1.875f,
	/* e = 8 */
	2.0f, 2.25f, 2.5f, 2.75f, 3.0f, 3.25f, 3.5f, 3.75f,
	/* e = 9 */
	4.0f, 4.5f, 5.0f, 5.5f, 6.0f, 6.5f, 7.0f, 7.5f,
	/* e = 10 */
	8.0f, 9.0f, 10.0f, 11.0f, 12.0f, 13.0f, 14.0f, 15.0f,
	/* e = 11 */
	16.0f, 18.0f, 20.0f, 22.0f, 24.0f, 26.0f, 28.0f, 30.0f,
	/* e = 12 */
	32.0f, 36.0f, 40.0f, 44.0f, 48.0f, 52.0f, 56.0f, 60.0f,
	/* e = 13 */
	64.0f, 72.0f, 80.0f, 88.0f, 96.0f, 104.0f, 112.0f, 120.0f,
	/* e = 14 */
	128.0f, 144.0f, 160.0f, 176.0f, 192.0f, 208.0f, 224.0f, 240.0f,
	/* e = 15; the all-ones pattern is NaN */
	256.0f, 288.0f, 320.0f, 352.0f, 384.0f, 416.0f, 448.0f, NAN
};
/*
 * Convert a fp8 to a float4
 *
 * The low seven bits select the magnitude from the lookup table and the
 * top bit, when set, negates it.
 */
static inline float
Fp8ToFloat4(fp8 num)
{
	const float magnitude = lookup[num & 0x7F];

	if ((num & 0x80) != 0)
		return -magnitude;

	return magnitude;
}
/*
 * Convert a float4 to a fp8
 *
 * Result layout: 1 sign bit, 4 exponent bits (bias 7) and 3 mantissa bits.
 * There is no infinity encoding; the pattern with all seven non-sign bits
 * set is NaN.
 *
 * - Infinite and NaN inputs produce NaN.
 * - Inputs whose magnitude is too large produce NaN as well.
 * - Inputs with a float exponent field of 116 or less (magnitude below
 *   2^-10) produce a signed zero.
 * - Everything else is rounded to nearest, with ties going to the value
 *   whose last mantissa bit is zero.
 *
 * "Unchecked" means no error is raised here: the caller has to test the
 * result with Fp8IsNan() to detect overflow.
 */
static inline fp8
Float4ToFp8Unchecked(float num)
{
	union
	{
		float		f;
		uint32		i;
	}			swapfloat;

	uint32		bin;
	int			exponent;
	int			mantissa;
	uint8		result;

	/* Reinterpret the float's bits as an integer */
	swapfloat.f = num;
	bin = swapfloat.i;
	exponent = (bin & 0x7F800000) >> 23;
	mantissa = bin & 0x007FFFFF;

	/* Sign */
	result = (bin & 0x80000000) >> 24;

	if (isinf(num) || isnan(num))
	{
		/* NaN */
		result |= 0x7F;
	}
	else if (exponent > 116)
	{
		int			m;
		int			gr;
		int			s;

		exponent -= 127;

		/*
		 * Sticky bits are the bits strictly below the round bit (bit 19),
		 * i.e. bits 0..18. The round bit itself must stay out of this mask:
		 * otherwise s is non-zero whenever the round bit is set and an
		 * exact tie can never be detected.
		 */
		s = mantissa & 0x0007FFFF;

		/* Subnormal */
		if (exponent < -6)
		{
			int			diff = -exponent - 6;

			/* Shift into place and make the implicit leading 1 explicit */
			mantissa >>= diff;
			mantissa += 1 << (23 - diff);

			/* Bits that moved below the round bit also count as sticky */
			s |= mantissa & 0x0007FFFF;
		}

		/* Keep the top three mantissa bits */
		m = mantissa >> 20;

		/*
		 * Round: gr holds the last kept bit (guard) and the round bit.
		 * 3 = odd and at least halfway, so round up; 1 = even and at least
		 * halfway, so round up only when strictly above halfway.
		 */
		gr = (mantissa >> 19) % 4;
		if (gr == 3 || (gr == 1 && s != 0))
			m += 1;

		/* Mantissa overflowed into the next power of two */
		if (m == 8)
		{
			m = 0;
			exponent += 1;
		}

		if (exponent > 8)
		{
			/* Infinite, which is NaN */
			result |= 0x7F;
		}
		else
		{
			/* Subnormal results keep an exponent field of zero */
			if (exponent >= -7)
				result |= (exponent + 7) << 3;

			result |= m;
		}
	}

	return result;
}
#endif