Added minivec type

2026-07-05 12:10:55 +08:00 · 2024-09-23 16:45:19 -07:00
parent 87ac108bf7
commit 035a31ac91
10 changed files with 753 additions and 5 deletions
--- a/src/minivec.c
+++ b/src/minivec.c
@@ -0,0 +1,336 @@
+#include "postgres.h"
+
+#include <math.h>
+
+#include "catalog/pg_type.h"
+#include "common/shortest_dec.h"
+#include "fmgr.h"
+#include "minivec.h"
+#include "lib/stringinfo.h"
+#include "libpq/pqformat.h"
+#include "port.h"				/* for strtof() */
+#include "sparsevec.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/float.h"
+#include "utils/lsyscache.h"
+#include "utils/numeric.h"
+#include "vector.h"
+
+/*
+ * Raise an error if typmod is set (not -1) and does not equal dim
+ */
+static inline void
+CheckExpectedDim(int32 typmod, int dim)
+{
+	if (typmod == -1 || typmod == dim)
+		return;
+	ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION),
+					errmsg("expected %d dimensions, not %d", typmod, dim)));
+}
+
+/*
+ * Ensure valid dimensions: dim must lie within 1..MINIVEC_MAX_DIM.
+ * Too few is a data exception; too many is a program limit error.
+ */
+static inline void
+CheckDim(int dim)
+{
+	if (dim > MINIVEC_MAX_DIM)
+		ereport(ERROR,
+				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+				 errmsg("minivec cannot have more than %d dimensions", MINIVEC_MAX_DIM)));
+	else if (dim < 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("minivec must have at least 1 dimension")));
+}
+
+/*
+ * Ensure the element is not NaN; raises an error when Fp8IsNan(value) holds
+ */
+static inline void
+CheckElement(fp8 value)
+{
+	if (!Fp8IsNan(value))
+		return;
+	ereport(ERROR, (errcode(ERRCODE_DATA_EXCEPTION),
+					errmsg("NaN not allowed in minivec")));
+}
+
+/*
+ * Allocate and initialize a new mini vector with room for dim elements.
+ * The memory is zero-filled, then the varlena length and dim are set.
+ * dim is not validated here.
+ */
+MiniVector *
+InitMiniVector(int dim)
+{
+	int			nbytes = MINIVEC_SIZE(dim);
+	MiniVector *vec = (MiniVector *) palloc0(nbytes);
+
+	vec->dim = dim;
+	SET_VARSIZE(vec, nbytes);
+
+	return vec;
+}
+
+/*
+ * Check for whitespace (local helper, since array_isspace() is static)
+ */
+static inline bool
+minivec_isspace(char ch)
+{
+	switch (ch)
+	{
+		case ' ': case '\t': case '\n':
+		case '\r': case '\v': case '\f':
+			return true;
+		default:
+			return false;
+	}
+}
+
+/*
+ * Convert textual representation ("[x1,x2,...]") to internal representation
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(minivec_in);
+Datum
+minivec_in(PG_FUNCTION_ARGS)
+{
+	char	   *lit = PG_GETARG_CSTRING(0);
+	int32		typmod = PG_GETARG_INT32(2);	/* checked by CheckExpectedDim */
+	fp8			x[MINIVEC_MAX_DIM];	/* parsed elements, copied out at the end */
+	int			dim = 0;	/* number of elements parsed so far */
+	char	   *pt = lit;	/* parse cursor into lit */
+	MiniVector *result;
+
+	while (minivec_isspace(*pt))
+		pt++;
+
+	if (*pt != '[')
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("invalid input syntax for type minivec: \"%s\"", lit),
+				 errdetail("Vector contents must start with \"[\".")));
+
+	pt++;
+
+	while (minivec_isspace(*pt))
+		pt++;
+
+	if (*pt == ']')	/* "[]" gets the dimension error, not a syntax error */
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("minivec must have at least 1 dimension")));
+
+	for (;;)
+	{
+		float		val;
+		char	   *stringEnd;
+
+		if (dim == MINIVEC_MAX_DIM)	/* checked before x[dim] is written */
+			ereport(ERROR,
+					(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
+					 errmsg("minivec cannot have more than %d dimensions", MINIVEC_MAX_DIM)));
+
+		while (minivec_isspace(*pt))
+			pt++;
+
+		/* Check for empty string like float4in */
+		if (*pt == '\0')
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("invalid input syntax for type minivec: \"%s\"", lit)));
+
+		errno = 0;	/* reset so ERANGE below can only come from strtof */
+
+		/* Postgres sets LC_NUMERIC to C on startup */
+		val = strtof(pt, &stringEnd);
+
+		if (stringEnd == pt)	/* strtof consumed nothing: not a number */
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("invalid input syntax for type minivec: \"%s\"", lit)));
+
+		x[dim] = Float4ToFp8Unchecked(val);
+
+		/* Range error: strtof overflowed, or a non-NaN value became the fp8 NaN code */
+		if ((errno == ERANGE && isinf(val)) || (Fp8IsNan(x[dim]) && !isnan(val)))
+			ereport(ERROR,
+					(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
+					 errmsg("\"%s\" is out of range for type minivec", pnstrdup(pt, stringEnd - pt))));
+
+		CheckElement(x[dim]);	/* rejects an element that parsed as NaN */
+		dim++;
+
+		pt = stringEnd;
+
+		while (minivec_isspace(*pt))
+			pt++;
+
+		if (*pt == ',')
+			pt++;
+		else if (*pt == ']')
+		{
+			pt++;
+			break;
+		}
+		else
+			ereport(ERROR,
+					(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+					 errmsg("invalid input syntax for type minivec: \"%s\"", lit)));
+	}
+
+	/* Only whitespace is allowed after the closing bracket */
+	while (minivec_isspace(*pt))
+		pt++;
+
+	if (*pt != '\0')
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_TEXT_REPRESENTATION),
+				 errmsg("invalid input syntax for type minivec: \"%s\"", lit),
+				 errdetail("Junk after closing right brace.")));
+
+	CheckDim(dim);
+	CheckExpectedDim(typmod, dim);
+
+	result = InitMiniVector(dim);
+	for (int i = 0; i < dim; i++)
+		result->x[i] = x[i];
+
+	PG_RETURN_POINTER(result);
+}
+
+#define AppendChar(ptr, c) (*(ptr)++ = (c))	/* store c at ptr, then advance ptr by one */
+#define AppendFloat(ptr, f) ((ptr) += float_to_shortest_decimal_bufn((f), (ptr)))	/* format f at ptr, advance ptr by the returned count */
+
+/*
+ * Convert internal representation to textual representation ("[x1,x2,...]")
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(minivec_out);
+Datum
+minivec_out(PG_FUNCTION_ARGS)
+{
+	MiniVector *vector = PG_GETARG_MINIVEC_P(0);
+	int			dim = vector->dim;
+	char	   *buf;	/* start of the output string */
+	char	   *ptr;	/* write cursor into buf */
+
+	/*
+	 * Need FLOAT_SHORTEST_DECIMAL_LEN * dim + 2 bytes in total:
+	 *
+	 * dim * (FLOAT_SHORTEST_DECIMAL_LEN - 1) bytes for
+	 * float_to_shortest_decimal_bufn
+	 *
+	 * dim - 1 bytes for separator
+	 *
+	 * 3 bytes for [, ], and \0
+	 */
+	buf = (char *) palloc(FLOAT_SHORTEST_DECIMAL_LEN * dim + 2);
+	ptr = buf;
+
+	AppendChar(ptr, '[');
+
+	for (int i = 0; i < dim; i++)
+	{
+		if (i > 0)
+			AppendChar(ptr, ',');
+
+		/*
+		 * Use shortest decimal representation of single-precision float for
+		 * simplicity
+		 */
+		AppendFloat(ptr, Fp8ToFloat4(vector->x[i]));
+	}
+
+	AppendChar(ptr, ']');
+	*ptr = '\0';	/* terminate; AppendChar/AppendFloat do not write a NUL here */
+
+	PG_FREE_IF_COPY(vector, 0);
+	PG_RETURN_CSTRING(buf);
+}
+
+/*
+ * Convert type modifier: accepts exactly one integer in 1..MINIVEC_MAX_DIM
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(minivec_typmod_in);
+Datum
+minivec_typmod_in(PG_FUNCTION_ARGS)
+{
+	ArrayType  *ta = PG_GETARG_ARRAYTYPE_P(0);
+	int32	   *tl;	/* modifier values extracted from ta */
+	int			n;	/* number of values in tl */
+
+	tl = ArrayGetIntegerTypmods(ta, &n);
+
+	if (n != 1)	/* only a single modifier (the dimension count) is accepted */
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("invalid type modifier")));
+
+	if (*tl < 1)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("dimensions for type minivec must be at least 1")));
+
+	if (*tl > MINIVEC_MAX_DIM)
+		ereport(ERROR,
+				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+				 errmsg("dimensions for type minivec cannot exceed %d", MINIVEC_MAX_DIM)));
+
+	PG_RETURN_INT32(*tl);	/* the typmod is the dimension count itself */
+}
+
+/*
+ * Convert external binary representation (int16 dim, int16 unused, then dim fp8 bytes) to internal representation
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(minivec_recv);
+Datum
+minivec_recv(PG_FUNCTION_ARGS)
+{
+	StringInfo	buf = (StringInfo) PG_GETARG_POINTER(0);
+	int32		typmod = PG_GETARG_INT32(2);	/* checked by CheckExpectedDim */
+	MiniVector *result;
+	int16		dim;
+	int16		unused;
+
+	dim = pq_getmsgint(buf, sizeof(int16));	/* read order matches minivec_send */
+	unused = pq_getmsgint(buf, sizeof(int16));
+
+	CheckDim(dim);
+	CheckExpectedDim(typmod, dim);
+
+	if (unused != 0)	/* the reserved field must be zero on input */
+		ereport(ERROR,
+				(errcode(ERRCODE_DATA_EXCEPTION),
+				 errmsg("expected unused to be 0, not %d", unused)));
+
+	result = InitMiniVector(dim);
+	for (int i = 0; i < dim; i++)
+	{
+		result->x[i] = pq_getmsgint(buf, sizeof(uint8));	/* one byte per element, stored as-is */
+		CheckElement(result->x[i]);	/* reject NaN encodings */
+	}
+
+	PG_RETURN_POINTER(result);
+}
+
+/*
+ * Convert internal representation to the external binary representation (int16 dim, int16 unused, then dim fp8 bytes)
+ */
+FUNCTION_PREFIX PG_FUNCTION_INFO_V1(minivec_send);
+Datum
+minivec_send(PG_FUNCTION_ARGS)
+{
+	MiniVector *vec = PG_GETARG_MINIVEC_P(0);
+	StringInfoData buf;	/* message buffer being built */
+
+	pq_begintypsend(&buf);
+	pq_sendint(&buf, vec->dim, sizeof(int16));	/* write order matches minivec_recv */
+	pq_sendint(&buf, vec->unused, sizeof(int16));
+	for (int i = 0; i < vec->dim; i++)
+		pq_sendint8(&buf, vec->x[i]);	/* one byte per element, sent as-is */
+
+	PG_RETURN_BYTEA_P(pq_endtypsend(&buf));
+}
--- a/src/minivec.h
+++ b/src/minivec.h
@@ -0,0 +1,126 @@
+#ifndef MINIVEC_H
+#define MINIVEC_H
+
+#include <float.h>
+
+#define MINIVEC_MAX_DIM 16000	/* maximum number of dimensions */
+
+#define fp8 uint8	/* one byte: bit 7 is the sign, the low 7 bits index the magnitude table */
+
+#define MINIVEC_SIZE(_dim)		(offsetof(MiniVector, x) + sizeof(fp8)*(_dim))	/* bytes for the fixed fields plus _dim elements */
+#define DatumGetMiniVector(x)		((MiniVector *) PG_DETOAST_DATUM(x))	/* detoast a Datum and view it as a MiniVector */
+#define PG_GETARG_MINIVEC_P(x)	DatumGetMiniVector(PG_GETARG_DATUM(x))	/* fetch argument number x as a MiniVector */
+#define PG_RETURN_MINIVEC_P(x)	PG_RETURN_POINTER(x)	/* return a MiniVector pointer from a function */
+
+typedef struct MiniVector
+{
+	int32		vl_len_;		/* varlena header (do not touch directly!) */
+	int16		dim;			/* number of dimensions */
+	int16		unused;			/* reserved for future use, always zero */
+	fp8			x[FLEXIBLE_ARRAY_MEMBER];	/* the elements, one fp8 each */
+}			MiniVector;
+
+MiniVector *InitMiniVector(int dim);	/* defined in minivec.c */
+
+/* Check if fp8 is NaN: the magnitude bits are all ones, with either sign */
+static inline bool
+Fp8IsNan(fp8 num)
+{
+	fp8			magnitude = num & 0x7F;	/* drop the sign bit (0x80) */
+
+	return magnitude == 0x7F;
+}
+
+/* Exact magnitude for each 7-bit fp8 code (entry 127 is NaN). Kept static const
+ * so every file including this header gets a private, read-only copy. */
+static const float lookup[128] = {0, 0.001953125, 0.00390625, 0.005859375, 0.0078125, 0.009765625, 0.01171875, 0.013671875, 0.015625, 0.017578125, 0.01953125, 0.021484375, 0.0234375, 0.025390625, 0.02734375, 0.029296875, 0.03125, 0.03515625, 0.0390625, 0.04296875, 0.046875, 0.05078125, 0.0546875, 0.05859375, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.1015625, 0.109375, 0.1171875, 0.125, 0.140625, 0.15625, 0.171875, 0.1875, 0.203125, 0.21875, 0.234375, 0.25, 0.28125, 0.3125, 0.34375, 0.375, 0.40625, 0.4375, 0.46875, 0.5, 0.5625, 0.625, 0.6875, 0.75, 0.8125, 0.875, 0.9375, 1, 1.125, 1.25, 1.375, 1.5, 1.625, 1.75, 1.875, 2, 2.25, 2.5, 2.75, 3, 3.25, 3.5, 3.75, 4, 4.5, 5, 5.5, 6, 6.5, 7, 7.5, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 22, 24, 26, 28, 30, 32, 36, 40, 44, 48, 52, 56, 60, 64, 72, 80, 88, 96, 104, 112, 120, 128, 144, 160, 176, 192, 208, 224, 240, 256, 288, 320, 352, 384, 416, 448, NAN};
+
+/* Convert a fp8 to a float4: table lookup, negated when the sign bit is set */
+static inline float
+Fp8ToFloat4(fp8 num)
+{
+	float		v = lookup[num & 0x7F];
+
+	return (num & 0x80) == 0x80 ? -v : v;
+}
+
+/*
+ * Convert a float4 to a fp8: sign bit, 4 exponent bits (bias 7), 3 mantissa
+ * bits. Rounds to nearest, ties to even; Inf, NaN and overflow give 0x7F.
+ */
+static inline fp8
+Float4ToFp8Unchecked(float num)
+{
+	union
+	{
+		float		f;
+		uint32		i;
+	}			swapfloat;
+	uint32		bin;
+	int			exponent;
+	int			mantissa;
+	uint8		result;
+
+	swapfloat.f = num;
+	bin = swapfloat.i;
+	exponent = (bin & 0x7F800000) >> 23;
+	mantissa = bin & 0x007FFFFF;
+
+	/* Sign: move float4 bit 31 down to fp8 bit 7 */
+	result = (bin & 0x80000000) >> 24;
+
+	if (isinf(num) || isnan(num))
+	{
+		/* NaN (infinity has no encoding of its own and maps here too) */
+		result |= 0x7F;
+	}
+	else if (exponent > 116)	/* magnitude >= 2^-10; smaller values keep only the sign */
+	{
+		int			m;
+		int			gr;
+		int			s;
+
+		exponent -= 127;
+		s = mantissa & 0x0007FFFF;	/* sticky: bits 0..18, strictly below round bit 19 */
+
+		/* Subnormal: shift in the implicit leading 1, keep shifted-out bits sticky */
+		if (exponent < -6)
+		{
+			int			diff = -exponent - 6;
+
+			mantissa >>= diff;
+			mantissa += 1 << (23 - diff);
+			s |= mantissa & 0x0007FFFF;
+		}
+
+		m = mantissa >> 20;
+
+		/* Round: gr is (lowest kept bit, round bit); exact ties go to even */
+		gr = (mantissa >> 19) % 4;
+		if (gr == 3 || (gr == 1 && s != 0))
+			m += 1;
+
+		if (m == 8)		/* rounding carried out of the 3 mantissa bits */
+		{
+			m = 0;
+			exponent += 1;
+		}
+
+		if (exponent > 8)
+		{
+			/* Infinite, which is NaN */
+			result |= 0x7F;
+		}
+		else
+		{
+			if (exponent >= -7)
+				result |= (exponent + 7) << 3;
+
+			result |= m;	/* exponent 8 with m 7 also forms the NaN code 0x7F */
+		}
+	}
+
+	return result;
+}
+
+#endif