mirror of
https://github.com/pgvector/pgvector.git
synced 2026-07-03 19:20:56 +08:00
Added e2m5 version
This commit is contained in:
@@ -69,6 +69,11 @@ CheckElement(fp8 value)
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||
errmsg("NaN not allowed in minivec")));
|
||||
|
||||
if (Fp8IsInf(value))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_DATA_EXCEPTION),
|
||||
errmsg("infinite value not allowed in minivec")));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -169,7 +174,7 @@ minivec_in(PG_FUNCTION_ARGS)
|
||||
x[dim] = Float4ToFp8Unchecked(val);
|
||||
|
||||
/* Check for range error like float4in */
|
||||
if ((errno == ERANGE && isinf(val)) || (Fp8IsNan(x[dim]) && !isnan(val)))
|
||||
if ((errno == ERANGE && isinf(val)) || (Fp8IsInf(x[dim]) && !isinf(val)))
|
||||
ereport(ERROR,
|
||||
(errcode(ERRCODE_NUMERIC_VALUE_OUT_OF_RANGE),
|
||||
errmsg("\"%s\" is out of range for type minivec", pnstrdup(pt, stringEnd - pt))));
|
||||
@@ -739,7 +744,7 @@ minivec_l2_normalize(PG_FUNCTION_ARGS)
|
||||
/* Check for overflow */
|
||||
for (int i = 0; i < a->dim; i++)
|
||||
{
|
||||
if (Fp8IsNan(rx[i]))
|
||||
if (Fp8IsInf(rx[i]))
|
||||
float_overflow_error();
|
||||
}
|
||||
}
|
||||
@@ -768,18 +773,12 @@ minivec_add(PG_FUNCTION_ARGS)
|
||||
|
||||
/* Auto-vectorized */
|
||||
for (int i = 0, imax = a->dim; i < imax; i++)
|
||||
{
|
||||
#ifdef FLT16_SUPPORT
|
||||
rx[i] = ax[i] + bx[i];
|
||||
#else
|
||||
rx[i] = Float4ToFp8Unchecked(Fp8ToFloat4(ax[i]) + Fp8ToFloat4(bx[i]));
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Check for overflow */
|
||||
for (int i = 0, imax = a->dim; i < imax; i++)
|
||||
{
|
||||
if (Fp8IsNan(rx[i]))
|
||||
if (Fp8IsInf(rx[i]))
|
||||
float_overflow_error();
|
||||
}
|
||||
|
||||
@@ -807,18 +806,12 @@ minivec_sub(PG_FUNCTION_ARGS)
|
||||
|
||||
/* Auto-vectorized */
|
||||
for (int i = 0, imax = a->dim; i < imax; i++)
|
||||
{
|
||||
#ifdef FLT16_SUPPORT
|
||||
rx[i] = ax[i] - bx[i];
|
||||
#else
|
||||
rx[i] = Float4ToFp8Unchecked(Fp8ToFloat4(ax[i]) - Fp8ToFloat4(bx[i]));
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Check for overflow */
|
||||
for (int i = 0, imax = a->dim; i < imax; i++)
|
||||
{
|
||||
if (Fp8IsNan(rx[i]))
|
||||
if (Fp8IsInf(rx[i]))
|
||||
float_overflow_error();
|
||||
}
|
||||
|
||||
@@ -846,18 +839,12 @@ minivec_mul(PG_FUNCTION_ARGS)
|
||||
|
||||
/* Auto-vectorized */
|
||||
for (int i = 0, imax = a->dim; i < imax; i++)
|
||||
{
|
||||
#ifdef FLT16_SUPPORT
|
||||
rx[i] = ax[i] * bx[i];
|
||||
#else
|
||||
rx[i] = Float4ToFp8Unchecked(Fp8ToFloat4(ax[i]) * Fp8ToFloat4(bx[i]));
|
||||
#endif
|
||||
}
|
||||
|
||||
/* Check for overflow and underflow */
|
||||
for (int i = 0, imax = a->dim; i < imax; i++)
|
||||
{
|
||||
if (Fp8IsNan(rx[i]))
|
||||
if (Fp8IsInf(rx[i]))
|
||||
float_overflow_error();
|
||||
|
||||
if (Fp8IsZero(rx[i]) && !(Fp8IsZero(ax[i]) || Fp8IsZero(bx[i])))
|
||||
|
||||
@@ -28,7 +28,16 @@ MiniVector *InitMiniVector(int dim);
|
||||
static inline bool
|
||||
Fp8IsNan(fp8 num)
|
||||
{
|
||||
/*
 * NOTE(review): two return statements appear back to back below. This text
 * looks like a rendered diff with its +/- markers stripped, so presumably
 * one is the removed line and the other its replacement - confirm against
 * the repository. Read literally as C, only the first return is reachable.
 */
/* True only when all seven bits under the 0x7F mask are set */
return (num & 0x7F) == 0x7F;
|
||||
/*
 * True when every bit of the 0x7C field is set and the low two bits are not
 * both zero, i.e. the field is saturated but the value is not the 0x7C
 * pattern that Fp8IsInf accepts
 */
return (num & 0x7C) == 0x7C && (num & 0x7F) != 0x7C;
|
||||
}
|
||||
|
||||
/*
 * Check if fp8 is infinite
 *
 * The 0x7F mask drops the top bit (0x80), so the answer is the same for
 * either value of that bit. The remaining seven bits must equal 0x7C
 * exactly: the 0x7C field fully set and the low two bits clear.
 */
|
||||
static inline bool
|
||||
Fp8IsInf(fp8 num)
|
||||
{
|
||||
/* 0x7C is the pattern Float4ToFp8Unchecked ORs into its result for infinities */
return (num & 0x7F) == 0x7C;
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -46,7 +55,7 @@ Fp8IsZero(fp8 num)
|
||||
static inline float
|
||||
Fp8ToFloat4(fp8 num)
|
||||
{
|
||||
float lookup[128] = {0, 0.00195312, 0.00390625, 0.00585938, 0.0078125, 0.00976562, 0.0117188, 0.0136719, 0.015625, 0.0175781, 0.0195312, 0.0214844, 0.0234375, 0.0253906, 0.0273438, 0.0292969, 0.03125, 0.0351562, 0.0390625, 0.0429688, 0.046875, 0.0507812, 0.0546875, 0.0585938, 0.0625, 0.0703125, 0.078125, 0.0859375, 0.09375, 0.101562, 0.109375, 0.117188, 0.125, 0.140625, 0.15625, 0.171875, 0.1875, 0.203125, 0.21875, 0.234375, 0.25, 0.28125, 0.3125, 0.34375, 0.375, 0.40625, 0.4375, 0.46875, 0.5, 0.5625, 0.625, 0.6875, 0.75, 0.8125, 0.875, 0.9375, 1, 1.125, 1.25, 1.375, 1.5, 1.625, 1.75, 1.875, 2, 2.25, 2.5, 2.75, 3, 3.25, 3.5, 3.75, 4, 4.5, 5, 5.5, 6, 6.5, 7, 7.5, 8, 9, 10, 11, 12, 13, 14, 15, 16, 18, 20, 22, 24, 26, 28, 30, 32, 36, 40, 44, 48, 52, 56, 60, 64, 72, 80, 88, 96, 104, 112, 120, 128, 144, 160, 176, 192, 208, 224, 240, 256, 288, 320, 352, 384, 416, 448, NAN};
|
||||
float lookup[128] = {0, 0.0000152587890625, 0.000030517578125, 0.0000457763671875, 0.00006103515625, 0.0000762939453125, 0.000091552734375, 0.000106812, 0.00012207, 0.000152588, 0.000183105, 0.000213623, 0.000244141, 0.000305176, 0.000366211, 0.000427246, 0.000488281, 0.000610352, 0.000732422, 0.000854492, 0.000976562, 0.0012207, 0.00146484, 0.00170898, 0.00195312, 0.00244141, 0.00292969, 0.00341797, 0.00390625, 0.00488281, 0.00585938, 0.00683594, 0.0078125, 0.00976562, 0.0117188, 0.0136719, 0.015625, 0.0195312, 0.0234375, 0.0273438, 0.03125, 0.0390625, 0.046875, 0.0546875, 0.0625, 0.078125, 0.09375, 0.109375, 0.125, 0.15625, 0.1875, 0.21875, 0.25, 0.3125, 0.375, 0.4375, 0.5, 0.625, 0.75, 0.875, 1, 1.25, 1.5, 1.75, 2, 2.5, 3, 3.5, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 448, 512, 640, 768, 896, 1024, 1280, 1536, 1792, 2048, 2560, 3072, 3584, 4096, 5120, 6144, 7168, 8192, 10240, 12288, 14336, 16384, 20480, 24576, 28672, 32768, 40960, 49152, 57344, INFINITY, NAN, NAN, NAN};
|
||||
float v = lookup[num & 0x7F];
|
||||
|
||||
return (num & 0x80) == 0x80 ? -v : v;
|
||||
@@ -77,52 +86,58 @@ Float4ToFp8Unchecked(float num)
|
||||
/* Sign */
|
||||
result = (bin & 0x80000000) >> 24;
|
||||
|
||||
if (isinf(num) || isnan(num))
|
||||
if (isinf(num))
|
||||
{
|
||||
/* Infinite */
|
||||
result |= 0x7C;
|
||||
}
|
||||
else if (isnan(num))
|
||||
{
|
||||
/* NaN */
|
||||
result |= 0x7F;
|
||||
result |= 0x7C;
|
||||
result |= mantissa >> 21;
|
||||
}
|
||||
else if (exponent > 116)
|
||||
else if (exponent > 98)
|
||||
{
|
||||
int m;
|
||||
int gr;
|
||||
int s;
|
||||
|
||||
exponent -= 127;
|
||||
s = mantissa & 0x000FFFFF;
|
||||
s = mantissa & 0x001FFFFF;
|
||||
|
||||
/* Subnormal */
|
||||
if (exponent < -6)
|
||||
if (exponent < -14)
|
||||
{
|
||||
int diff = -exponent - 6;
|
||||
int diff = -exponent - 14;
|
||||
|
||||
mantissa >>= diff;
|
||||
mantissa += 1 << (23 - diff);
|
||||
s |= mantissa & 0x000FFFFF;
|
||||
s |= mantissa & 0x001FFFFF;
|
||||
}
|
||||
|
||||
m = mantissa >> 20;
|
||||
m = mantissa >> 21;
|
||||
|
||||
/* Round */
|
||||
gr = (mantissa >> 19) % 4;
|
||||
gr = (mantissa >> 20) % 4;
|
||||
if (gr == 3 || (gr == 1 && s != 0))
|
||||
m += 1;
|
||||
|
||||
if (m == 8)
|
||||
if (m == 4)
|
||||
{
|
||||
m = 0;
|
||||
exponent += 1;
|
||||
}
|
||||
|
||||
if (exponent > 8)
|
||||
if (exponent > 16)
|
||||
{
|
||||
/* Infinite, which is NaN */
|
||||
result |= 0x7F;
|
||||
/* Infinite */
|
||||
result |= 0x7C;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (exponent >= -7)
|
||||
result |= (exponent + 7) << 3;
|
||||
if (exponent >= -14)
|
||||
result |= (exponent + 15) << 2;
|
||||
|
||||
result |= m;
|
||||
}
|
||||
@@ -139,7 +154,7 @@ Float4ToFp8(float num)
|
||||
{
|
||||
fp8 result = Float4ToFp8Unchecked(num);
|
||||
|
||||
if (unlikely(Fp8IsNan(result)) && !isnan(num))
|
||||
if (unlikely(Fp8IsInf(result)) && !isinf(num))
|
||||
{
|
||||
char *buf = palloc(FLOAT_SHORTEST_DECIMAL_LEN);
|
||||
|
||||
|
||||
Reference in New Issue
Block a user