Improved halfvec dispatching code [skip ci]

This commit is contained in:
Andrew Kane
2024-04-17 20:22:14 -07:00
parent 6153e173f3
commit 0b938f8328

View File

@@ -8,14 +8,14 @@
#if defined(HAVE__GET_CPUID)
#include <cpuid.h>
#elif defined(HAVE__CPUID)
#else
#include <intrin.h>
#endif
/*
 * Per-function target macros placed in front of the dispatched distance
 * function definitions.
 *
 * When _MSC_VER is defined both macros expand to nothing. Otherwise each
 * expands to a GCC-style target attribute naming the instruction sets the
 * annotated function is compiled for:
 *   TARGET_F16C_FMA -> "f16c,fma"
 *   TARGET_F16C     -> "avx,f16c,fma"
 */
#ifdef _MSC_VER
#define TARGET_F16C_FMA
#define TARGET_F16C
#else
#define TARGET_F16C_FMA __attribute__((target("f16c,fma")))
#define TARGET_F16C __attribute__((target("avx,f16c,fma")))
#endif
#endif
@@ -40,8 +40,8 @@ HalfvecL2SquaredDistanceDefault(int dim, half * ax, half * bx)
}
#ifdef HALFVEC_DISPATCH
TARGET_F16C_FMA static float
HalfvecL2SquaredDistanceF16cFma(int dim, half * ax, half * bx)
TARGET_F16C static float
HalfvecL2SquaredDistanceF16c(int dim, half * ax, half * bx)
{
float distance;
int i;
@@ -88,8 +88,8 @@ HalfvecInnerProductDefault(int dim, half * ax, half * bx)
}
#ifdef HALFVEC_DISPATCH
TARGET_F16C_FMA static float
HalfvecInnerProductF16cFma(int dim, half * ax, half * bx)
TARGET_F16C static float
HalfvecInnerProductF16c(int dim, half * ax, half * bx)
{
float distance;
int i;
@@ -141,8 +141,8 @@ HalfvecCosineSimilarityDefault(int dim, half * ax, half * bx)
}
#ifdef HALFVEC_DISPATCH
TARGET_F16C_FMA static double
HalfvecCosineSimilarityF16cFma(int dim, half * ax, half * bx)
TARGET_F16C static double
HalfvecCosineSimilarityF16c(int dim, half * ax, half * bx)
{
float similarity;
float norma;
@@ -210,7 +210,7 @@ SupportsCpuFeature(unsigned int feature)
#if defined(HAVE__GET_CPUID)
__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
#elif defined(HAVE__CPUID)
#else
__cpuid(exx, 1);
#endif
@@ -241,9 +241,9 @@ HalfvecInit(void)
#ifdef HALFVEC_DISPATCH
if (SupportsCpuFeature(CPU_FEATURE_AVX | CPU_FEATURE_F16C | CPU_FEATURE_FMA))
{
HalfvecL2SquaredDistance = HalfvecL2SquaredDistanceF16cFma;
HalfvecInnerProduct = HalfvecInnerProductF16cFma;
HalfvecCosineSimilarity = HalfvecCosineSimilarityF16cFma;
HalfvecL2SquaredDistance = HalfvecL2SquaredDistanceF16c;
HalfvecInnerProduct = HalfvecInnerProductF16c;
HalfvecCosineSimilarity = HalfvecCosineSimilarityF16c;
}
#endif
}