Added CPU dispatching for L1 distance for halfvec

This commit is contained in:
Andrew Kane
2024-04-22 15:02:17 -07:00
parent d83af48e70
commit 0c9ae4b187

View File

@@ -204,6 +204,39 @@ HalfvecL1DistanceDefault(int dim, half * ax, half * bx)
return distance;
}
#ifdef HALFVEC_DISPATCH
/* Does not require FMA, but keep logic simple */
TARGET_F16C static float
HalfvecL1DistanceF16c(int dim, half * ax, half * bx)
{
float distance;
int i;
float s[8];
int count = (dim / 8) * 8;
__m256 dist = _mm256_setzero_ps();
__m256 sign = _mm256_set1_ps(-0.0);
for (i = 0; i < count; i += 8)
{
__m128i axi = _mm_loadu_si128((__m128i *) (ax + i));
__m128i bxi = _mm_loadu_si128((__m128i *) (bx + i));
__m256 axs = _mm256_cvtph_ps(axi);
__m256 bxs = _mm256_cvtph_ps(bxi);
dist = _mm256_add_ps(dist, _mm256_andnot_ps(sign, _mm256_sub_ps(axs, bxs)));
}
_mm256_storeu_ps(s, dist);
distance = s[0] + s[1] + s[2] + s[3] + s[4] + s[5] + s[6] + s[7];
for (; i < dim; i++)
distance += fabsf(HalfToFloat4(ax[i]) - HalfToFloat4(bx[i]));
return distance;
}
#endif
#ifdef HALFVEC_DISPATCH
#define CPU_FEATURE_FMA (1 << 12)
#define CPU_FEATURE_OSXSAVE (1 << 27)
@@ -258,6 +291,8 @@ HalfvecInit(void)
HalfvecL2SquaredDistance = HalfvecL2SquaredDistanceF16c;
HalfvecInnerProduct = HalfvecInnerProductF16c;
HalfvecCosineSimilarity = HalfvecCosineSimilarityF16c;
/* Does not require FMA, but keep logic simple */
HalfvecL1Distance = HalfvecL1DistanceF16c;
}
#endif
}