8
8
9
9
#if defined(HAVE__GET_CPUID )
10
10
#include <cpuid.h>
11
- #elif defined( HAVE__CPUID )
11
+ #else
12
12
#include <intrin.h>
13
13
#endif
14
14
15
15
#ifdef _MSC_VER
16
- #define TARGET_F16C_FMA
16
+ #define TARGET_F16C
17
17
#else
18
- #define TARGET_F16C_FMA __attribute__((target("f16c,fma")))
18
+ #define TARGET_F16C __attribute__((target("avx, f16c,fma")))
19
19
#endif
20
20
#endif
21
21
@@ -40,8 +40,8 @@ HalfvecL2SquaredDistanceDefault(int dim, half * ax, half * bx)
40
40
}
41
41
42
42
#ifdef HALFVEC_DISPATCH
43
- TARGET_F16C_FMA static float
44
- HalfvecL2SquaredDistanceF16cFma (int dim , half * ax , half * bx )
43
+ TARGET_F16C static float
44
+ HalfvecL2SquaredDistanceF16c (int dim , half * ax , half * bx )
45
45
{
46
46
float distance ;
47
47
int i ;
@@ -88,8 +88,8 @@ HalfvecInnerProductDefault(int dim, half * ax, half * bx)
88
88
}
89
89
90
90
#ifdef HALFVEC_DISPATCH
91
- TARGET_F16C_FMA static float
92
- HalfvecInnerProductF16cFma (int dim , half * ax , half * bx )
91
+ TARGET_F16C static float
92
+ HalfvecInnerProductF16c (int dim , half * ax , half * bx )
93
93
{
94
94
float distance ;
95
95
int i ;
@@ -141,8 +141,8 @@ HalfvecCosineSimilarityDefault(int dim, half * ax, half * bx)
141
141
}
142
142
143
143
#ifdef HALFVEC_DISPATCH
144
- TARGET_F16C_FMA static double
145
- HalfvecCosineSimilarityF16cFma (int dim , half * ax , half * bx )
144
+ TARGET_F16C static double
145
+ HalfvecCosineSimilarityF16c (int dim , half * ax , half * bx )
146
146
{
147
147
float similarity ;
148
148
float norma ;
@@ -192,20 +192,37 @@ HalfvecCosineSimilarityF16cFma(int dim, half * ax, half * bx)
192
192
#endif
193
193
194
194
#ifdef HALFVEC_DISPATCH
195
/*
 * Bit masks tested against exx[2] in SupportsCpuFeature(), i.e. the third
 * register value filled in by the CPUID leaf 1 query made there.
 * This hunk adds the OSXSAVE and AVX masks next to the existing FMA and
 * F16C masks.
 *
 * TARGET_XSAVE is applied to SupportsCpuFeature(), which calls _xgetbv().
 * It expands to nothing under _MSC_VER and to a target("xsave") function
 * attribute otherwise.
 */
- #define CPU_FEATURE_FMA (1 << 12)
196
- #define CPU_FEATURE_F16C (1 << 29)
195
+ #define CPU_FEATURE_FMA (1 << 12)
196
+ #define CPU_FEATURE_OSXSAVE (1 << 27)
197
+ #define CPU_FEATURE_AVX (1 << 28)
198
+ #define CPU_FEATURE_F16C (1 << 29)
199
+
200
+ #ifdef _MSC_VER
201
+ #define TARGET_XSAVE
202
+ #else
203
+ #define TARGET_XSAVE __attribute__((target("xsave")))
204
+ #endif
197
205
198
/*
 * Report whether every bit set in "feature" is also set in the CPUID
 * leaf 1 feature word held in exx[2].
 *
 * After this change the function returns false early, before looking at
 * "feature", unless both of the following hold:
 *   - the CPU_FEATURE_OSXSAVE bit is set in exx[2], and
 *   - bits 1 and 2 (mask 6) of _xgetbv(0) are both set.
 *
 * exx is zero-initialised, so the checks see zeros if the query leaves it
 * unmodified.
 */
- static bool
206
+ TARGET_XSAVE static bool
199
207
SupportsCpuFeature (unsigned int feature )
200
208
{
201
209
unsigned int exx [4 ] = {0 , 0 , 0 , 0 };
202
210
203
211
/* Query CPUID leaf 1; after this change __cpuid is the fallback whenever HAVE__GET_CPUID is not defined */
#if defined(HAVE__GET_CPUID )
204
212
__get_cpuid (1 , & exx [0 ], & exx [1 ], & exx [2 ], & exx [3 ]);
205
- #elif defined( HAVE__CPUID )
213
+ #else
206
214
__cpuid (exx , 1 );
207
215
#endif
208
216
217
+ /* Check OS supports XSAVE */
218
+ if ((exx [2 ] & CPU_FEATURE_OSXSAVE ) != CPU_FEATURE_OSXSAVE )
219
+ return false;
220
+
221
+ /* Check XMM and YMM registers are enabled */
/* Mask 6 selects bits 1 and 2 of the value returned by _xgetbv(0); both must be set */
222
+ if ((_xgetbv (0 ) & 6 ) != 6 )
223
+ return false;
224
+
225
+ /* Now check features */
/* True only when all requested bits are present, not just any one of them */
209
226
return (exx [2 ] & feature ) == feature ;
210
227
}
211
228
#endif
@@ -222,11 +239,11 @@ HalfvecInit(void)
222
239
HalfvecCosineSimilarity = HalfvecCosineSimilarityDefault ;
223
240
224
241
#ifdef HALFVEC_DISPATCH
225
- if (SupportsCpuFeature (CPU_FEATURE_FMA | CPU_FEATURE_F16C ))
242
+ if (SupportsCpuFeature (CPU_FEATURE_AVX | CPU_FEATURE_F16C | CPU_FEATURE_FMA ))
226
243
{
227
- HalfvecL2SquaredDistance = HalfvecL2SquaredDistanceF16cFma ;
228
- HalfvecInnerProduct = HalfvecInnerProductF16cFma ;
229
- HalfvecCosineSimilarity = HalfvecCosineSimilarityF16cFma ;
244
+ HalfvecL2SquaredDistance = HalfvecL2SquaredDistanceF16c ;
245
+ HalfvecInnerProduct = HalfvecInnerProductF16c ;
246
+ HalfvecCosineSimilarity = HalfvecCosineSimilarityF16c ;
230
247
}
231
248
#endif
232
249
}
0 commit comments