Skip to content

Commit 8199aed

Browse files
author
Vladlen Popolitov
committed
Merge branch 'master' into relaxed
2 parents ec359c1 + 0b938f8 commit 8199aed

File tree

2 files changed

+42
-18
lines changed

2 files changed

+42
-18
lines changed

src/halfutils.c

Lines changed: 34 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -8,14 +8,14 @@
88

99
#if defined(HAVE__GET_CPUID)
1010
#include <cpuid.h>
11-
#elif defined(HAVE__CPUID)
11+
#else
1212
#include <intrin.h>
1313
#endif
1414

1515
#ifdef _MSC_VER
16-
#define TARGET_F16C_FMA
16+
#define TARGET_F16C
1717
#else
18-
#define TARGET_F16C_FMA __attribute__((target("f16c,fma")))
18+
#define TARGET_F16C __attribute__((target("avx,f16c,fma")))
1919
#endif
2020
#endif
2121

@@ -40,8 +40,8 @@ HalfvecL2SquaredDistanceDefault(int dim, half * ax, half * bx)
4040
}
4141

4242
#ifdef HALFVEC_DISPATCH
43-
TARGET_F16C_FMA static float
44-
HalfvecL2SquaredDistanceF16cFma(int dim, half * ax, half * bx)
43+
TARGET_F16C static float
44+
HalfvecL2SquaredDistanceF16c(int dim, half * ax, half * bx)
4545
{
4646
float distance;
4747
int i;
@@ -88,8 +88,8 @@ HalfvecInnerProductDefault(int dim, half * ax, half * bx)
8888
}
8989

9090
#ifdef HALFVEC_DISPATCH
91-
TARGET_F16C_FMA static float
92-
HalfvecInnerProductF16cFma(int dim, half * ax, half * bx)
91+
TARGET_F16C static float
92+
HalfvecInnerProductF16c(int dim, half * ax, half * bx)
9393
{
9494
float distance;
9595
int i;
@@ -141,8 +141,8 @@ HalfvecCosineSimilarityDefault(int dim, half * ax, half * bx)
141141
}
142142

143143
#ifdef HALFVEC_DISPATCH
144-
TARGET_F16C_FMA static double
145-
HalfvecCosineSimilarityF16cFma(int dim, half * ax, half * bx)
144+
TARGET_F16C static double
145+
HalfvecCosineSimilarityF16c(int dim, half * ax, half * bx)
146146
{
147147
float similarity;
148148
float norma;
@@ -192,20 +192,37 @@ HalfvecCosineSimilarityF16cFma(int dim, half * ax, half * bx)
192192
#endif
193193

194194
#ifdef HALFVEC_DISPATCH
195-
#define CPU_FEATURE_FMA (1 << 12)
196-
#define CPU_FEATURE_F16C (1 << 29)
195+
#define CPU_FEATURE_FMA (1 << 12)
196+
#define CPU_FEATURE_OSXSAVE (1 << 27)
197+
#define CPU_FEATURE_AVX (1 << 28)
198+
#define CPU_FEATURE_F16C (1 << 29)
199+
200+
#ifdef _MSC_VER
201+
#define TARGET_XSAVE
202+
#else
203+
#define TARGET_XSAVE __attribute__((target("xsave")))
204+
#endif
197205

198-
static bool
206+
TARGET_XSAVE static bool
199207
SupportsCpuFeature(unsigned int feature)
200208
{
201209
unsigned int exx[4] = {0, 0, 0, 0};
202210

203211
#if defined(HAVE__GET_CPUID)
204212
__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
205-
#elif defined(HAVE__CPUID)
213+
#else
206214
__cpuid(exx, 1);
207215
#endif
208216

217+
/* Check OS supports XSAVE */
218+
if ((exx[2] & CPU_FEATURE_OSXSAVE) != CPU_FEATURE_OSXSAVE)
219+
return false;
220+
221+
/* Check XMM and YMM registers are enabled */
222+
if ((_xgetbv(0) & 6) != 6)
223+
return false;
224+
225+
/* Now check features */
209226
return (exx[2] & feature) == feature;
210227
}
211228
#endif
@@ -222,11 +239,11 @@ HalfvecInit(void)
222239
HalfvecCosineSimilarity = HalfvecCosineSimilarityDefault;
223240

224241
#ifdef HALFVEC_DISPATCH
225-
if (SupportsCpuFeature(CPU_FEATURE_FMA | CPU_FEATURE_F16C))
242+
if (SupportsCpuFeature(CPU_FEATURE_AVX | CPU_FEATURE_F16C | CPU_FEATURE_FMA))
226243
{
227-
HalfvecL2SquaredDistance = HalfvecL2SquaredDistanceF16cFma;
228-
HalfvecInnerProduct = HalfvecInnerProductF16cFma;
229-
HalfvecCosineSimilarity = HalfvecCosineSimilarityF16cFma;
244+
HalfvecL2SquaredDistance = HalfvecL2SquaredDistanceF16c;
245+
HalfvecInnerProduct = HalfvecInnerProductF16c;
246+
HalfvecCosineSimilarity = HalfvecCosineSimilarityF16c;
230247
}
231248
#endif
232249
}

src/vector.c

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,14 @@
3434
#define CreateStateDatums(dim) palloc(sizeof(Datum) * (dim + 1))
3535

3636
/* target_clones requires glibc */
37-
#if defined(__x86_64__) && defined(__gnu_linux__) && defined(__has_attribute) && __has_attribute(target_clones) && !defined(__FMA__)
37+
#if defined(__gnu_linux__) && defined(__has_attribute)
38+
/* Use separate line for portability */
39+
#if __has_attribute(target_clones)
40+
#define HAVE_TARGET_CLONES
41+
#endif
42+
#endif
43+
44+
#if defined(__x86_64__) && defined(HAVE_TARGET_CLONES) && !defined(__FMA__)
3845
#define VECTOR_DISPATCH __attribute__((target_clones("default", "fma")))
3946
#else
4047
#define VECTOR_DISPATCH

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy