Skip to content

Commit 598e011

Browse files
Fix code for probing availability of AVX-512.
This commit fixes a few things:

* Instead of checking for CPU support of the "xsave" extension, we need to check for OS support of XGETBV instructions via the "osxsave" flag.
* We must check that additional XCR0 bits are set to be sure the ZMM registers are fully enabled.
* We should use the recommended ordering of steps. Specifically, we need to check that the ZMM registers are enabled prior to checking for AVX-512 via CPUID.

In passing, split this code into separate functions to improve readability.

Reported-by: Andrew Kane
Reviewed-by: Akash Shankaran, Raghuveer Devulapalli
Discussion: https://postgr.es/m/20240418024459.GA3385227%40nathanxps13
1 parent bb3ca23 commit 598e011

File tree

1 file changed

+47
-33
lines changed

1 file changed

+47
-33
lines changed

src/port/pg_popcount_avx512_choose.c

Lines changed: 47 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -34,55 +34,69 @@
3434
#ifdef TRY_POPCNT_FAST
3535

3636
/*
37-
* Returns true if the CPU supports the instructions required for the AVX-512
38-
* pg_popcount() implementation.
37+
* Does CPUID say there's support for XSAVE instructions?
3938
*/
40-
bool
41-
pg_popcount_avx512_available(void)
39+
static inline bool
40+
xsave_available(void)
4241
{
4342
unsigned int exx[4] = {0, 0, 0, 0};
4443

45-
/* Does CPUID say there's support for AVX-512 popcount instructions? */
46-
#if defined(HAVE__GET_CPUID_COUNT)
47-
__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
48-
#elif defined(HAVE__CPUIDEX)
49-
__cpuidex(exx, 7, 0);
50-
#else
51-
#error cpuid instruction not available
52-
#endif
53-
if ((exx[2] & (1 << 14)) == 0) /* avx512-vpopcntdq */
54-
return false;
55-
56-
/* Does CPUID say there's support for AVX-512 byte and word instructions? */
57-
memset(exx, 0, sizeof(exx));
58-
#if defined(HAVE__GET_CPUID_COUNT)
59-
__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
60-
#elif defined(HAVE__CPUIDEX)
61-
__cpuidex(exx, 7, 0);
62-
#else
63-
#error cpuid instruction not available
64-
#endif
65-
if ((exx[1] & (1 << 30)) == 0) /* avx512-bw */
66-
return false;
67-
68-
/* Does CPUID say there's support for XSAVE instructions? */
69-
memset(exx, 0, sizeof(exx));
7044
#if defined(HAVE__GET_CPUID)
7145
__get_cpuid(1, &exx[0], &exx[1], &exx[2], &exx[3]);
7246
#elif defined(HAVE__CPUID)
7347
__cpuid(exx, 1);
7448
#else
7549
#error cpuid instruction not available
7650
#endif
77-
if ((exx[2] & (1 << 26)) == 0) /* xsave */
78-
return false;
51+
return (exx[2] & (1 << 27)) != 0; /* osxsave */
52+
}
7953

80-
/* Does XGETBV say the ZMM registers are enabled? */
54+
/*
55+
* Does XGETBV say the ZMM registers are enabled?
56+
*
57+
* NB: Caller is responsible for verifying that xsave_available() returns true
58+
* before calling this.
59+
*/
60+
static inline bool
61+
zmm_regs_available(void)
62+
{
8163
#ifdef HAVE_XSAVE_INTRINSICS
82-
return (_xgetbv(0) & 0xe0) != 0;
64+
return (_xgetbv(0) & 0xe6) == 0xe6;
8365
#else
8466
return false;
8567
#endif
8668
}
8769

70+
/*
71+
* Does CPUID say there's support for AVX-512 popcount and byte-and-word
72+
* instructions?
73+
*/
74+
static inline bool
75+
avx512_popcnt_available(void)
76+
{
77+
unsigned int exx[4] = {0, 0, 0, 0};
78+
79+
#if defined(HAVE__GET_CPUID_COUNT)
80+
__get_cpuid_count(7, 0, &exx[0], &exx[1], &exx[2], &exx[3]);
81+
#elif defined(HAVE__CPUIDEX)
82+
__cpuidex(exx, 7, 0);
83+
#else
84+
#error cpuid instruction not available
85+
#endif
86+
return (exx[2] & (1 << 14)) != 0 && /* avx512-vpopcntdq */
87+
(exx[1] & (1 << 30)) != 0; /* avx512-bw */
88+
}
89+
90+
/*
91+
* Returns true if the CPU supports the instructions required for the AVX-512
92+
* pg_popcount() implementation.
93+
*/
94+
bool
95+
pg_popcount_avx512_available(void)
96+
{
97+
return xsave_available() &&
98+
zmm_regs_available() &&
99+
avx512_popcnt_available();
100+
}
101+
88102
#endif /* TRY_POPCNT_FAST */

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy