Skip to content

Commit 4864c8e

Browse files
committed
Use direct function calls for pg_popcount{32,64} on non-x86 platforms
Previously, all pg_popcount{32,64} calls were indirected through a function pointer, even though we had no fast implementation for non-x86 platforms. Instead, for those platforms, use wrappers around the pg_popcount{32,64}_slow functions. Review and additional hacking by David Rowley. Reviewed by Álvaro Herrera. Discussion: https://www.postgresql.org/message-id/flat/CAFBsxsE7otwnfA36Ly44zZO%2Bb7AEWHRFANxR1h1kxveEV%3DghLQ%40mail.gmail.com
1 parent ea499f3 commit 4864c8e

File tree

2 files changed

+53
-27
lines changed

2 files changed

+53
-27
lines changed

src/include/port/pg_bitutils.h

Lines changed: 31 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,10 +253,40 @@ pg_ceil_log2_64(uint64 num)
253253
return pg_leftmost_one_pos64(num - 1) + 1;
254254
}
255255

256-
/* Count the number of one-bits in a uint32 or uint64 */
256+
/*
257+
* With MSVC on x86_64 builds, try using native popcnt instructions via the
258+
* __popcnt and __popcnt64 intrinsics. These don't work the same as GCC's
259+
* __builtin_popcount* intrinsic functions as they always emit popcnt
260+
* instructions.
261+
*/
262+
#if defined(_MSC_VER) && defined(_M_AMD64)
263+
#define HAVE_X86_64_POPCNTQ
264+
#endif
265+
266+
/*
267+
* On x86_64, we can use the hardware popcount instruction, but only if
268+
* we can verify that the CPU supports it via the cpuid instruction.
269+
*
270+
* Otherwise, we fall back to a hand-rolled implementation.
271+
*/
272+
#ifdef HAVE_X86_64_POPCNTQ
273+
#if defined(HAVE__GET_CPUID) || defined(HAVE__CPUID)
274+
#define TRY_POPCNT_FAST 1
275+
#endif
276+
#endif
277+
278+
#ifdef TRY_POPCNT_FAST
279+
/* Attempt to use the POPCNT instruction, but perform a runtime check first */
257280
extern int (*pg_popcount32) (uint32 word);
258281
extern int (*pg_popcount64) (uint64 word);
259282

283+
#else
284+
/* Use a portable implementation -- no need for a function pointer. */
285+
extern int pg_popcount32(uint32 word);
286+
extern int pg_popcount64(uint64 word);
287+
288+
#endif /* TRY_POPCNT_FAST */
289+
260290
/* Count the number of one-bits in a byte array */
261291
extern uint64 pg_popcount(const char *buf, int bytes);
262292

src/port/pg_bitutils.c

Lines changed: 22 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -103,29 +103,6 @@ const uint8 pg_number_of_ones[256] = {
103103
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
104104
};
105105

106-
/*
107-
* With MSVC on x86_64 builds, try using native popcnt instructions via the
108-
* __popcnt and __popcnt64 intrinsics. These don't work the same as GCC's
109-
* __builtin_popcount* intrinsic functions as they always emit popcnt
110-
* instructions.
111-
*/
112-
#if defined(_MSC_VER) && defined(_M_AMD64)
113-
#define HAVE_X86_64_POPCNTQ
114-
#endif
115-
116-
/*
117-
* On x86_64, we can use the hardware popcount instruction, but only if
118-
* we can verify that the CPU supports it via the cpuid instruction.
119-
*
120-
* Otherwise, we fall back to __builtin_popcount if the compiler has that,
121-
* or a hand-rolled implementation if not.
122-
*/
123-
#ifdef HAVE_X86_64_POPCNTQ
124-
#if defined(HAVE__GET_CPUID) || defined(HAVE__CPUID)
125-
#define TRY_POPCNT_FAST 1
126-
#endif
127-
#endif
128-
129106
static int pg_popcount32_slow(uint32 word);
130107
static int pg_popcount64_slow(uint64 word);
131108

@@ -138,9 +115,6 @@ static int pg_popcount64_fast(uint64 word);
138115

139116
int (*pg_popcount32) (uint32 word) = pg_popcount32_choose;
140117
int (*pg_popcount64) (uint64 word) = pg_popcount64_choose;
141-
#else
142-
int (*pg_popcount32) (uint32 word) = pg_popcount32_slow;
143-
int (*pg_popcount64) (uint64 word) = pg_popcount64_slow;
144118
#endif /* TRY_POPCNT_FAST */
145119

146120
#ifdef TRY_POPCNT_FAST
@@ -291,6 +265,28 @@ pg_popcount64_slow(uint64 word)
291265
#endif /* HAVE__BUILTIN_POPCOUNT */
292266
}
293267

268+
#ifndef TRY_POPCNT_FAST
269+
270+
/*
271+
* When the POPCNT instruction is not available, there's no point in using
272+
* function pointers to vary the implementation between the fast and slow
273+
* method. We instead just make these actual external functions when
274+
* TRY_POPCNT_FAST is not defined. The compiler should be able to inline
275+
* the slow versions here.
276+
*/
277+
int
278+
pg_popcount32(uint32 word)
279+
{
280+
return pg_popcount32_slow(word);
281+
}
282+
283+
int
284+
pg_popcount64(uint64 word)
285+
{
286+
return pg_popcount64_slow(word);
287+
}
288+
289+
#endif /* !TRY_POPCNT_FAST */
294290

295291
/*
296292
* pg_popcount

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy