Skip to content

Commit 9f225e9

Browse files
committed
Introduce helper SIMD functions for small byte arrays
vector8_min - helper for emulating ">=" semantics.
vector8_highbit_mask - used to turn the result of a vector comparison into a bitmask.

Masahiko Sawada

Reviewed by Nathan Bossart, with additional adjustments by me.
Discussion: https://postgr.es/m/CAFBsxsHbBm_M22gLBO%2BAZT4mfMq3L_oX3wdKZxjeNnT7fHsYMQ%40mail.gmail.com
1 parent 60c0782 commit 9f225e9

File tree

1 file changed

+47
-0
lines changed

1 file changed

+47
-0
lines changed

src/include/port/simd.h

Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -79,6 +79,7 @@ static inline bool vector8_has_le(const Vector8 v, const uint8 c);
7979
static inline bool vector8_is_highbit_set(const Vector8 v);
8080
#ifndef USE_NO_SIMD
8181
static inline bool vector32_is_highbit_set(const Vector32 v);
82+
static inline uint32 vector8_highbit_mask(const Vector8 v);
8283
#endif
8384

8485
/* arithmetic operations */
@@ -96,6 +97,7 @@ static inline Vector8 vector8_ssub(const Vector8 v1, const Vector8 v2);
9697
*/
9798
#ifndef USE_NO_SIMD
9899
static inline Vector8 vector8_eq(const Vector8 v1, const Vector8 v2);
100+
static inline Vector8 vector8_min(const Vector8 v1, const Vector8 v2);
99101
static inline Vector32 vector32_eq(const Vector32 v1, const Vector32 v2);
100102
#endif
101103

@@ -299,6 +301,36 @@ vector32_is_highbit_set(const Vector32 v)
299301
}
300302
#endif /* ! USE_NO_SIMD */
301303

304+
/*
305+
* Return a bitmask formed from the high-bit of each element.
306+
*/
307+
#ifndef USE_NO_SIMD
308+
static inline uint32
309+
vector8_highbit_mask(const Vector8 v)
310+
{
311+
#ifdef USE_SSE2
312+
return (uint32) _mm_movemask_epi8(v);
313+
#elif defined(USE_NEON)
314+
/*
315+
* Note: It would be faster to use vget_lane_u64 and vshrn_n_u16, but that
316+
* returns a uint64, making it inconvenient to combine mask values from
317+
* multiple vectors.
318+
*/
319+
static const uint8 mask[16] = {
320+
1 << 0, 1 << 1, 1 << 2, 1 << 3,
321+
1 << 4, 1 << 5, 1 << 6, 1 << 7,
322+
1 << 0, 1 << 1, 1 << 2, 1 << 3,
323+
1 << 4, 1 << 5, 1 << 6, 1 << 7,
324+
};
325+
326+
uint8x16_t masked = vandq_u8(vld1q_u8(mask), (uint8x16_t) vshrq_n_s8(v, 7));
327+
uint8x16_t maskedhi = vextq_u8(masked, masked, 8);
328+
329+
return (uint32) vaddvq_u16((uint16x8_t) vzip1q_u8(masked, maskedhi));
330+
#endif
331+
}
332+
#endif /* ! USE_NO_SIMD */
333+
302334
/*
303335
* Return the bitwise OR of the inputs
304336
*/
@@ -372,4 +404,19 @@ vector32_eq(const Vector32 v1, const Vector32 v2)
372404
}
373405
#endif /* ! USE_NO_SIMD */
374406

407+
/*
408+
* Given two vectors, return a vector with the minimum element of each.
409+
*/
410+
#ifndef USE_NO_SIMD
411+
static inline Vector8
412+
vector8_min(const Vector8 v1, const Vector8 v2)
413+
{
414+
#ifdef USE_SSE2
415+
return _mm_min_epu8(v1, v2);
416+
#elif defined(USE_NEON)
417+
return vminq_u8(v1, v2);
418+
#endif
419+
}
420+
#endif /* ! USE_NO_SIMD */
421+
375422
#endif /* SIMD_H */

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy