Skip to content

Commit 0f96754

Browse files
committed
use rotl/rotr in 8x Montgomery mul
1 parent dd4320f commit 0f96754

File tree

1 file changed

+9
-9
lines changed

1 file changed

+9
-9
lines changed

cp-algo/util/simd.hpp

Lines changed: 9 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -41,6 +41,12 @@ namespace cp_algo {
4141
[[gnu::always_inline]] inline u64x4 low32(u64x4 x) {
4242
return x & uint32_t(-1);
4343
}
44+
// Permutes the 32-bit lanes of x. The argument is converted to u32x8, its
// lanes are shuffled with the index list {1, 2, 3, 0, 5, 6, 7, 4}, and the
// result is converted back to the argument's own type (decltype(x)).
// Both shuffle operands are the same vector and every index is below 8, so
// within each group of four lanes output lane i holds input lane (i + 1) % 4
// of that same group; lanes never cross between the two groups.
// NOTE(review): u32x8 is declared outside this view; presumably a vector of
// eight 32-bit lanes, making the conversions same-size reinterpretations -- confirm.
[[gnu::always_inline]] inline auto rotr(auto x) {
45+
return decltype(x)(__builtin_shufflevector(u32x8(x), u32x8(x), 1, 2, 3, 0, 5, 6, 7, 4));
46+
}
47+
// Permutes the 32-bit lanes of x in the opposite direction to rotr. The
// argument is converted to u32x8, its lanes are shuffled with the index list
// {3, 0, 1, 2, 7, 4, 5, 6}, and the result is converted back to decltype(x).
// Within each group of four lanes output lane i holds input lane (i + 3) % 4
// of that same group, i.e. this is the inverse of the {1, 2, 3, 0, 5, 6, 7, 4}
// permutation used by rotr, so rotl(rotr(v)) restores the original lane order.
// NOTE(review): u32x8 is declared outside this view; presumably a vector of
// eight 32-bit lanes -- confirm.
[[gnu::always_inline]] inline auto rotl(auto x) {
48+
return decltype(x)(__builtin_shufflevector(u32x8(x), u32x8(x), 3, 0, 1, 2, 7, 4, 5, 6));
49+
}
4450

4551
[[gnu::always_inline]] inline u64x4 montgomery_reduce(u64x4 x, uint32_t mod, uint32_t imod) {
4652
#ifdef __AVX2__
@@ -50,7 +56,7 @@ namespace cp_algo {
5056
auto x_ninv = x * imod;
5157
x += low32(x_ninv) * mod;
5258
#endif
53-
return x >> 32;
59+
return rotr(x);
5460
}
5561

5662
[[gnu::always_inline]] inline u64x4 montgomery_mul(u64x4 x, u64x4 y, uint32_t mod, uint32_t imod) {
@@ -60,16 +66,10 @@ namespace cp_algo {
6066
return montgomery_reduce(low32(x) * low32(y), mod, imod);
6167
#endif
6268
}
63-
6469
[[gnu::always_inline]] inline u32x8 montgomery_mul(u32x8 x, u32x8 y, uint32_t mod, uint32_t imod) {
65-
auto x0246 = u64x4(x);
66-
auto y0246 = u64x4(y);
67-
auto x1357 = u64x4(x) >> 32;
68-
auto y1357 = u64x4(y) >> 32;
69-
return u32x8(montgomery_mul(x0246, y0246, mod, imod)) |
70-
u32x8(montgomery_mul(x1357, y1357, mod, imod) << 32);
70+
return u32x8(montgomery_mul(u64x4(x), u64x4(y), mod, imod)) |
71+
u32x8(rotl(montgomery_mul(u64x4(rotr(x)), u64x4(rotr(y)), mod, imod)));
7172
}
72-
7373
[[gnu::always_inline]] inline dx4 rotate_right(dx4 x) {
7474
static constexpr u64x4 shuffler = {3, 0, 1, 2};
7575
return __builtin_shuffle(x, shuffler);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy