Skip to content

Commit d8a08db

Browse files
committed
Simplify non-native 64x64-bit multiplication in int128.h.
In the non-native code in int128_add_int64_mul_int64(), use signed 64-bit integer multiplication instead of unsigned multiplication for the first three product terms. This simplifies the code needed to add each product term to the result, leading to more compact and efficient code. The actual performance gain is quite modest, but it seems worth it to improve the code's readability.

Author: Dean Rasheed <dean.a.rasheed@gmail.com>
Reviewed-by: John Naylor <johncnaylorls@gmail.com>
Discussion: https://postgr.es/m/CAEZATCWgBMc9ZwKMYqQpaQz2X6gaamYRB+RnMsUNcdMcL2Mj_w@mail.gmail.com
1 parent d9bb8ef commit d8a08db

File tree

1 file changed

+21
-27
lines changed

1 file changed

+21
-27
lines changed

src/include/common/int128.h

Lines changed: 21 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -110,11 +110,11 @@ int128_add_int64(INT128 *i128, int64 v)
110110
}
111111

112112
/*
113-
* INT64_AU32 extracts the most significant 32 bits of int64 as int64, while
114-
* INT64_AL32 extracts the least significant 32 bits as uint64.
113+
* INT64_HI_INT32 extracts the most significant 32 bits of int64 as int32.
114+
* INT64_LO_UINT32 extracts the least significant 32 bits as uint32.
115115
*/
116-
#define INT64_AU32(i64) ((i64) >> 32)
117-
#define INT64_AL32(i64) ((i64) & UINT64CONST(0xFFFFFFFF))
116+
#define INT64_HI_INT32(i64) ((int32) ((i64) >> 32))
117+
#define INT64_LO_UINT32(i64) ((uint32) (i64))
118118

119119
/*
120120
* Add the 128-bit product of two int64 values into an INT128 variable.
@@ -129,7 +129,7 @@ int128_add_int64_mul_int64(INT128 *i128, int64 x, int64 y)
129129
*/
130130
*i128 += (int128) x * (int128) y;
131131
#else
132-
/* INT64_AU32 must use arithmetic right shift */
132+
/* INT64_HI_INT32 must use arithmetic right shift */
133133
StaticAssertDecl(((int64) -1 >> 1) == (int64) -1,
134134
"arithmetic right shift is needed");
135135

@@ -154,33 +154,27 @@ int128_add_int64_mul_int64(INT128 *i128, int64 x, int64 y)
154154
/* No need to work hard if product must be zero */
155155
if (x != 0 && y != 0)
156156
{
157-
int64 x_u32 = INT64_AU32(x);
158-
uint64 x_l32 = INT64_AL32(x);
159-
int64 y_u32 = INT64_AU32(y);
160-
uint64 y_l32 = INT64_AL32(y);
157+
int32 x_hi = INT64_HI_INT32(x);
158+
uint32 x_lo = INT64_LO_UINT32(x);
159+
int32 y_hi = INT64_HI_INT32(y);
160+
uint32 y_lo = INT64_LO_UINT32(y);
161161
int64 tmp;
162162

163163
/* the first term */
164-
i128->hi += x_u32 * y_u32;
165-
166-
/* the second term: sign-extend it only if x is negative */
167-
tmp = x_u32 * y_l32;
168-
if (x < 0)
169-
i128->hi += INT64_AU32(tmp);
170-
else
171-
i128->hi += ((uint64) tmp) >> 32;
172-
int128_add_uint64(i128, ((uint64) INT64_AL32(tmp)) << 32);
173-
174-
/* the third term: sign-extend it only if y is negative */
175-
tmp = x_l32 * y_u32;
176-
if (y < 0)
177-
i128->hi += INT64_AU32(tmp);
178-
else
179-
i128->hi += ((uint64) tmp) >> 32;
180-
int128_add_uint64(i128, ((uint64) INT64_AL32(tmp)) << 32);
164+
i128->hi += (int64) x_hi * (int64) y_hi;
165+
166+
/* the second term: sign-extended with the sign of x */
167+
tmp = (int64) x_hi * (int64) y_lo;
168+
i128->hi += INT64_HI_INT32(tmp);
169+
int128_add_uint64(i128, ((uint64) INT64_LO_UINT32(tmp)) << 32);
170+
171+
/* the third term: sign-extended with the sign of y */
172+
tmp = (int64) x_lo * (int64) y_hi;
173+
i128->hi += INT64_HI_INT32(tmp);
174+
int128_add_uint64(i128, ((uint64) INT64_LO_UINT32(tmp)) << 32);
181175

182176
/* the fourth term: always unsigned */
183-
int128_add_uint64(i128, x_l32 * y_l32);
177+
int128_add_uint64(i128, (uint64) x_lo * (uint64) y_lo);
184178
}
185179
#endif
186180
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy