Skip to content

Commit 0aa38db

Browse files
committed
Optimise numeric division for 3 and 4 base-NBASE digit divisors.
On platforms with 128-bit integer support, introduce a new function div_var_int64(), along the same lines as div_var_int() added in d1b307e for divisors with 1 or 2 base-NBASE digits, and use it to speed up div_var() and div_var_fast() in a similar way when the divisor has 3 or 4 base-NBASE digits. This gives significant performance gains for divisors with 9-16 decimal digits. Joel Jacobson. Discussion: https://postgr.es/m/b7a5893d-af18-4c0b-8918-96de5f1bbf39%40app.fastmail.com https://postgr.es/m/CAEZATCXGm%3DDyTq%3DFrcOqC0gPMVveKUYTaD5KRRoajrUTiWxVMw%40mail.gmail.com
1 parent 009dbde commit 0aa38db

File tree

1 file changed

+167
-0
lines changed

1 file changed

+167
-0
lines changed

src/backend/utils/adt/numeric.c

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -554,6 +554,10 @@ static void div_var_fast(const NumericVar *var1, const NumericVar *var2,
554554
NumericVar *result, int rscale, bool round);
555555
static void div_var_int(const NumericVar *var, int ival, int ival_weight,
556556
NumericVar *result, int rscale, bool round);
557+
#ifdef HAVE_INT128
558+
static void div_var_int64(const NumericVar *var, int64 ival, int ival_weight,
559+
NumericVar *result, int rscale, bool round);
560+
#endif
557561
static int select_div_scale(const NumericVar *var1, const NumericVar *var2);
558562
static void mod_var(const NumericVar *var1, const NumericVar *var2,
559563
NumericVar *result);
@@ -8484,6 +8488,9 @@ div_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result,
84848488
/*
84858489
* If the divisor has just one or two digits, delegate to div_var_int(),
84868490
* which uses fast short division.
8491+
*
8492+
* Similarly, on platforms with 128-bit integer support, delegate to
8493+
* div_var_int64() for divisors with three or four digits.
84878494
*/
84888495
if (var2ndigits <= 2)
84898496
{
@@ -8503,6 +8510,26 @@ div_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result,
85038510
div_var_int(var1, idivisor, idivisor_weight, result, rscale, round);
85048511
return;
85058512
}
8513+
#ifdef HAVE_INT128
8514+
if (var2ndigits <= 4)
8515+
{
8516+
int64 idivisor;
8517+
int idivisor_weight;
8518+
8519+
idivisor = var2->digits[0];
8520+
idivisor_weight = var2->weight;
8521+
for (i = 1; i < var2ndigits; i++)
8522+
{
8523+
idivisor = idivisor * NBASE + var2->digits[i];
8524+
idivisor_weight--;
8525+
}
8526+
if (var2->sign == NUMERIC_NEG)
8527+
idivisor = -idivisor;
8528+
8529+
div_var_int64(var1, idivisor, idivisor_weight, result, rscale, round);
8530+
return;
8531+
}
8532+
#endif
85068533

85078534
/*
85088535
* Otherwise, perform full long division.
@@ -8774,6 +8801,9 @@ div_var_fast(const NumericVar *var1, const NumericVar *var2,
87748801
/*
87758802
* If the divisor has just one or two digits, delegate to div_var_int(),
87768803
* which uses fast short division.
8804+
*
8805+
* Similarly, on platforms with 128-bit integer support, delegate to
8806+
* div_var_int64() for divisors with three or four digits.
87778807
*/
87788808
if (var2ndigits <= 2)
87798809
{
@@ -8793,6 +8823,26 @@ div_var_fast(const NumericVar *var1, const NumericVar *var2,
87938823
div_var_int(var1, idivisor, idivisor_weight, result, rscale, round);
87948824
return;
87958825
}
8826+
#ifdef HAVE_INT128
8827+
if (var2ndigits <= 4)
8828+
{
8829+
int64 idivisor;
8830+
int idivisor_weight;
8831+
8832+
idivisor = var2->digits[0];
8833+
idivisor_weight = var2->weight;
8834+
for (i = 1; i < var2ndigits; i++)
8835+
{
8836+
idivisor = idivisor * NBASE + var2->digits[i];
8837+
idivisor_weight--;
8838+
}
8839+
if (var2->sign == NUMERIC_NEG)
8840+
idivisor = -idivisor;
8841+
8842+
div_var_int64(var1, idivisor, idivisor_weight, result, rscale, round);
8843+
return;
8844+
}
8845+
#endif
87968846

87978847
/*
87988848
* Otherwise, perform full long division.
@@ -9182,6 +9232,123 @@ div_var_int(const NumericVar *var, int ival, int ival_weight,
91829232
}
91839233

91849234

9235+
#ifdef HAVE_INT128
9236+
/*
9237+
* div_var_int64() -
9238+
*
9239+
* Divide a numeric variable by a 64-bit integer with the specified weight.
9240+
* The quotient var / (ival * NBASE^ival_weight) is stored in result.
9241+
*
9242+
* This duplicates the logic in div_var_int(), so any changes made there
9243+
* should be made here too.
9244+
*/
9245+
static void
9246+
div_var_int64(const NumericVar *var, int64 ival, int ival_weight,
9247+
NumericVar *result, int rscale, bool round)
9248+
{
9249+
NumericDigit *var_digits = var->digits;
9250+
int var_ndigits = var->ndigits;
9251+
int res_sign;
9252+
int res_weight;
9253+
int res_ndigits;
9254+
NumericDigit *res_buf;
9255+
NumericDigit *res_digits;
9256+
uint64 divisor;
9257+
int i;
9258+
9259+
/* Guard against division by zero */
9260+
if (ival == 0)
9261+
ereport(ERROR,
9262+
errcode(ERRCODE_DIVISION_BY_ZERO),
9263+
errmsg("division by zero"));
9264+
9265+
/* Result zero check */
9266+
if (var_ndigits == 0)
9267+
{
9268+
zero_var(result);
9269+
result->dscale = rscale;
9270+
return;
9271+
}
9272+
9273+
/*
9274+
* Determine the result sign, weight and number of digits to calculate.
9275+
* The weight figured here is correct if the emitted quotient has no
9276+
* leading zero digits; otherwise strip_var() will fix things up.
9277+
*/
9278+
if (var->sign == NUMERIC_POS)
9279+
res_sign = ival > 0 ? NUMERIC_POS : NUMERIC_NEG;
9280+
else
9281+
res_sign = ival > 0 ? NUMERIC_NEG : NUMERIC_POS;
9282+
res_weight = var->weight - ival_weight;
9283+
/* The number of accurate result digits we need to produce: */
9284+
res_ndigits = res_weight + 1 + (rscale + DEC_DIGITS - 1) / DEC_DIGITS;
9285+
/* ... but always at least 1 */
9286+
res_ndigits = Max(res_ndigits, 1);
9287+
/* If rounding needed, figure one more digit to ensure correct result */
9288+
if (round)
9289+
res_ndigits++;
9290+
9291+
res_buf = digitbuf_alloc(res_ndigits + 1);
9292+
res_buf[0] = 0; /* spare digit for later rounding */
9293+
res_digits = res_buf + 1;
9294+
9295+
/*
9296+
* Now compute the quotient digits. This is the short division algorithm
9297+
* described in Knuth volume 2, section 4.3.1 exercise 16, except that we
9298+
* allow the divisor to exceed the internal base.
9299+
*
9300+
* In this algorithm, the carry from one digit to the next is at most
9301+
* divisor - 1. Therefore, while processing the next digit, carry may
9302+
* become as large as divisor * NBASE - 1, and so it requires a 128-bit
9303+
* integer if this exceeds PG_UINT64_MAX.
9304+
*/
9305+
divisor = i64abs(ival);
9306+
9307+
if (divisor <= PG_UINT64_MAX / NBASE)
9308+
{
9309+
/* carry cannot overflow 64 bits */
9310+
uint64 carry = 0;
9311+
9312+
for (i = 0; i < res_ndigits; i++)
9313+
{
9314+
carry = carry * NBASE + (i < var_ndigits ? var_digits[i] : 0);
9315+
res_digits[i] = (NumericDigit) (carry / divisor);
9316+
carry = carry % divisor;
9317+
}
9318+
}
9319+
else
9320+
{
9321+
/* carry may exceed 64 bits */
9322+
uint128 carry = 0;
9323+
9324+
for (i = 0; i < res_ndigits; i++)
9325+
{
9326+
carry = carry * NBASE + (i < var_ndigits ? var_digits[i] : 0);
9327+
res_digits[i] = (NumericDigit) (carry / divisor);
9328+
carry = carry % divisor;
9329+
}
9330+
}
9331+
9332+
/* Store the quotient in result */
9333+
digitbuf_free(result->buf);
9334+
result->ndigits = res_ndigits;
9335+
result->buf = res_buf;
9336+
result->digits = res_digits;
9337+
result->weight = res_weight;
9338+
result->sign = res_sign;
9339+
9340+
/* Round or truncate to target rscale (and set result->dscale) */
9341+
if (round)
9342+
round_var(result, rscale);
9343+
else
9344+
trunc_var(result, rscale);
9345+
9346+
/* Strip leading/trailing zeroes */
9347+
strip_var(result);
9348+
}
9349+
#endif
9350+
9351+
91859352
/*
91869353
* Default scale selection for division
91879354
*

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy