Skip to content

Commit d1b307e

Browse files
committed
Optimise numeric division for one and two base-NBASE digit divisors.
Formerly div_var() had "fast path" short division code that was significantly faster when the divisor was just one base-NBASE digit, but otherwise used long division.

This commit adds a new function div_var_int() that divides by an arbitrary 32-bit integer, using the fast short division algorithm, and updates both div_var() and div_var_fast() to use it for one and two digit divisors. In the case of div_var(), this is slightly faster in the one-digit case, because it avoids some digit array copying, and is much faster in the two-digit case where it replaces long division. For div_var_fast(), it is much faster in both cases because the main div_var_fast() algorithm is optimised for larger inputs.

Additionally, optimise exp() and ln() by using div_var_int(), allowing a NumericVar to be replaced by an int in a couple of places, most notably in the Taylor series code. This produces a significant speedup of exp(), ln() and the numeric_big regression test.

Dean Rasheed, reviewed by Tom Lane.

Discussion: https://postgr.es/m/CAEZATCVwsBi-ND-t82Cuuh1=8ee6jdOpzsmGN+CUZB6yjLg9jw@mail.gmail.com
1 parent d996d64 commit d1b307e

File tree

1 file changed

+180
-43
lines changed

1 file changed

+180
-43
lines changed

src/backend/utils/adt/numeric.c

Lines changed: 180 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -551,6 +551,8 @@ static void div_var(const NumericVar *var1, const NumericVar *var2,
551551
int rscale, bool round);
552552
static void div_var_fast(const NumericVar *var1, const NumericVar *var2,
553553
NumericVar *result, int rscale, bool round);
554+
static void div_var_int(const NumericVar *var, int ival, int ival_weight,
555+
NumericVar *result, int rscale, bool round);
554556
static int select_div_scale(const NumericVar *var1, const NumericVar *var2);
555557
static void mod_var(const NumericVar *var1, const NumericVar *var2,
556558
NumericVar *result);
@@ -8451,8 +8453,33 @@ div_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result,
84518453
errmsg("division by zero")));
84528454

84538455
/*
8454-
* Now result zero check
8456+
* If the divisor has just one or two digits, delegate to div_var_int(),
8457+
* which uses fast short division.
84558458
*/
8459+
if (var2ndigits <= 2)
8460+
{
8461+
int idivisor;
8462+
int idivisor_weight;
8463+
8464+
idivisor = var2->digits[0];
8465+
idivisor_weight = var2->weight;
8466+
if (var2ndigits == 2)
8467+
{
8468+
idivisor = idivisor * NBASE + var2->digits[1];
8469+
idivisor_weight--;
8470+
}
8471+
if (var2->sign == NUMERIC_NEG)
8472+
idivisor = -idivisor;
8473+
8474+
div_var_int(var1, idivisor, idivisor_weight, result, rscale, round);
8475+
return;
8476+
}
8477+
8478+
/*
8479+
* Otherwise, perform full long division.
8480+
*/
8481+
8482+
/* Result zero check */
84568483
if (var1ndigits == 0)
84578484
{
84588485
zero_var(result);
@@ -8510,23 +8537,6 @@ div_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result,
85108537
alloc_var(result, res_ndigits);
85118538
res_digits = result->digits;
85128539

8513-
if (var2ndigits == 1)
8514-
{
8515-
/*
8516-
* If there's only a single divisor digit, we can use a fast path (cf.
8517-
* Knuth section 4.3.1 exercise 16).
8518-
*/
8519-
divisor1 = divisor[1];
8520-
carry = 0;
8521-
for (i = 0; i < res_ndigits; i++)
8522-
{
8523-
carry = carry * NBASE + dividend[i + 1];
8524-
res_digits[i] = carry / divisor1;
8525-
carry = carry % divisor1;
8526-
}
8527-
}
8528-
else
8529-
{
85308540
/*
85318541
* The full multiple-place algorithm is taken from Knuth volume 2,
85328542
* Algorithm 4.3.1D.
@@ -8659,7 +8669,6 @@ div_var(const NumericVar *var1, const NumericVar *var2, NumericVar *result,
86598669
/* And we're done with this quotient digit */
86608670
res_digits[j] = qhat;
86618671
}
8662-
}
86638672

86648673
pfree(dividend);
86658674

@@ -8735,8 +8744,33 @@ div_var_fast(const NumericVar *var1, const NumericVar *var2,
87358744
errmsg("division by zero")));
87368745

87378746
/*
8738-
* Now result zero check
8747+
* If the divisor has just one or two digits, delegate to div_var_int(),
8748+
* which uses fast short division.
87398749
*/
8750+
if (var2ndigits <= 2)
8751+
{
8752+
int idivisor;
8753+
int idivisor_weight;
8754+
8755+
idivisor = var2->digits[0];
8756+
idivisor_weight = var2->weight;
8757+
if (var2ndigits == 2)
8758+
{
8759+
idivisor = idivisor * NBASE + var2->digits[1];
8760+
idivisor_weight--;
8761+
}
8762+
if (var2->sign == NUMERIC_NEG)
8763+
idivisor = -idivisor;
8764+
8765+
div_var_int(var1, idivisor, idivisor_weight, result, rscale, round);
8766+
return;
8767+
}
8768+
8769+
/*
8770+
* Otherwise, perform full long division.
8771+
*/
8772+
8773+
/* Result zero check */
87408774
if (var1ndigits == 0)
87418775
{
87428776
zero_var(result);
@@ -9008,6 +9042,118 @@ div_var_fast(const NumericVar *var1, const NumericVar *var2,
90089042
}
90099043

90109044

9045+
/*
9046+
* div_var_int() -
9047+
*
9048+
* Divide a numeric variable by a 32-bit integer with the specified weight.
9049+
* The quotient var / (ival * NBASE^ival_weight) is stored in result.
9050+
*/
9051+
static void
9052+
div_var_int(const NumericVar *var, int ival, int ival_weight,
9053+
NumericVar *result, int rscale, bool round)
9054+
{
9055+
NumericDigit *var_digits = var->digits;
9056+
int var_ndigits = var->ndigits;
9057+
int res_sign;
9058+
int res_weight;
9059+
int res_ndigits;
9060+
NumericDigit *res_buf;
9061+
NumericDigit *res_digits;
9062+
uint32 divisor;
9063+
int i;
9064+
9065+
/* Guard against division by zero */
9066+
if (ival == 0)
9067+
ereport(ERROR,
9068+
errcode(ERRCODE_DIVISION_BY_ZERO),
9069+
errmsg("division by zero"));
9070+
9071+
/* Result zero check */
9072+
if (var_ndigits == 0)
9073+
{
9074+
zero_var(result);
9075+
result->dscale = rscale;
9076+
return;
9077+
}
9078+
9079+
/*
9080+
* Determine the result sign, weight and number of digits to calculate.
9081+
* The weight figured here is correct if the emitted quotient has no
9082+
* leading zero digits; otherwise strip_var() will fix things up.
9083+
*/
9084+
if (var->sign == NUMERIC_POS)
9085+
res_sign = ival > 0 ? NUMERIC_POS : NUMERIC_NEG;
9086+
else
9087+
res_sign = ival > 0 ? NUMERIC_NEG : NUMERIC_POS;
9088+
res_weight = var->weight - ival_weight;
9089+
/* The number of accurate result digits we need to produce: */
9090+
res_ndigits = res_weight + 1 + (rscale + DEC_DIGITS - 1) / DEC_DIGITS;
9091+
/* ... but always at least 1 */
9092+
res_ndigits = Max(res_ndigits, 1);
9093+
/* If rounding needed, figure one more digit to ensure correct result */
9094+
if (round)
9095+
res_ndigits++;
9096+
9097+
res_buf = digitbuf_alloc(res_ndigits + 1);
9098+
res_buf[0] = 0; /* spare digit for later rounding */
9099+
res_digits = res_buf + 1;
9100+
9101+
/*
9102+
* Now compute the quotient digits. This is the short division algorithm
9103+
* described in Knuth volume 2, section 4.3.1 exercise 16, except that we
9104+
* allow the divisor to exceed the internal base.
9105+
*
9106+
* In this algorithm, the carry from one digit to the next is at most
9107+
* divisor - 1. Therefore, while processing the next digit, carry may
9108+
* become as large as divisor * NBASE - 1, and so it requires a 64-bit
9109+
* integer if this exceeds UINT_MAX.
9110+
*/
9111+
divisor = Abs(ival);
9112+
9113+
if (divisor <= UINT_MAX / NBASE)
9114+
{
9115+
/* carry cannot overflow 32 bits */
9116+
uint32 carry = 0;
9117+
9118+
for (i = 0; i < res_ndigits; i++)
9119+
{
9120+
carry = carry * NBASE + (i < var_ndigits ? var_digits[i] : 0);
9121+
res_digits[i] = (NumericDigit) (carry / divisor);
9122+
carry = carry % divisor;
9123+
}
9124+
}
9125+
else
9126+
{
9127+
/* carry may exceed 32 bits */
9128+
uint64 carry = 0;
9129+
9130+
for (i = 0; i < res_ndigits; i++)
9131+
{
9132+
carry = carry * NBASE + (i < var_ndigits ? var_digits[i] : 0);
9133+
res_digits[i] = (NumericDigit) (carry / divisor);
9134+
carry = carry % divisor;
9135+
}
9136+
}
9137+
9138+
/* Store the quotient in result */
9139+
digitbuf_free(result->buf);
9140+
result->ndigits = res_ndigits;
9141+
result->buf = res_buf;
9142+
result->digits = res_digits;
9143+
result->weight = res_weight;
9144+
result->sign = res_sign;
9145+
9146+
/* Round or truncate to target rscale (and set result->dscale) */
9147+
if (round)
9148+
round_var(result, rscale);
9149+
else
9150+
trunc_var(result, rscale);
9151+
9152+
/* Strip leading/trailing zeroes */
9153+
strip_var(result);
9154+
}
9155+
9156+
90119157
/*
90129158
* Default scale selection for division
90139159
*
@@ -9783,7 +9929,7 @@ exp_var(const NumericVar *arg, NumericVar *result, int rscale)
97839929
{
97849930
NumericVar x;
97859931
NumericVar elem;
9786-
NumericVar ni;
9932+
int ni;
97879933
double val;
97889934
int dweight;
97899935
int ndiv2;
@@ -9792,7 +9938,6 @@ exp_var(const NumericVar *arg, NumericVar *result, int rscale)
97929938

97939939
init_var(&x);
97949940
init_var(&elem);
9795-
init_var(&ni);
97969941

97979942
set_var_from_var(arg, &x);
97989943

@@ -9820,29 +9965,24 @@ exp_var(const NumericVar *arg, NumericVar *result, int rscale)
98209965

98219966
/*
98229967
* Reduce x to the range -0.01 <= x <= 0.01 (approximately) by dividing by
9823-
* 2^n, to improve the convergence rate of the Taylor series.
9968+
* 2^ndiv2, to improve the convergence rate of the Taylor series.
9969+
*
9970+
* Note that the overflow check above ensures that Abs(x) < 6000, which
9971+
* means that ndiv2 <= 20 here.
98249972
*/
98259973
if (Abs(val) > 0.01)
98269974
{
9827-
NumericVar tmp;
9828-
9829-
init_var(&tmp);
9830-
set_var_from_var(&const_two, &tmp);
9831-
98329975
ndiv2 = 1;
98339976
val /= 2;
98349977

98359978
while (Abs(val) > 0.01)
98369979
{
98379980
ndiv2++;
98389981
val /= 2;
9839-
add_var(&tmp, &tmp, &tmp);
98409982
}
98419983

98429984
local_rscale = x.dscale + ndiv2;
9843-
div_var_fast(&x, &tmp, &x, local_rscale, true);
9844-
9845-
free_var(&tmp);
9985+
div_var_int(&x, 1 << ndiv2, 0, &x, local_rscale, true);
98469986
}
98479987
else
98489988
ndiv2 = 0;
@@ -9870,16 +10010,16 @@ exp_var(const NumericVar *arg, NumericVar *result, int rscale)
987010010
add_var(&const_one, &x, result);
987110011

987210012
mul_var(&x, &x, &elem, local_rscale);
9873-
set_var_from_var(&const_two, &ni);
9874-
div_var_fast(&elem, &ni, &elem, local_rscale, true);
10013+
ni = 2;
10014+
div_var_int(&elem, ni, 0, &elem, local_rscale, true);
987510015

987610016
while (elem.ndigits != 0)
987710017
{
987810018
add_var(result, &elem, result);
987910019

988010020
mul_var(&elem, &x, &elem, local_rscale);
9881-
add_var(&ni, &const_one, &ni);
9882-
div_var_fast(&elem, &ni, &elem, local_rscale, true);
10021+
ni++;
10022+
div_var_int(&elem, ni, 0, &elem, local_rscale, true);
988310023
}
988410024

988510025
/*
@@ -9899,7 +10039,6 @@ exp_var(const NumericVar *arg, NumericVar *result, int rscale)
989910039

990010040
free_var(&x);
990110041
free_var(&elem);
9902-
free_var(&ni);
990310042
}
990410043

990510044

@@ -9993,7 +10132,7 @@ ln_var(const NumericVar *arg, NumericVar *result, int rscale)
999310132
{
999410133
NumericVar x;
999510134
NumericVar xx;
9996-
NumericVar ni;
10135+
int ni;
999710136
NumericVar elem;
999810137
NumericVar fact;
999910138
int nsqrt;
@@ -10012,7 +10151,6 @@ ln_var(const NumericVar *arg, NumericVar *result, int rscale)
1001210151

1001310152
init_var(&x);
1001410153
init_var(&xx);
10015-
init_var(&ni);
1001610154
init_var(&elem);
1001710155
init_var(&fact);
1001810156

@@ -10073,13 +10211,13 @@ ln_var(const NumericVar *arg, NumericVar *result, int rscale)
1007310211
set_var_from_var(result, &xx);
1007410212
mul_var(result, result, &x, local_rscale);
1007510213

10076-
set_var_from_var(&const_one, &ni);
10214+
ni = 1;
1007710215

1007810216
for (;;)
1007910217
{
10080-
add_var(&ni, &const_two, &ni);
10218+
ni += 2;
1008110219
mul_var(&xx, &x, &xx, local_rscale);
10082-
div_var_fast(&xx, &ni, &elem, local_rscale, true);
10220+
div_var_int(&xx, ni, 0, &elem, local_rscale, true);
1008310221

1008410222
if (elem.ndigits == 0)
1008510223
break;
@@ -10095,7 +10233,6 @@ ln_var(const NumericVar *arg, NumericVar *result, int rscale)
1009510233

1009610234
free_var(&x);
1009710235
free_var(&xx);
10098-
free_var(&ni);
1009910236
free_var(&elem);
1010010237
free_var(&fact);
1010110238
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy