Skip to content

Commit 4c2777d

Browse files
committed
Change get_variable_numdistinct's API to flag default estimates explicitly.
Formerly, callers detected a default by comparing the returned value against DEFAULT_NUM_DISTINCT, which had the problem that a perfectly solid estimate that merely happened to equal that value might be mistaken for a content-free default.
1 parent 1cb108e commit 4c2777d

File tree

2 files changed

+45
-28
lines changed

2 files changed

+45
-28
lines changed

src/backend/utils/adt/selfuncs.c

Lines changed: 43 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,7 @@ var_eq_const(VariableStatData *vardata, Oid operator,
244244
bool varonleft)
245245
{
246246
double selec;
247+
bool isdefault;
247248

248249
/*
249250
* If the constant is NULL, assume operator is strict and return zero, ie,
@@ -344,7 +345,7 @@ var_eq_const(VariableStatData *vardata, Oid operator,
344345
* all the not-common values share this remaining fraction
345346
* equally, so we divide by the number of other distinct values.
346347
*/
347-
otherdistinct = get_variable_numdistinct(vardata) - nnumbers;
348+
otherdistinct = get_variable_numdistinct(vardata, &isdefault) - nnumbers;
348349
if (otherdistinct > 1)
349350
selec /= otherdistinct;
350351

@@ -366,7 +367,7 @@ var_eq_const(VariableStatData *vardata, Oid operator,
366367
* of distinct values and assuming they are equally common. (The guess
367368
* is unlikely to be very good, but we do know a few special cases.)
368369
*/
369-
selec = 1.0 / get_variable_numdistinct(vardata);
370+
selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
370371
}
371372

372373
/* result should be in range, but make sure... */
@@ -384,6 +385,7 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
384385
bool varonleft)
385386
{
386387
double selec;
388+
bool isdefault;
387389

388390
/*
389391
* If we matched the var to a unique index, assume there is exactly one
@@ -414,7 +416,7 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
414416
* idea?)
415417
*/
416418
selec = 1.0 - stats->stanullfrac;
417-
ndistinct = get_variable_numdistinct(vardata);
419+
ndistinct = get_variable_numdistinct(vardata, &isdefault);
418420
if (ndistinct > 1)
419421
selec /= ndistinct;
420422

@@ -441,7 +443,7 @@ var_eq_non_const(VariableStatData *vardata, Oid operator,
441443
* of distinct values and assuming they are equally common. (The guess
442444
* is unlikely to be very good, but we do know a few special cases.)
443445
*/
444-
selec = 1.0 / get_variable_numdistinct(vardata);
446+
selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
445447
}
446448

447449
/* result should be in range, but make sure... */
@@ -2071,6 +2073,8 @@ eqjoinsel_inner(Oid operator,
20712073
double selec;
20722074
double nd1;
20732075
double nd2;
2076+
bool isdefault1;
2077+
bool isdefault2;
20742078
Form_pg_statistic stats1 = NULL;
20752079
Form_pg_statistic stats2 = NULL;
20762080
bool have_mcvs1 = false;
@@ -2084,8 +2088,8 @@ eqjoinsel_inner(Oid operator,
20842088
float4 *numbers2 = NULL;
20852089
int nnumbers2 = 0;
20862090

2087-
nd1 = get_variable_numdistinct(vardata1);
2088-
nd2 = get_variable_numdistinct(vardata2);
2091+
nd1 = get_variable_numdistinct(vardata1, &isdefault1);
2092+
nd2 = get_variable_numdistinct(vardata2, &isdefault2);
20892093

20902094
if (HeapTupleIsValid(vardata1->statsTuple))
20912095
{
@@ -2296,6 +2300,8 @@ eqjoinsel_semi(Oid operator,
22962300
double selec;
22972301
double nd1;
22982302
double nd2;
2303+
bool isdefault1;
2304+
bool isdefault2;
22992305
Form_pg_statistic stats1 = NULL;
23002306
bool have_mcvs1 = false;
23012307
Datum *values1 = NULL;
@@ -2308,8 +2314,8 @@ eqjoinsel_semi(Oid operator,
23082314
float4 *numbers2 = NULL;
23092315
int nnumbers2 = 0;
23102316

2311-
nd1 = get_variable_numdistinct(vardata1);
2312-
nd2 = get_variable_numdistinct(vardata2);
2317+
nd1 = get_variable_numdistinct(vardata1, &isdefault1);
2318+
nd2 = get_variable_numdistinct(vardata2, &isdefault2);
23132319

23142320
/*
23152321
* We clamp nd2 to be not more than what we estimate the inner relation's
@@ -2441,7 +2447,7 @@ eqjoinsel_semi(Oid operator,
24412447
* nd2 is default, punt and assume half of the uncertain rows have
24422448
* join partners.
24432449
*/
2444-
if (nd1 != DEFAULT_NUM_DISTINCT && nd2 != DEFAULT_NUM_DISTINCT)
2450+
if (!isdefault1 && !isdefault2)
24452451
{
24462452
nd1 -= nmatches;
24472453
nd2 -= nmatches;
@@ -2464,7 +2470,7 @@ eqjoinsel_semi(Oid operator,
24642470
*/
24652471
double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
24662472

2467-
if (nd1 != DEFAULT_NUM_DISTINCT && nd2 != DEFAULT_NUM_DISTINCT)
2473+
if (!isdefault1 && !isdefault2)
24682474
{
24692475
if (nd1 <= nd2 || nd2 < 0)
24702476
selec = 1.0 - nullfrac1;
@@ -2955,9 +2961,10 @@ add_unique_group_var(PlannerInfo *root, List *varinfos,
29552961
{
29562962
GroupVarInfo *varinfo;
29572963
double ndistinct;
2964+
bool isdefault;
29582965
ListCell *lc;
29592966

2960-
ndistinct = get_variable_numdistinct(vardata);
2967+
ndistinct = get_variable_numdistinct(vardata, &isdefault);
29612968

29622969
/* cannot use foreach here because of possible list_delete */
29632970
lc = list_head(varinfos);
@@ -3292,14 +3299,23 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
32923299
stanullfrac,
32933300
mcvfreq,
32943301
avgfreq;
3302+
bool isdefault;
32953303
float4 *numbers;
32963304
int nnumbers;
32973305

32983306
examine_variable(root, hashkey, 0, &vardata);
32993307

3300-
/* Get number of distinct values and fraction that are null */
3301-
ndistinct = get_variable_numdistinct(&vardata);
3308+
/* Get number of distinct values */
3309+
ndistinct = get_variable_numdistinct(&vardata, &isdefault);
33023310

3311+
/* If ndistinct isn't real, punt and return 0.1, per comments above */
3312+
if (isdefault)
3313+
{
3314+
ReleaseVariableStats(vardata);
3315+
return (Selectivity) 0.1;
3316+
}
3317+
3318+
/* Get fraction that are null */
33033319
if (HeapTupleIsValid(vardata.statsTuple))
33043320
{
33053321
Form_pg_statistic stats;
@@ -3308,19 +3324,7 @@ estimate_hash_bucketsize(PlannerInfo *root, Node *hashkey, double nbuckets)
33083324
stanullfrac = stats->stanullfrac;
33093325
}
33103326
else
3311-
{
3312-
/*
3313-
* Believe a default ndistinct only if it came from stats. Otherwise
3314-
* punt and return 0.1, per comments above.
3315-
*/
3316-
if (ndistinct == DEFAULT_NUM_DISTINCT)
3317-
{
3318-
ReleaseVariableStats(vardata);
3319-
return (Selectivity) 0.1;
3320-
}
3321-
33223327
stanullfrac = 0.0;
3323-
}
33243328

33253329
/* Compute avg freq of all distinct data values in raw relation */
33263330
avgfreq = (1.0 - stanullfrac) / ndistinct;
@@ -4414,16 +4418,20 @@ examine_simple_variable(PlannerInfo *root, Var *var,
44144418
* Estimate the number of distinct values of a variable.
44154419
*
44164420
* vardata: results of examine_variable
4421+
* *isdefault: set to TRUE if the result is a default rather than based on
4422+
* anything meaningful.
44174423
*
44184424
* NB: be careful to produce an integral result, since callers may compare
44194425
* the result to exact integer counts.
44204426
*/
44214427
double
4422-
get_variable_numdistinct(VariableStatData *vardata)
4428+
get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
44234429
{
44244430
double stadistinct;
44254431
double ntuples;
44264432

4433+
*isdefault = false;
4434+
44274435
/*
44284436
* Determine the stadistinct value to use. There are cases where we can
44294437
* get an estimate even without a pg_statistic entry, or can get a better
@@ -4496,10 +4504,16 @@ get_variable_numdistinct(VariableStatData *vardata)
44964504
* Otherwise we need to get the relation size; punt if not available.
44974505
*/
44984506
if (vardata->rel == NULL)
4507+
{
4508+
*isdefault = true;
44994509
return DEFAULT_NUM_DISTINCT;
4510+
}
45004511
ntuples = vardata->rel->tuples;
45014512
if (ntuples <= 0.0)
4513+
{
4514+
*isdefault = true;
45024515
return DEFAULT_NUM_DISTINCT;
4516+
}
45034517

45044518
/*
45054519
* If we had a relative estimate, use that.
@@ -4509,11 +4523,13 @@ get_variable_numdistinct(VariableStatData *vardata)
45094523

45104524
/*
45114525
* With no data, estimate ndistinct = ntuples if the table is small, else
4512-
* use default.
4526+
* use default. We use DEFAULT_NUM_DISTINCT as the cutoff for "small"
4527+
* so that the behavior isn't discontinuous.
45134528
*/
45144529
if (ntuples < DEFAULT_NUM_DISTINCT)
45154530
return ntuples;
45164531

4532+
*isdefault = true;
45174533
return DEFAULT_NUM_DISTINCT;
45184534
}
45194535

src/include/utils/selfuncs.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -121,7 +121,8 @@ extern void get_join_variables(PlannerInfo *root, List *args,
121121
VariableStatData *vardata1,
122122
VariableStatData *vardata2,
123123
bool *join_is_reversed);
124-
extern double get_variable_numdistinct(VariableStatData *vardata);
124+
extern double get_variable_numdistinct(VariableStatData *vardata,
125+
bool *isdefault);
125126
extern double mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
126127
Datum constval, bool varonleft,
127128
double *sumcommonp);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy