Skip to content

Commit 2e46b76

Browse files
committed
Extend join-selectivity API (oprjoin interface) so that join type is passed to join selectivity estimators. Make use of this in eqjoinsel to derive non-bogus selectivity for IN clauses. Further tweaking of cost estimation for IN. initdb forced because of pg_proc.h changes.
1 parent 955a1f8 commit 2e46b76

File tree

16 files changed

+221
-136
lines changed

16 files changed

+221
-136
lines changed

doc/src/sgml/indexcost.sgml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<!--
2-
$Header: /cvsroot/pgsql/doc/src/sgml/Attic/indexcost.sgml,v 2.14 2003/01/14 10:19:02 petere Exp $
2+
$Header: /cvsroot/pgsql/doc/src/sgml/Attic/indexcost.sgml,v 2.15 2003/01/28 22:13:24 tgl Exp $
33
-->
44

55
<chapter id="indexcost">
@@ -205,7 +205,8 @@ amcostestimate (Query *root,
205205

206206
<programlisting>
207207
*indexSelectivity = clauselist_selectivity(root, indexQuals,
208-
lfirsti(rel->relids));
208+
lfirsti(rel->relids),
209+
JOIN_INNER);
209210
</programlisting>
210211
</para>
211212
</step>

src/backend/catalog/pg_operator.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/catalog/pg_operator.c,v 1.77 2002/09/04 20:31:14 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/catalog/pg_operator.c,v 1.78 2003/01/28 22:13:25 tgl Exp $
1212
*
1313
* NOTES
1414
* these routines moved here from commands/define.c and somewhat cleaned up.
@@ -485,10 +485,11 @@ OperatorCreate(const char *operatorName,
485485
typeId[0] = INTERNALOID; /* Query */
486486
typeId[1] = OIDOID; /* operator OID */
487487
typeId[2] = INTERNALOID; /* args list */
488+
typeId[3] = INT2OID; /* jointype */
488489

489-
joinOid = LookupFuncName(joinName, 3, typeId);
490+
joinOid = LookupFuncName(joinName, 4, typeId);
490491
if (!OidIsValid(joinOid))
491-
func_error("OperatorDef", joinName, 3, typeId, NULL);
492+
func_error("OperatorDef", joinName, 4, typeId, NULL);
492493
}
493494
else
494495
joinOid = InvalidOid;

src/backend/optimizer/path/clausesel.c

Lines changed: 25 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.55 2003/01/15 19:35:39 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.56 2003/01/28 22:13:29 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -65,12 +65,13 @@ static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
6565
Selectivity
6666
restrictlist_selectivity(Query *root,
6767
List *restrictinfo_list,
68-
int varRelid)
68+
int varRelid,
69+
JoinType jointype)
6970
{
7071
List *clauselist = get_actual_clauses(restrictinfo_list);
7172
Selectivity result;
7273

73-
result = clauselist_selectivity(root, clauselist, varRelid);
74+
result = clauselist_selectivity(root, clauselist, varRelid, jointype);
7475
freeList(clauselist);
7576
return result;
7677
}
@@ -81,7 +82,7 @@ restrictlist_selectivity(Query *root,
8182
* expression clauses. The list can be empty, in which case 1.0
8283
* must be returned.
8384
*
84-
* See clause_selectivity() for the meaning of the varRelid parameter.
85+
* See clause_selectivity() for the meaning of the additional parameters.
8586
*
8687
* Our basic approach is to take the product of the selectivities of the
8788
* subclauses. However, that's only right if the subclauses have independent
@@ -113,7 +114,8 @@ restrictlist_selectivity(Query *root,
113114
Selectivity
114115
clauselist_selectivity(Query *root,
115116
List *clauses,
116-
int varRelid)
117+
int varRelid,
118+
JoinType jointype)
117119
{
118120
Selectivity s1 = 1.0;
119121
RangeQueryClause *rqlist = NULL;
@@ -184,7 +186,7 @@ clauselist_selectivity(Query *root,
184186
}
185187
}
186188
/* Not the right form, so treat it generically. */
187-
s2 = clause_selectivity(root, clause, varRelid);
189+
s2 = clause_selectivity(root, clause, varRelid, jointype);
188190
s1 = s1 * s2;
189191
}
190192

@@ -362,11 +364,15 @@ addRangeClause(RangeQueryClause **rqlist, Node *clause,
362364
*
363365
* When varRelid is 0, all variables are treated as variables. This
364366
* is appropriate for ordinary join clauses and restriction clauses.
367+
*
368+
* jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
369+
* if the clause isn't a join clause or the context is uncertain.
365370
*/
366371
Selectivity
367372
clause_selectivity(Query *root,
368373
Node *clause,
369-
int varRelid)
374+
int varRelid,
375+
JoinType jointype)
370376
{
371377
Selectivity s1 = 1.0; /* default for any unhandled clause type */
372378

@@ -424,14 +430,16 @@ clause_selectivity(Query *root,
424430
/* inverse of the selectivity of the underlying clause */
425431
s1 = 1.0 - clause_selectivity(root,
426432
(Node *) get_notclausearg((Expr *) clause),
427-
varRelid);
433+
varRelid,
434+
jointype);
428435
}
429436
else if (and_clause(clause))
430437
{
431438
/* share code with clauselist_selectivity() */
432439
s1 = clauselist_selectivity(root,
433440
((BoolExpr *) clause)->args,
434-
varRelid);
441+
varRelid,
442+
jointype);
435443
}
436444
else if (or_clause(clause))
437445
{
@@ -447,7 +455,8 @@ clause_selectivity(Query *root,
447455
{
448456
Selectivity s2 = clause_selectivity(root,
449457
(Node *) lfirst(arg),
450-
varRelid);
458+
varRelid,
459+
jointype);
451460

452461
s1 = s1 + s2 - s1 * s2;
453462
}
@@ -479,7 +488,8 @@ clause_selectivity(Query *root,
479488
{
480489
/* Estimate selectivity for a join clause. */
481490
s1 = join_selectivity(root, opno,
482-
((OpExpr *) clause)->args);
491+
((OpExpr *) clause)->args,
492+
jointype);
483493
}
484494
else
485495
{
@@ -519,14 +529,16 @@ clause_selectivity(Query *root,
519529
s1 = booltestsel(root,
520530
((BooleanTest *) clause)->booltesttype,
521531
(Node *) ((BooleanTest *) clause)->arg,
522-
varRelid);
532+
varRelid,
533+
jointype);
523534
}
524535
else if (IsA(clause, RelabelType))
525536
{
526537
/* Not sure this case is needed, but it can't hurt */
527538
s1 = clause_selectivity(root,
528539
(Node *) ((RelabelType *) clause)->arg,
529-
varRelid);
540+
varRelid,
541+
jointype);
530542
}
531543

532544
#ifdef SELECTIVITY_DEBUG

src/backend/optimizer/path/costsize.c

Lines changed: 73 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
* Portions Copyright (c) 1994, Regents of the University of California
5050
*
5151
* IDENTIFICATION
52-
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.103 2003/01/27 20:51:50 tgl Exp $
52+
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/costsize.c,v 1.104 2003/01/28 22:13:33 tgl Exp $
5353
*
5454
*-------------------------------------------------------------------------
5555
*/
@@ -104,7 +104,8 @@ bool enable_hashjoin = true;
104104
static Selectivity estimate_hash_bucketsize(Query *root, Var *var,
105105
int nbuckets);
106106
static bool cost_qual_eval_walker(Node *node, QualCost *total);
107-
static Selectivity approx_selectivity(Query *root, List *quals);
107+
static Selectivity approx_selectivity(Query *root, List *quals,
108+
JoinType jointype);
108109
static void set_rel_width(Query *root, RelOptInfo *rel);
109110
static double relation_byte_size(double tuples, int width);
110111
static double page_size(double tuples, int width);
@@ -697,7 +698,8 @@ cost_nestloop(NestPath *path, Query *root)
697698
*/
698699
if (path->jointype == JOIN_IN)
699700
{
700-
Selectivity qual_selec = approx_selectivity(root, restrictlist);
701+
Selectivity qual_selec = approx_selectivity(root, restrictlist,
702+
path->jointype);
701703
double qptuples;
702704

703705
qptuples = ceil(qual_selec * outer_path_rows * inner_path_rows);
@@ -816,10 +818,12 @@ cost_mergejoin(MergePath *path, Query *root)
816818
* Note: it's probably bogus to use the normal selectivity calculation
817819
* here when either the outer or inner path is a UniquePath.
818820
*/
819-
merge_selec = approx_selectivity(root, mergeclauses);
821+
merge_selec = approx_selectivity(root, mergeclauses,
822+
path->jpath.jointype);
820823
cost_qual_eval(&merge_qual_cost, mergeclauses);
821824
qpquals = set_ptrDifference(restrictlist, mergeclauses);
822-
qp_selec = approx_selectivity(root, qpquals);
825+
qp_selec = approx_selectivity(root, qpquals,
826+
path->jpath.jointype);
823827
cost_qual_eval(&qp_qual_cost, qpquals);
824828
freeList(qpquals);
825829

@@ -1044,10 +1048,12 @@ cost_hashjoin(HashPath *path, Query *root)
10441048
* Note: it's probably bogus to use the normal selectivity calculation
10451049
* here when either the outer or inner path is a UniquePath.
10461050
*/
1047-
hash_selec = approx_selectivity(root, hashclauses);
1051+
hash_selec = approx_selectivity(root, hashclauses,
1052+
path->jpath.jointype);
10481053
cost_qual_eval(&hash_qual_cost, hashclauses);
10491054
qpquals = set_ptrDifference(restrictlist, hashclauses);
1050-
qp_selec = approx_selectivity(root, qpquals);
1055+
qp_selec = approx_selectivity(root, qpquals,
1056+
path->jpath.jointype);
10511057
cost_qual_eval(&qp_qual_cost, qpquals);
10521058
freeList(qpquals);
10531059

@@ -1084,54 +1090,67 @@ cost_hashjoin(HashPath *path, Query *root)
10841090
* Determine bucketsize fraction for inner relation. We use the
10851091
* smallest bucketsize estimated for any individual hashclause;
10861092
* this is undoubtedly conservative.
1093+
*
1094+
* BUT: if inner relation has been unique-ified, we can assume it's
1095+
* good for hashing. This is important both because it's the right
1096+
* answer, and because we avoid contaminating the cache with a value
1097+
* that's wrong for non-unique-ified paths.
10871098
*/
1088-
innerbucketsize = 1.0;
1089-
foreach(hcl, hashclauses)
1099+
if (IsA(inner_path, UniquePath))
1100+
innerbucketsize = 1.0 / virtualbuckets;
1101+
else
10901102
{
1091-
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
1092-
Selectivity thisbucketsize;
1103+
innerbucketsize = 1.0;
1104+
foreach(hcl, hashclauses)
1105+
{
1106+
RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(hcl);
1107+
Selectivity thisbucketsize;
10931108

1094-
Assert(IsA(restrictinfo, RestrictInfo));
1109+
Assert(IsA(restrictinfo, RestrictInfo));
10951110

1096-
/*
1097-
* First we have to figure out which side of the hashjoin clause is the
1098-
* inner side.
1099-
*
1100-
* Since we tend to visit the same clauses over and over when planning
1101-
* a large query, we cache the bucketsize estimate in the RestrictInfo
1102-
* node to avoid repeated lookups of statistics.
1103-
*/
1104-
if (is_subseti(restrictinfo->right_relids, inner_path->parent->relids))
1105-
{
1106-
/* righthand side is inner */
1107-
thisbucketsize = restrictinfo->right_bucketsize;
1108-
if (thisbucketsize < 0)
1111+
/*
1112+
* First we have to figure out which side of the hashjoin clause
1113+
* is the inner side.
1114+
*
1115+
* Since we tend to visit the same clauses over and over when
1116+
* planning a large query, we cache the bucketsize estimate in the
1117+
* RestrictInfo node to avoid repeated lookups of statistics.
1118+
*/
1119+
if (is_subseti(restrictinfo->right_relids,
1120+
inner_path->parent->relids))
11091121
{
1110-
/* not cached yet */
1111-
thisbucketsize = estimate_hash_bucketsize(root,
1122+
/* righthand side is inner */
1123+
thisbucketsize = restrictinfo->right_bucketsize;
1124+
if (thisbucketsize < 0)
1125+
{
1126+
/* not cached yet */
1127+
thisbucketsize =
1128+
estimate_hash_bucketsize(root,
11121129
(Var *) get_rightop(restrictinfo->clause),
1113-
virtualbuckets);
1114-
restrictinfo->right_bucketsize = thisbucketsize;
1130+
virtualbuckets);
1131+
restrictinfo->right_bucketsize = thisbucketsize;
1132+
}
11151133
}
1116-
}
1117-
else
1118-
{
1119-
Assert(is_subseti(restrictinfo->left_relids,
1120-
inner_path->parent->relids));
1121-
/* lefthand side is inner */
1122-
thisbucketsize = restrictinfo->left_bucketsize;
1123-
if (thisbucketsize < 0)
1134+
else
11241135
{
1125-
/* not cached yet */
1126-
thisbucketsize = estimate_hash_bucketsize(root,
1136+
Assert(is_subseti(restrictinfo->left_relids,
1137+
inner_path->parent->relids));
1138+
/* lefthand side is inner */
1139+
thisbucketsize = restrictinfo->left_bucketsize;
1140+
if (thisbucketsize < 0)
1141+
{
1142+
/* not cached yet */
1143+
thisbucketsize =
1144+
estimate_hash_bucketsize(root,
11271145
(Var *) get_leftop(restrictinfo->clause),
1128-
virtualbuckets);
1129-
restrictinfo->left_bucketsize = thisbucketsize;
1146+
virtualbuckets);
1147+
restrictinfo->left_bucketsize = thisbucketsize;
1148+
}
11301149
}
1131-
}
11321150

1133-
if (innerbucketsize > thisbucketsize)
1134-
innerbucketsize = thisbucketsize;
1151+
if (innerbucketsize > thisbucketsize)
1152+
innerbucketsize = thisbucketsize;
1153+
}
11351154
}
11361155

11371156
/*
@@ -1557,7 +1576,7 @@ cost_qual_eval_walker(Node *node, QualCost *total)
15571576
* seems OK to live with the approximation.
15581577
*/
15591578
static Selectivity
1560-
approx_selectivity(Query *root, List *quals)
1579+
approx_selectivity(Query *root, List *quals, JoinType jointype)
15611580
{
15621581
Selectivity total = 1.0;
15631582
List *l;
@@ -1582,13 +1601,14 @@ approx_selectivity(Query *root, List *quals)
15821601
restrictinfo->this_selec =
15831602
clause_selectivity(root,
15841603
(Node *) restrictinfo->clause,
1585-
0);
1604+
0,
1605+
jointype);
15861606
selec = restrictinfo->this_selec;
15871607
}
15881608
else
15891609
{
15901610
/* If it's a bare expression, must always do it the hard way */
1591-
selec = clause_selectivity(root, qual, 0);
1611+
selec = clause_selectivity(root, qual, 0, jointype);
15921612
}
15931613
total *= selec;
15941614
}
@@ -1620,7 +1640,8 @@ set_baserel_size_estimates(Query *root, RelOptInfo *rel)
16201640
temp = rel->tuples *
16211641
restrictlist_selectivity(root,
16221642
rel->baserestrictinfo,
1623-
lfirsti(rel->relids));
1643+
lfirsti(rel->relids),
1644+
JOIN_INNER);
16241645

16251646
/*
16261647
* Force estimate to be at least one row, to make explain output look
@@ -1682,7 +1703,8 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
16821703
*/
16831704
selec = restrictlist_selectivity(root,
16841705
restrictlist,
1685-
0);
1706+
0,
1707+
jointype);
16861708

16871709
/*
16881710
* Basically, we multiply size of Cartesian product by selectivity.
@@ -1694,8 +1716,6 @@ set_joinrel_size_estimates(Query *root, RelOptInfo *rel,
16941716
* For JOIN_IN and variants, the Cartesian product is figured with
16951717
* respect to a unique-ified input, and then we can clamp to the size
16961718
* of the other input.
1697-
* XXX it's not at all clear that the ordinary selectivity calculation
1698-
* is appropriate in this case.
16991719
*/
17001720
switch (jointype)
17011721
{
@@ -1798,7 +1818,8 @@ set_function_size_estimates(Query *root, RelOptInfo *rel)
17981818
temp = rel->tuples *
17991819
restrictlist_selectivity(root,
18001820
rel->baserestrictinfo,
1801-
lfirsti(rel->relids));
1821+
lfirsti(rel->relids),
1822+
JOIN_INNER);
18021823

18031824
/*
18041825
* Force estimate to be at least one row, to make explain output look

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy