Skip to content

Commit c473d92

Browse files
committed
Fix cost_mergejoin's failure to adjust for rescanning of non-unique merge join keys when considering a semi or anti join.
This requires estimating the selectivity of the merge qual as though it were a regular inner join condition. To allow caching both that and the real outer-join-aware selectivity, split RestrictInfo.this_selec into two fields (norm_selec and outer_selec). This fixes one of the problems reported by Kevin Grittner.
1 parent c87c31f commit c473d92

File tree

9 files changed

+86
-53
lines changed

9 files changed

+86
-53
lines changed

src/backend/nodes/copyfuncs.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
* Portions Copyright (c) 1994, Regents of the University of California
1616
*
1717
* IDENTIFICATION
18-
* $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.422 2009/02/02 19:31:39 alvherre Exp $
18+
* $PostgreSQL: pgsql/src/backend/nodes/copyfuncs.c,v 1.423 2009/02/06 23:43:23 tgl Exp $
1919
*
2020
*-------------------------------------------------------------------------
2121
*/
@@ -1606,7 +1606,8 @@ _copyRestrictInfo(RestrictInfo *from)
16061606
/* EquivalenceClasses are never copied, so shallow-copy the pointers */
16071607
COPY_SCALAR_FIELD(parent_ec);
16081608
COPY_SCALAR_FIELD(eval_cost);
1609-
COPY_SCALAR_FIELD(this_selec);
1609+
COPY_SCALAR_FIELD(norm_selec);
1610+
COPY_SCALAR_FIELD(outer_selec);
16101611
COPY_NODE_FIELD(mergeopfamilies);
16111612
/* EquivalenceClasses are never copied, so shallow-copy the pointers */
16121613
COPY_SCALAR_FIELD(left_ec);

src/backend/nodes/outfuncs.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.351 2009/02/02 19:31:39 alvherre Exp $
11+
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.352 2009/02/06 23:43:23 tgl Exp $
1212
*
1313
* NOTES
1414
* Every node type that can appear in stored rules' parsetrees *must*
@@ -1609,7 +1609,8 @@ _outRestrictInfo(StringInfo str, RestrictInfo *node)
16091609
WRITE_BITMAPSET_FIELD(right_relids);
16101610
WRITE_NODE_FIELD(orclause);
16111611
/* don't write parent_ec, leads to infinite recursion in plan tree dump */
1612-
WRITE_FLOAT_FIELD(this_selec, "%.4f");
1612+
WRITE_FLOAT_FIELD(norm_selec, "%.4f");
1613+
WRITE_FLOAT_FIELD(outer_selec, "%.4f");
16131614
WRITE_NODE_FIELD(mergeopfamilies);
16141615
/* don't write left_ec, leads to infinite recursion in plan tree dump */
16151616
/* don't write right_ec, leads to infinite recursion in plan tree dump */

src/backend/optimizer/path/clausesel.c

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.96 2009/01/01 17:23:43 momjian Exp $
11+
* $PostgreSQL: pgsql/src/backend/optimizer/path/clausesel.c,v 1.97 2009/02/06 23:43:23 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -516,21 +516,34 @@ clause_selectivity(PlannerInfo *root,
516516
/*
517517
* If the clause is marked redundant, always return 1.0.
518518
*/
519-
if (rinfo->this_selec > 1)
519+
if (rinfo->norm_selec > 1)
520520
return (Selectivity) 1.0;
521521

522522
/*
523523
* If possible, cache the result of the selectivity calculation for
524524
* the clause. We can cache if varRelid is zero or the clause
525525
* contains only vars of that relid --- otherwise varRelid will affect
526-
* the result, so mustn't cache.
526+
* the result, so mustn't cache. Outer join quals might be examined
527+
* with either their join's actual jointype or JOIN_INNER, so we need
528+
* two cache variables to remember both cases. Note: we assume the
529+
* result won't change if we are switching the input relations or
530+
* considering a unique-ified case, so we only need one cache variable
531+
* for all non-JOIN_INNER cases.
527532
*/
528533
if (varRelid == 0 ||
529534
bms_is_subset_singleton(rinfo->clause_relids, varRelid))
530535
{
531536
/* Cacheable --- do we already have the result? */
532-
if (rinfo->this_selec >= 0)
533-
return rinfo->this_selec;
537+
if (jointype == JOIN_INNER)
538+
{
539+
if (rinfo->norm_selec >= 0)
540+
return rinfo->norm_selec;
541+
}
542+
else
543+
{
544+
if (rinfo->outer_selec >= 0)
545+
return rinfo->outer_selec;
546+
}
534547
cacheable = true;
535548
}
536549

@@ -753,7 +766,12 @@ clause_selectivity(PlannerInfo *root,
753766

754767
/* Cache the result if possible */
755768
if (cacheable)
756-
rinfo->this_selec = s1;
769+
{
770+
if (jointype == JOIN_INNER)
771+
rinfo->norm_selec = s1;
772+
else
773+
rinfo->outer_selec = s1;
774+
}
757775

758776
#ifdef SELECTIVITY_DEBUG
759777
elog(DEBUG4, "clause_selectivity: s1 %f", s1);

src/backend/optimizer/path/costsize.c

Lines changed: 33 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@
5454
* Portions Copyright (c) 1994, Regents of the University of California
5555
*
5656
* IDENTIFICATION
57-
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.203 2009/01/01 17:23:43 momjian Exp $
57+
* $PostgreSQL: pgsql/src/backend/optimizer/path/costsize.c,v 1.204 2009/02/06 23:43:23 tgl Exp $
5858
*
5959
*-------------------------------------------------------------------------
6060
*/
@@ -120,7 +120,7 @@ static MergeScanSelCache *cached_scansel(PlannerInfo *root,
120120
PathKey *pathkey);
121121
static bool cost_qual_eval_walker(Node *node, cost_qual_eval_context *context);
122122
static double approx_tuple_count(PlannerInfo *root, JoinPath *path,
123-
List *quals, SpecialJoinInfo *sjinfo);
123+
List *quals);
124124
static void set_rel_width(PlannerInfo *root, RelOptInfo *rel);
125125
static double relation_byte_size(double tuples, int width);
126126
static double page_size(double tuples, int width);
@@ -1507,11 +1507,9 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
15071507

15081508
/*
15091509
* Get approx # tuples passing the mergequals. We use approx_tuple_count
1510-
* here for speed --- in most cases, any errors won't affect the result
1511-
* much.
1510+
* here because we need an estimate done with JOIN_INNER semantics.
15121511
*/
1513-
mergejointuples = approx_tuple_count(root, &path->jpath,
1514-
mergeclauses, sjinfo);
1512+
mergejointuples = approx_tuple_count(root, &path->jpath, mergeclauses);
15151513

15161514
/*
15171515
* When there are equal merge keys in the outer relation, the mergejoin
@@ -1539,16 +1537,10 @@ cost_mergejoin(MergePath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
15391537
* when we should not. Can we do better without expensive selectivity
15401538
* computations?
15411539
*
1542-
* For SEMI and ANTI joins, only one inner tuple need be rescanned for
1543-
* each group of same-keyed outer tuples (assuming that all joinquals
1544-
* are merge quals). This makes the effect small enough to ignore,
1545-
* so we just set rescannedtuples = 0. Likewise, the whole issue is
1546-
* moot if we are working from a unique-ified outer input.
1540+
* The whole issue is moot if we are working from a unique-ified outer
1541+
* input.
15471542
*/
1548-
if (sjinfo->jointype == JOIN_SEMI ||
1549-
sjinfo->jointype == JOIN_ANTI)
1550-
rescannedtuples = 0;
1551-
else if (IsA(outer_path, UniquePath))
1543+
if (IsA(outer_path, UniquePath))
15521544
rescannedtuples = 0;
15531545
else
15541546
{
@@ -1847,11 +1839,9 @@ cost_hashjoin(HashPath *path, PlannerInfo *root, SpecialJoinInfo *sjinfo)
18471839

18481840
/*
18491841
* Get approx # tuples passing the hashquals. We use approx_tuple_count
1850-
* here for speed --- in most cases, any errors won't affect the result
1851-
* much.
1842+
* here because we need an estimate done with JOIN_INNER semantics.
18521843
*/
1853-
hashjointuples = approx_tuple_count(root, &path->jpath,
1854-
hashclauses, sjinfo);
1844+
hashjointuples = approx_tuple_count(root, &path->jpath, hashclauses);
18551845

18561846
/* cost of source data */
18571847
startup_cost += outer_path->startup_cost;
@@ -2324,6 +2314,11 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
23242314
* The quals can be either an implicitly-ANDed list of boolean expressions,
23252315
* or a list of RestrictInfo nodes (typically the latter).
23262316
*
2317+
* We intentionally compute the selectivity under JOIN_INNER rules, even
2318+
* if it's some type of outer join. This is appropriate because we are
2319+
* trying to figure out how many tuples pass the initial merge or hash
2320+
* join step.
2321+
*
23272322
* This is quick-and-dirty because we bypass clauselist_selectivity, and
23282323
* simply multiply the independent clause selectivities together. Now
23292324
* clauselist_selectivity often can't do any better than that anyhow, but
@@ -2336,31 +2331,40 @@ cost_qual_eval_walker(Node *node, cost_qual_eval_context *context)
23362331
* seems OK to live with the approximation.
23372332
*/
23382333
static double
2339-
approx_tuple_count(PlannerInfo *root, JoinPath *path,
2340-
List *quals, SpecialJoinInfo *sjinfo)
2334+
approx_tuple_count(PlannerInfo *root, JoinPath *path, List *quals)
23412335
{
23422336
double tuples;
23432337
double outer_tuples = path->outerjoinpath->parent->rows;
23442338
double inner_tuples = path->innerjoinpath->parent->rows;
2339+
SpecialJoinInfo sjinfo;
23452340
Selectivity selec = 1.0;
23462341
ListCell *l;
23472342

2343+
/*
2344+
* Make up a SpecialJoinInfo for JOIN_INNER semantics.
2345+
*/
2346+
sjinfo.type = T_SpecialJoinInfo;
2347+
sjinfo.min_lefthand = path->outerjoinpath->parent->relids;
2348+
sjinfo.min_righthand = path->innerjoinpath->parent->relids;
2349+
sjinfo.syn_lefthand = path->outerjoinpath->parent->relids;
2350+
sjinfo.syn_righthand = path->innerjoinpath->parent->relids;
2351+
sjinfo.jointype = JOIN_INNER;
2352+
/* we don't bother trying to make the remaining fields valid */
2353+
sjinfo.lhs_strict = false;
2354+
sjinfo.delay_upper_joins = false;
2355+
sjinfo.join_quals = NIL;
2356+
23482357
/* Get the approximate selectivity */
23492358
foreach(l, quals)
23502359
{
23512360
Node *qual = (Node *) lfirst(l);
23522361

23532362
/* Note that clause_selectivity will be able to cache its result */
2354-
selec *= clause_selectivity(root, qual, 0, sjinfo->jointype, sjinfo);
2363+
selec *= clause_selectivity(root, qual, 0, JOIN_INNER, &sjinfo);
23552364
}
23562365

2357-
/* Apply it correctly using the input relation sizes */
2358-
if (sjinfo->jointype == JOIN_SEMI)
2359-
tuples = selec * outer_tuples;
2360-
else if (sjinfo->jointype == JOIN_ANTI)
2361-
tuples = (1.0 - selec) * outer_tuples;
2362-
else
2363-
tuples = selec * outer_tuples * inner_tuples;
2366+
/* Apply it to the input relation sizes */
2367+
tuples = selec * outer_tuples * inner_tuples;
23642368

23652369
return clamp_row_est(tuples);
23662370
}

src/backend/optimizer/path/equivclass.c

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* Portions Copyright (c) 1994, Regents of the University of California
1111
*
1212
* IDENTIFICATION
13-
* $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.16 2009/01/01 17:23:43 momjian Exp $
13+
* $PostgreSQL: pgsql/src/backend/optimizer/path/equivclass.c,v 1.17 2009/02/06 23:43:23 tgl Exp $
1414
*
1515
*-------------------------------------------------------------------------
1616
*/
@@ -1200,7 +1200,8 @@ reconsider_outer_join_clauses(PlannerInfo *root)
12001200
list_delete_cell(root->left_join_clauses, cell, prev);
12011201
/* we throw it back anyway (see notes above) */
12021202
/* but the thrown-back clause has no extra selectivity */
1203-
rinfo->this_selec = 2.0;
1203+
rinfo->norm_selec = 2.0;
1204+
rinfo->outer_selec = 1.0;
12041205
distribute_restrictinfo_to_rels(root, rinfo);
12051206
}
12061207
else
@@ -1222,7 +1223,8 @@ reconsider_outer_join_clauses(PlannerInfo *root)
12221223
list_delete_cell(root->right_join_clauses, cell, prev);
12231224
/* we throw it back anyway (see notes above) */
12241225
/* but the thrown-back clause has no extra selectivity */
1225-
rinfo->this_selec = 2.0;
1226+
rinfo->norm_selec = 2.0;
1227+
rinfo->outer_selec = 1.0;
12261228
distribute_restrictinfo_to_rels(root, rinfo);
12271229
}
12281230
else
@@ -1244,7 +1246,8 @@ reconsider_outer_join_clauses(PlannerInfo *root)
12441246
list_delete_cell(root->full_join_clauses, cell, prev);
12451247
/* we throw it back anyway (see notes above) */
12461248
/* but the thrown-back clause has no extra selectivity */
1247-
rinfo->this_selec = 2.0;
1249+
rinfo->norm_selec = 2.0;
1250+
rinfo->outer_selec = 1.0;
12481251
distribute_restrictinfo_to_rels(root, rinfo);
12491252
}
12501253
else

src/backend/optimizer/path/orindxpath.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.86 2009/01/01 17:23:44 momjian Exp $
11+
* $PostgreSQL: pgsql/src/backend/optimizer/path/orindxpath.c,v 1.87 2009/02/06 23:43:23 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -174,10 +174,11 @@ create_or_index_quals(PlannerInfo *root, RelOptInfo *rel)
174174
{
175175
orig_selec = clause_selectivity(root, (Node *) bestrinfo,
176176
0, JOIN_INNER, NULL);
177-
bestrinfo->this_selec = orig_selec / or_selec;
177+
bestrinfo->norm_selec = orig_selec / or_selec;
178178
/* clamp result to sane range */
179-
if (bestrinfo->this_selec > 1)
180-
bestrinfo->this_selec = 1;
179+
if (bestrinfo->norm_selec > 1)
180+
bestrinfo->norm_selec = 1;
181+
/* It isn't an outer join clause, so no need to adjust outer_selec */
181182
}
182183

183184
/* Tell caller to recompute rel's rows estimate */

src/backend/optimizer/prep/prepunion.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@
2222
*
2323
*
2424
* IDENTIFICATION
25-
* $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.164 2009/01/01 17:23:44 momjian Exp $
25+
* $PostgreSQL: pgsql/src/backend/optimizer/prep/prepunion.c,v 1.165 2009/02/06 23:43:23 tgl Exp $
2626
*
2727
*-------------------------------------------------------------------------
2828
*/
@@ -1662,7 +1662,8 @@ adjust_appendrel_attrs_mutator(Node *node, AppendRelInfo *context)
16621662
* different values when considering the child relation.
16631663
*/
16641664
newinfo->eval_cost.startup = -1;
1665-
newinfo->this_selec = -1;
1665+
newinfo->norm_selec = -1;
1666+
newinfo->outer_selec = -1;
16661667
newinfo->left_ec = NULL;
16671668
newinfo->right_ec = NULL;
16681669
newinfo->left_em = NULL;

src/backend/optimizer/util/restrictinfo.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.56 2009/01/01 17:23:45 momjian Exp $
11+
* $PostgreSQL: pgsql/src/backend/optimizer/util/restrictinfo.c,v 1.57 2009/02/06 23:43:23 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -338,7 +338,8 @@ make_restrictinfo_internal(Expr *clause,
338338
restrictinfo->parent_ec = NULL;
339339

340340
restrictinfo->eval_cost.startup = -1;
341-
restrictinfo->this_selec = -1;
341+
restrictinfo->norm_selec = -1;
342+
restrictinfo->outer_selec = -1;
342343

343344
restrictinfo->mergeopfamilies = NIL;
344345

src/include/nodes/relation.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.167 2009/01/01 17:24:00 momjian Exp $
10+
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.168 2009/02/06 23:43:24 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -992,8 +992,11 @@ typedef struct RestrictInfo
992992

993993
/* cache space for cost and selectivity */
994994
QualCost eval_cost; /* eval cost of clause; -1 if not yet set */
995-
Selectivity this_selec; /* selectivity; -1 if not yet set; >1 means
995+
Selectivity norm_selec; /* selectivity for "normal" (JOIN_INNER)
996+
* semantics; -1 if not yet set; >1 means
996997
* a redundant clause */
998+
Selectivity outer_selec; /* selectivity for outer join semantics;
999+
* -1 if not yet set */
9971000

9981001
/* valid if clause is mergejoinable, else NIL */
9991002
List *mergeopfamilies; /* opfamilies containing clause operator */

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy