Skip to content

Commit 5e9027b

Browse files
committed
Prevent pushing down WHERE clauses into unsafe UNION/INTERSECT nests.
The planner is aware that it mustn't push down upper-level quals into subqueries if the quals reference subquery output columns that contain set-returning functions or volatile functions, or are non-DISTINCT outputs of a DISTINCT ON subquery. However, it missed making this check when there were one or more levels of UNION or INTERSECT above the dangerous expression. This could lead to "set-valued function called in context that cannot accept a set" errors, as seen in bug #8213 from Eric Soroos, or to silently wrong answers in the other cases.

To fix, refactor the checks so that we make the column-is-unsafe checks during subquery_is_pushdown_safe(), which already has to recursively inspect all arms of a set-operation tree. This makes qual_is_pushdown_safe() considerably simpler, at the cost that we will spend some cycles checking output columns that possibly aren't referenced in any upper qual. But the cases where this code gets executed at all are already nontrivial queries, so it's unlikely anybody will notice any slowdown of planning.

This has been broken since commit 05f916e, which makes the bug over ten years old. A bit surprising nobody noticed it before now.
1 parent 0ac9f9e commit 5e9027b

File tree

3 files changed

+179
-91
lines changed

3 files changed

+179
-91
lines changed

src/backend/optimizer/path/allpaths.c

Lines changed: 125 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -64,13 +64,14 @@ static void set_worktable_pathlist(PlannerInfo *root, RelOptInfo *rel,
6464
RangeTblEntry *rte);
6565
static RelOptInfo *make_rel_from_joinlist(PlannerInfo *root, List *joinlist);
6666
static bool subquery_is_pushdown_safe(Query *subquery, Query *topquery,
67-
bool *differentTypes);
67+
bool *unsafeColumns);
6868
static bool recurse_pushdown_safe(Node *setOp, Query *topquery,
69-
bool *differentTypes);
69+
bool *unsafeColumns);
70+
static void check_output_expressions(Query *subquery, bool *unsafeColumns);
7071
static void compare_tlist_datatypes(List *tlist, List *colTypes,
71-
bool *differentTypes);
72+
bool *unsafeColumns);
7273
static bool qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
73-
bool *differentTypes);
74+
bool *unsafeColumns);
7475
static void subquery_push_qual(Query *subquery,
7576
RangeTblEntry *rte, Index rti, Node *qual);
7677
static void recurse_push_qual(Node *setOp, Query *topquery,
@@ -545,7 +546,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
545546
{
546547
Query *parse = root->parse;
547548
Query *subquery = rte->subquery;
548-
bool *differentTypes;
549+
bool *unsafeColumns;
549550
double tuple_fraction;
550551
PlannerInfo *subroot;
551552
List *pathkeys;
@@ -557,8 +558,12 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
557558
*/
558559
subquery = copyObject(subquery);
559560

560-
/* We need a workspace for keeping track of set-op type coercions */
561-
differentTypes = (bool *)
561+
/*
562+
* We need a workspace for keeping track of unsafe-to-reference columns.
563+
* unsafeColumns[i] is set TRUE if we've found that output column i of the
564+
* subquery is unsafe to use in a pushed-down qual.
565+
*/
566+
unsafeColumns = (bool *)
562567
palloc0((list_length(subquery->targetList) + 1) * sizeof(bool));
563568

564569
/*
@@ -582,7 +587,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
582587
* push down a pushable qual, because it'd result in a worse plan?
583588
*/
584589
if (rel->baserestrictinfo != NIL &&
585-
subquery_is_pushdown_safe(subquery, subquery, differentTypes))
590+
subquery_is_pushdown_safe(subquery, subquery, unsafeColumns))
586591
{
587592
/* OK to consider pushing down individual quals */
588593
List *upperrestrictlist = NIL;
@@ -594,7 +599,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
594599
Node *clause = (Node *) rinfo->clause;
595600

596601
if (!rinfo->pseudoconstant &&
597-
qual_is_pushdown_safe(subquery, rti, clause, differentTypes))
602+
qual_is_pushdown_safe(subquery, rti, clause, unsafeColumns))
598603
{
599604
/* Push it down */
600605
subquery_push_qual(subquery, rte, rti, clause);
@@ -608,7 +613,7 @@ set_subquery_pathlist(PlannerInfo *root, RelOptInfo *rel,
608613
rel->baserestrictinfo = upperrestrictlist;
609614
}
610615

611-
pfree(differentTypes);
616+
pfree(unsafeColumns);
612617

613618
/*
614619
* We can safely pass the outer tuple_fraction down to the subquery if the
@@ -986,17 +991,19 @@ standard_join_search(PlannerInfo *root, int levels_needed, List *initial_rels)
986991
* 3. If the subquery contains EXCEPT or EXCEPT ALL set ops we cannot push
987992
* quals into it, because that would change the results.
988993
*
989-
* 4. For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
990-
* push quals into each component query, but the quals can only reference
991-
* subquery columns that suffer no type coercions in the set operation.
992-
* Otherwise there are possible semantic gotchas. So, we check the
993-
* component queries to see if any of them have different output types;
994-
* differentTypes[k] is set true if column k has different type in any
995-
* component.
994+
* In addition, we make several checks on the subquery's output columns
995+
* to see if it is safe to reference them in pushed-down quals. If output
996+
* column k is found to be unsafe to reference, we set unsafeColumns[k] to
997+
* TRUE, but we don't reject the subquery overall since column k might
998+
* not be referenced by some/all quals. The unsafeColumns[] array will be
999+
* consulted later by qual_is_pushdown_safe(). It's better to do it this
1000+
* way than to make the checks directly in qual_is_pushdown_safe(), because
1001+
* when the subquery involves set operations we have to check the output
1002+
* expressions in each arm of the set op.
9961003
*/
9971004
static bool
9981005
subquery_is_pushdown_safe(Query *subquery, Query *topquery,
999-
bool *differentTypes)
1006+
bool *unsafeColumns)
10001007
{
10011008
SetOperationStmt *topop;
10021009

@@ -1008,13 +1015,22 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
10081015
if (subquery->hasWindowFuncs)
10091016
return false;
10101017

1018+
/*
1019+
* If we're at a leaf query, check for unsafe expressions in its target
1020+
* list, and mark any unsafe ones in unsafeColumns[]. (Non-leaf nodes in
1021+
* setop trees have only simple Vars in their tlists, so no need to check
1022+
* them.)
1023+
*/
1024+
if (subquery->setOperations == NULL)
1025+
check_output_expressions(subquery, unsafeColumns);
1026+
10111027
/* Are we at top level, or looking at a setop component? */
10121028
if (subquery == topquery)
10131029
{
10141030
/* Top level, so check any component queries */
10151031
if (subquery->setOperations != NULL)
10161032
if (!recurse_pushdown_safe(subquery->setOperations, topquery,
1017-
differentTypes))
1033+
unsafeColumns))
10181034
return false;
10191035
}
10201036
else
@@ -1027,7 +1043,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
10271043
Assert(topop && IsA(topop, SetOperationStmt));
10281044
compare_tlist_datatypes(subquery->targetList,
10291045
topop->colTypes,
1030-
differentTypes);
1046+
unsafeColumns);
10311047
}
10321048
return true;
10331049
}
@@ -1037,7 +1053,7 @@ subquery_is_pushdown_safe(Query *subquery, Query *topquery,
10371053
*/
10381054
static bool
10391055
recurse_pushdown_safe(Node *setOp, Query *topquery,
1040-
bool *differentTypes)
1056+
bool *unsafeColumns)
10411057
{
10421058
if (IsA(setOp, RangeTblRef))
10431059
{
@@ -1046,19 +1062,19 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
10461062
Query *subquery = rte->subquery;
10471063

10481064
Assert(subquery != NULL);
1049-
return subquery_is_pushdown_safe(subquery, topquery, differentTypes);
1065+
return subquery_is_pushdown_safe(subquery, topquery, unsafeColumns);
10501066
}
10511067
else if (IsA(setOp, SetOperationStmt))
10521068
{
10531069
SetOperationStmt *op = (SetOperationStmt *) setOp;
10541070

1055-
/* EXCEPT is no good */
1071+
/* EXCEPT is no good (point 3 for subquery_is_pushdown_safe) */
10561072
if (op->op == SETOP_EXCEPT)
10571073
return false;
10581074
/* Else recurse */
1059-
if (!recurse_pushdown_safe(op->larg, topquery, differentTypes))
1075+
if (!recurse_pushdown_safe(op->larg, topquery, unsafeColumns))
10601076
return false;
1061-
if (!recurse_pushdown_safe(op->rarg, topquery, differentTypes))
1077+
if (!recurse_pushdown_safe(op->rarg, topquery, unsafeColumns))
10621078
return false;
10631079
}
10641080
else
@@ -1070,17 +1086,92 @@ recurse_pushdown_safe(Node *setOp, Query *topquery,
10701086
}
10711087

10721088
/*
1073-
* Compare tlist's datatypes against the list of set-operation result types.
1074-
* For any items that are different, mark the appropriate element of
1075-
* differentTypes[] to show that this column will have type conversions.
1089+
* check_output_expressions - check subquery's output expressions for safety
1090+
*
1091+
* There are several cases in which it's unsafe to push down an upper-level
1092+
* qual if it references a particular output column of a subquery. We check
1093+
* each output column of the subquery and set unsafeColumns[k] to TRUE if
1094+
* that column is unsafe for a pushed-down qual to reference. The conditions
1095+
* checked here are:
1096+
*
1097+
* 1. We must not push down any quals that refer to subselect outputs that
1098+
* return sets, else we'd introduce functions-returning-sets into the
1099+
* subquery's WHERE/HAVING quals.
1100+
*
1101+
* 2. We must not push down any quals that refer to subselect outputs that
1102+
* contain volatile functions, for fear of introducing strange results due
1103+
* to multiple evaluation of a volatile function.
1104+
*
1105+
* 3. If the subquery uses DISTINCT ON, we must not push down any quals that
1106+
* refer to non-DISTINCT output columns, because that could change the set
1107+
* of rows returned. (This condition is vacuous for DISTINCT, because then
1108+
* there are no non-DISTINCT output columns, so we needn't check. But note
1109+
* we are assuming that the qual can't distinguish values that the DISTINCT
1110+
* operator sees as equal. This is a bit shaky but we have no way to test
1111+
* for the case, and it's unlikely enough that we shouldn't refuse the
1112+
* optimization just because it could theoretically happen.)
1113+
*/
1114+
static void
1115+
check_output_expressions(Query *subquery, bool *unsafeColumns)
1116+
{
1117+
ListCell *lc;
1118+
1119+
foreach(lc, subquery->targetList)
1120+
{
1121+
TargetEntry *tle = (TargetEntry *) lfirst(lc);
1122+
1123+
if (tle->resjunk)
1124+
continue; /* ignore resjunk columns */
1125+
1126+
/* We need not check further if output col is already known unsafe */
1127+
if (unsafeColumns[tle->resno])
1128+
continue;
1129+
1130+
/* Functions returning sets are unsafe (point 1) */
1131+
if (expression_returns_set((Node *) tle->expr))
1132+
{
1133+
unsafeColumns[tle->resno] = true;
1134+
continue;
1135+
}
1136+
1137+
/* Volatile functions are unsafe (point 2) */
1138+
if (contain_volatile_functions((Node *) tle->expr))
1139+
{
1140+
unsafeColumns[tle->resno] = true;
1141+
continue;
1142+
}
1143+
1144+
/* If subquery uses DISTINCT ON, check point 3 */
1145+
if (subquery->hasDistinctOn &&
1146+
!targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
1147+
{
1148+
/* non-DISTINCT column, so mark it unsafe */
1149+
unsafeColumns[tle->resno] = true;
1150+
continue;
1151+
}
1152+
}
1153+
}
1154+
1155+
/*
1156+
* For subqueries using UNION/UNION ALL/INTERSECT/INTERSECT ALL, we can
1157+
* push quals into each component query, but the quals can only reference
1158+
* subquery columns that suffer no type coercions in the set operation.
1159+
* Otherwise there are possible semantic gotchas. So, we check the
1160+
* component queries to see if any of them have output types different from
1161+
* the top-level setop outputs. unsafeColumns[k] is set true if column k
1162+
* has different type in any component.
10761163
*
10771164
* We don't have to care about typmods here: the only allowed difference
10781165
* between set-op input and output typmods is input is a specific typmod
10791166
* and output is -1, and that does not require a coercion.
1167+
*
1168+
* tlist is a subquery tlist.
1169+
* colTypes is an OID list of the top-level setop's output column types.
1170+
* unsafeColumns[] is the result array.
10801171
*/
10811172
static void
10821173
compare_tlist_datatypes(List *tlist, List *colTypes,
1083-
bool *differentTypes)
1174+
bool *unsafeColumns)
10841175
{
10851176
ListCell *l;
10861177
ListCell *colType = list_head(colTypes);
@@ -1094,7 +1185,7 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
10941185
if (colType == NULL)
10951186
elog(ERROR, "wrong number of tlist entries");
10961187
if (exprType((Node *) tle->expr) != lfirst_oid(colType))
1097-
differentTypes[tle->resno] = true;
1188+
unsafeColumns[tle->resno] = true;
10981189
colType = lnext(colType);
10991190
}
11001191
if (colType != NULL)
@@ -1117,34 +1208,15 @@ compare_tlist_datatypes(List *tlist, List *colTypes,
11171208
* (since there is no easy way to name that within the subquery itself).
11181209
*
11191210
* 3. The qual must not refer to any subquery output columns that were
1120-
* found to have inconsistent types across a set operation tree by
1121-
* subquery_is_pushdown_safe().
1122-
*
1123-
* 4. If the subquery uses DISTINCT ON, we must not push down any quals that
1124-
* refer to non-DISTINCT output columns, because that could change the set
1125-
* of rows returned. (This condition is vacuous for DISTINCT, because then
1126-
* there are no non-DISTINCT output columns, so we needn't check. But note
1127-
* we are assuming that the qual can't distinguish values that the DISTINCT
1128-
* operator sees as equal. This is a bit shaky but we have no way to test
1129-
* for the case, and it's unlikely enough that we shouldn't refuse the
1130-
* optimization just because it could theoretically happen.)
1131-
*
1132-
* 5. We must not push down any quals that refer to subselect outputs that
1133-
* return sets, else we'd introduce functions-returning-sets into the
1134-
* subquery's WHERE/HAVING quals.
1135-
*
1136-
* 6. We must not push down any quals that refer to subselect outputs that
1137-
* contain volatile functions, for fear of introducing strange results due
1138-
* to multiple evaluation of a volatile function.
1211+
* found to be unsafe to reference by subquery_is_pushdown_safe().
11391212
*/
11401213
static bool
11411214
qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
1142-
bool *differentTypes)
1215+
bool *unsafeColumns)
11431216
{
11441217
bool safe = true;
11451218
List *vars;
11461219
ListCell *vl;
1147-
Bitmapset *tested = NULL;
11481220

11491221
/* Refuse subselects (point 1) */
11501222
if (contain_subplans(qual))
@@ -1164,7 +1236,6 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
11641236
foreach(vl, vars)
11651237
{
11661238
Var *var = (Var *) lfirst(vl);
1167-
TargetEntry *tle;
11681239

11691240
/*
11701241
* XXX Punt if we find any PlaceHolderVars in the restriction clause.
@@ -1180,6 +1251,7 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
11801251
}
11811252

11821253
Assert(var->varno == rti);
1254+
Assert(var->varattno >= 0);
11831255

11841256
/* Check point 2 */
11851257
if (var->varattno == 0)
@@ -1188,53 +1260,15 @@ qual_is_pushdown_safe(Query *subquery, Index rti, Node *qual,
11881260
break;
11891261
}
11901262

1191-
/*
1192-
* We use a bitmapset to avoid testing the same attno more than once.
1193-
* (NB: this only works because subquery outputs can't have negative
1194-
* attnos.)
1195-
*/
1196-
if (bms_is_member(var->varattno, tested))
1197-
continue;
1198-
tested = bms_add_member(tested, var->varattno);
1199-
12001263
/* Check point 3 */
1201-
if (differentTypes[var->varattno])
1202-
{
1203-
safe = false;
1204-
break;
1205-
}
1206-
1207-
/* Must find the tlist element referenced by the Var */
1208-
tle = get_tle_by_resno(subquery->targetList, var->varattno);
1209-
Assert(tle != NULL);
1210-
Assert(!tle->resjunk);
1211-
1212-
/* If subquery uses DISTINCT ON, check point 4 */
1213-
if (subquery->hasDistinctOn &&
1214-
!targetIsInSortList(tle, InvalidOid, subquery->distinctClause))
1215-
{
1216-
/* non-DISTINCT column, so fail */
1217-
safe = false;
1218-
break;
1219-
}
1220-
1221-
/* Refuse functions returning sets (point 5) */
1222-
if (expression_returns_set((Node *) tle->expr))
1223-
{
1224-
safe = false;
1225-
break;
1226-
}
1227-
1228-
/* Refuse volatile functions (point 6) */
1229-
if (contain_volatile_functions((Node *) tle->expr))
1264+
if (unsafeColumns[var->varattno])
12301265
{
12311266
safe = false;
12321267
break;
12331268
}
12341269
}
12351270

12361271
list_free(vars);
1237-
bms_free(tested);
12381272

12391273
return safe;
12401274
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy