Skip to content

Commit 3b5d671

Browse files
committed
Fix incorrect matching of subexpressions in outer-join plan nodes.
Previously we would re-use input subexpressions in all expression trees attached to a Join plan node. However, if it's an outer join and the subexpression appears in the nullable-side input, this is potentially incorrect for apparently-matching subexpressions that came from above the outer join (i.e., targetlist and qpqual expressions), because the executor will treat the subexpression value as NULL when maybe it should not be.

The case is fairly hard to hit because (a) you need a non-strict subexpression (else NULL is correct), and (b) we don't usually compute expressions in the outputs of non-toplevel plan nodes. But we might do so if the expressions are sort keys for a mergejoin, for example.

Probably in the long run we should make a more explicit distinction between Vars appearing above and below an outer join, but that will be a major planner redesign and not at all back-patchable. For the moment, just hack set_join_references so that it will not match any non-Var expressions coming from nullable inputs to expressions that came from above the join. (This is somewhat overkill, in that a strict expression could still be matched, but it doesn't seem worth the effort to check that.)

Per report from Qingqing Zhou. The added regression test case is based on his example.

This has been broken for a very long time, so back-patch to all active branches.
1 parent 3b82837 commit 3b5d671

File tree

3 files changed

+121
-14
lines changed

3 files changed

+121
-14
lines changed

src/backend/optimizer/plan/setrefs.c

Lines changed: 53 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1027,19 +1027,13 @@ set_join_references(PlannerGlobal *glob, Join *join, int rtoffset)
10271027
outer_itlist = build_tlist_index(outer_plan->targetlist);
10281028
inner_itlist = build_tlist_index(inner_plan->targetlist);
10291029

1030-
/* All join plans have tlist, qual, and joinqual */
1031-
join->plan.targetlist = fix_join_expr(glob,
1032-
join->plan.targetlist,
1033-
outer_itlist,
1034-
inner_itlist,
1035-
(Index) 0,
1036-
rtoffset);
1037-
join->plan.qual = fix_join_expr(glob,
1038-
join->plan.qual,
1039-
outer_itlist,
1040-
inner_itlist,
1041-
(Index) 0,
1042-
rtoffset);
1030+
/*
1031+
* First process the joinquals (including merge or hash clauses). These
1032+
* are logically below the join so they can always use all values
1033+
* available from the input tlists. It's okay to also handle
1034+
* NestLoopParams now, because those couldn't refer to nullable
1035+
* subexpressions.
1036+
*/
10431037
join->joinqual = fix_join_expr(glob,
10441038
join->joinqual,
10451039
outer_itlist,
@@ -1090,6 +1084,49 @@ set_join_references(PlannerGlobal *glob, Join *join, int rtoffset)
10901084
rtoffset);
10911085
}
10921086

1087+
/*
1088+
* Now we need to fix up the targetlist and qpqual, which are logically
1089+
* above the join. This means they should not re-use any input expression
1090+
* that was computed in the nullable side of an outer join. Vars and
1091+
* PlaceHolderVars are fine, so we can implement this restriction just by
1092+
* clearing has_non_vars in the indexed_tlist structs.
1093+
*
1094+
* XXX This is a grotty workaround for the fact that we don't clearly
1095+
* distinguish between a Var appearing below an outer join and the "same"
1096+
* Var appearing above it. If we did, we'd not need to hack the matching
1097+
* rules this way.
1098+
*/
1099+
switch (join->jointype)
1100+
{
1101+
case JOIN_LEFT:
1102+
case JOIN_SEMI:
1103+
case JOIN_ANTI:
1104+
inner_itlist->has_non_vars = false;
1105+
break;
1106+
case JOIN_RIGHT:
1107+
outer_itlist->has_non_vars = false;
1108+
break;
1109+
case JOIN_FULL:
1110+
outer_itlist->has_non_vars = false;
1111+
inner_itlist->has_non_vars = false;
1112+
break;
1113+
default:
1114+
break;
1115+
}
1116+
1117+
join->plan.targetlist = fix_join_expr(glob,
1118+
join->plan.targetlist,
1119+
outer_itlist,
1120+
inner_itlist,
1121+
(Index) 0,
1122+
rtoffset);
1123+
join->plan.qual = fix_join_expr(glob,
1124+
join->plan.qual,
1125+
outer_itlist,
1126+
inner_itlist,
1127+
(Index) 0,
1128+
rtoffset);
1129+
10931130
pfree(outer_itlist);
10941131
pfree(inner_itlist);
10951132
}
@@ -1365,7 +1402,9 @@ search_indexed_tlist_for_var(Var *var, indexed_tlist *itlist,
13651402
* If no match, return NULL.
13661403
*
13671404
* NOTE: it is a waste of time to call this unless itlist->has_ph_vars or
1368-
* itlist->has_non_vars
1405+
* itlist->has_non_vars. Furthermore, set_join_references() relies on being
1406+
* able to prevent matching of non-Vars by clearing itlist->has_non_vars,
1407+
* so there's a correctness reason not to call it unless that's set.
13691408
*/
13701409
static Var *
13711410
search_indexed_tlist_for_non_var(Node *node,

src/test/regress/expected/join.out

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2788,6 +2788,54 @@ explain (costs off)
27882788
Index Cond: (unique2 = 42)
27892789
(6 rows)
27902790

2791+
--
2792+
-- test that quals attached to an outer join have correct semantics,
2793+
-- specifically that they don't re-use expressions computed below the join;
2794+
-- we force a mergejoin so that coalesce(b.q1, 1) appears as a join input
2795+
--
2796+
set enable_hashjoin to off;
2797+
set enable_nestloop to off;
2798+
explain (verbose, costs off)
2799+
select a.q2, b.q1
2800+
from int8_tbl a left join int8_tbl b on a.q2 = coalesce(b.q1, 1)
2801+
where coalesce(b.q1, 1) > 0;
2802+
QUERY PLAN
2803+
-------------------------------------------------------
2804+
Merge Left Join
2805+
Output: a.q2, b.q1
2806+
Merge Cond: (a.q2 = (COALESCE(b.q1, 1::bigint)))
2807+
Filter: (COALESCE(b.q1, 1::bigint) > 0)
2808+
-> Sort
2809+
Output: a.q2
2810+
Sort Key: a.q2
2811+
-> Seq Scan on public.int8_tbl a
2812+
Output: a.q2
2813+
-> Sort
2814+
Output: b.q1, (COALESCE(b.q1, 1::bigint))
2815+
Sort Key: (COALESCE(b.q1, 1::bigint))
2816+
-> Seq Scan on public.int8_tbl b
2817+
Output: b.q1, COALESCE(b.q1, 1::bigint)
2818+
(14 rows)
2819+
2820+
select a.q2, b.q1
2821+
from int8_tbl a left join int8_tbl b on a.q2 = coalesce(b.q1, 1)
2822+
where coalesce(b.q1, 1) > 0;
2823+
q2 | q1
2824+
-------------------+------------------
2825+
-4567890123456789 |
2826+
123 | 123
2827+
123 | 123
2828+
456 |
2829+
4567890123456789 | 4567890123456789
2830+
4567890123456789 | 4567890123456789
2831+
4567890123456789 | 4567890123456789
2832+
4567890123456789 | 4567890123456789
2833+
4567890123456789 | 4567890123456789
2834+
4567890123456789 | 4567890123456789
2835+
(10 rows)
2836+
2837+
reset enable_hashjoin;
2838+
reset enable_nestloop;
27912839
--
27922840
-- test join removal
27932841
--

src/test/regress/sql/join.sql

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -750,6 +750,26 @@ explain (costs off)
750750
explain (costs off)
751751
select * from tenk1 a full join tenk1 b using(unique2) where unique2 = 42;
752752

753+
--
754+
-- test that quals attached to an outer join have correct semantics,
755+
-- specifically that they don't re-use expressions computed below the join;
756+
-- we force a mergejoin so that coalesce(b.q1, 1) appears as a join input
757+
--
758+
759+
set enable_hashjoin to off;
760+
set enable_nestloop to off;
761+
762+
explain (verbose, costs off)
763+
select a.q2, b.q1
764+
from int8_tbl a left join int8_tbl b on a.q2 = coalesce(b.q1, 1)
765+
where coalesce(b.q1, 1) > 0;
766+
select a.q2, b.q1
767+
from int8_tbl a left join int8_tbl b on a.q2 = coalesce(b.q1, 1)
768+
where coalesce(b.q1, 1) > 0;
769+
770+
reset enable_hashjoin;
771+
reset enable_nestloop;
772+
753773
--
754774
-- test join removal
755775
--

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy