Skip to content

Commit 9f13376

Browse files
committed
Pull up ANY-SUBLINK with the necessary lateral support.
For ANY-SUBLINK, we adopted a two-stage pull-up approach to handle different types of scenarios. In the first stage, the sublink is pulled up as a subquery. Because of this, when writing this code, we did not have the ability to perform lateral joins, and therefore, we were unable to pull up Var with varlevelsup=1. Now that we have the ability to use lateral joins, we can eliminate this limitation. Author: Andy Fan <zhihui.fan1213@gmail.com> Author: Tom Lane <tgl@sss.pgh.pa.us> Reviewed-by: Tom Lane <tgl@sss.pgh.pa.us> Reviewed-by: Richard Guo <guofenglinux@gmail.com> Reviewed-by: Alena Rybakina <lena.ribackina@yandex.ru> Reviewed-by: Andrey Lepikhov <a.lepikhov@postgrespro.ru>
1 parent 995d400 commit 9f13376

File tree

7 files changed

+192
-20
lines changed

7 files changed

+192
-20
lines changed

contrib/postgres_fdw/expected/postgres_fdw.out

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11894,23 +11894,23 @@ CREATE FOREIGN TABLE foreign_tbl (b int)
1189411894
CREATE FOREIGN TABLE foreign_tbl2 () INHERITS (foreign_tbl)
1189511895
SERVER loopback OPTIONS (table_name 'base_tbl');
1189611896
EXPLAIN (VERBOSE, COSTS OFF)
11897-
SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl);
11897+
SELECT a FROM base_tbl WHERE (a, random() > 0) IN (SELECT a, random() > 0 FROM foreign_tbl);
1189811898
QUERY PLAN
1189911899
-----------------------------------------------------------------------------
1190011900
Seq Scan on public.base_tbl
1190111901
Output: base_tbl.a
1190211902
Filter: (SubPlan 1)
1190311903
SubPlan 1
1190411904
-> Result
11905-
Output: base_tbl.a
11905+
Output: base_tbl.a, (random() > '0'::double precision)
1190611906
-> Append
1190711907
-> Async Foreign Scan on public.foreign_tbl foreign_tbl_1
1190811908
Remote SQL: SELECT NULL FROM public.base_tbl
1190911909
-> Async Foreign Scan on public.foreign_tbl2 foreign_tbl_2
1191011910
Remote SQL: SELECT NULL FROM public.base_tbl
1191111911
(11 rows)
1191211912

11913-
SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl);
11913+
SELECT a FROM base_tbl WHERE (a, random() > 0) IN (SELECT a, random() > 0 FROM foreign_tbl);
1191411914
a
1191511915
---
1191611916
1

contrib/postgres_fdw/sql/postgres_fdw.sql

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3988,8 +3988,8 @@ CREATE FOREIGN TABLE foreign_tbl2 () INHERITS (foreign_tbl)
39883988
SERVER loopback OPTIONS (table_name 'base_tbl');
39893989

39903990
EXPLAIN (VERBOSE, COSTS OFF)
3991-
SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl);
3992-
SELECT a FROM base_tbl WHERE a IN (SELECT a FROM foreign_tbl);
3991+
SELECT a FROM base_tbl WHERE (a, random() > 0) IN (SELECT a, random() > 0 FROM foreign_tbl);
3992+
SELECT a FROM base_tbl WHERE (a, random() > 0) IN (SELECT a, random() > 0 FROM foreign_tbl);
39933993

39943994
-- Clean up
39953995
DROP FOREIGN TABLE foreign_tbl CASCADE;

src/backend/optimizer/plan/subselect.c

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1278,14 +1278,23 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
12781278
List *subquery_vars;
12791279
Node *quals;
12801280
ParseState *pstate;
1281+
Relids sub_ref_outer_relids;
1282+
bool use_lateral;
12811283

12821284
Assert(sublink->subLinkType == ANY_SUBLINK);
12831285

12841286
/*
1285-
* The sub-select must not refer to any Vars of the parent query. (Vars of
1286-
* higher levels should be okay, though.)
1287+
* If the sub-select refers to any Vars of the parent query, we treat it
1288+
* as LATERAL. (Vars of higher levels don't matter here.)
12871289
*/
1288-
if (contain_vars_of_level((Node *) subselect, 1))
1290+
sub_ref_outer_relids = pull_varnos_of_level(NULL, (Node *) subselect, 1);
1291+
use_lateral = !bms_is_empty(sub_ref_outer_relids);
1292+
1293+
/*
1294+
* Check that the sub-select refers to nothing outside of available_rels of the
1295+
* parent query.
1296+
*/
1297+
if (!bms_is_subset(sub_ref_outer_relids, available_rels))
12891298
return NULL;
12901299

12911300
/*
@@ -1323,7 +1332,7 @@ convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink,
13231332
nsitem = addRangeTableEntryForSubquery(pstate,
13241333
subselect,
13251334
makeAlias("ANY_subquery", NIL),
1326-
false,
1335+
use_lateral,
13271336
false);
13281337
rte = nsitem->p_rte;
13291338
parse->rtable = lappend(parse->rtable, rte);

src/test/regress/expected/join.out

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5277,7 +5277,7 @@ reset enable_nestloop;
52775277
explain (costs off)
52785278
select a.unique1, b.unique2
52795279
from onek a left join onek b on a.unique1 = b.unique2
5280-
where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1);
5280+
where (b.unique2, random() > 0) = any (select q1, random() > 0 from int8_tbl c where c.q1 < b.unique1);
52815281
QUERY PLAN
52825282
----------------------------------------------------------
52835283
Hash Join
@@ -5293,7 +5293,7 @@ select a.unique1, b.unique2
52935293

52945294
select a.unique1, b.unique2
52955295
from onek a left join onek b on a.unique1 = b.unique2
5296-
where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1);
5296+
where (b.unique2, random() > 0) = any (select q1, random() > 0 from int8_tbl c where c.q1 < b.unique1);
52975297
unique1 | unique2
52985298
---------+---------
52995299
123 | 123
@@ -8210,12 +8210,12 @@ select * from (values (0), (1)) v(id),
82108210
lateral (select * from int8_tbl t1,
82118211
lateral (select * from
82128212
(select * from int8_tbl t2
8213-
where q1 = any (select q2 from int8_tbl t3
8213+
where (q1, random() > 0) = any (select q2, random() > 0 from int8_tbl t3
82148214
where q2 = (select greatest(t1.q1,t2.q2))
82158215
and (select v.id=0)) offset 0) ss2) ss
82168216
where t1.q1 = ss.q2) ss0;
8217-
QUERY PLAN
8218-
----------------------------------------------------------------------
8217+
QUERY PLAN
8218+
-------------------------------------------------------------------------------
82198219
Nested Loop
82208220
Output: "*VALUES*".column1, t1.q1, t1.q2, ss2.q1, ss2.q2
82218221
-> Seq Scan on public.int8_tbl t1
@@ -8232,7 +8232,7 @@ lateral (select * from int8_tbl t1,
82328232
Filter: (SubPlan 3)
82338233
SubPlan 3
82348234
-> Result
8235-
Output: t3.q2
8235+
Output: t3.q2, (random() > '0'::double precision)
82368236
One-Time Filter: $4
82378237
InitPlan 1 (returns $2)
82388238
-> Result
@@ -8249,7 +8249,7 @@ select * from (values (0), (1)) v(id),
82498249
lateral (select * from int8_tbl t1,
82508250
lateral (select * from
82518251
(select * from int8_tbl t2
8252-
where q1 = any (select q2 from int8_tbl t3
8252+
where (q1, random() > 0) = any (select q2, random() > 0 from int8_tbl t3
82538253
where q2 = (select greatest(t1.q1,t2.q2))
82548254
and (select v.id=0)) offset 0) ss2) ss
82558255
where t1.q1 = ss.q2) ss0;

src/test/regress/expected/subselect.out

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1926,3 +1926,129 @@ select * from x for update;
19261926
Output: subselect_tbl.f1, subselect_tbl.f2, subselect_tbl.f3
19271927
(2 rows)
19281928

1929+
-- Pull-up the direct-correlated ANY_SUBLINK
1930+
explain (costs off)
1931+
select * from tenk1 A where hundred in (select hundred from tenk2 B where B.odd = A.odd);
1932+
QUERY PLAN
1933+
------------------------------------------------------------
1934+
Hash Join
1935+
Hash Cond: ((a.odd = b.odd) AND (a.hundred = b.hundred))
1936+
-> Seq Scan on tenk1 a
1937+
-> Hash
1938+
-> HashAggregate
1939+
Group Key: b.odd, b.hundred
1940+
-> Seq Scan on tenk2 b
1941+
(7 rows)
1942+
1943+
explain (costs off)
1944+
select * from tenk1 A where exists
1945+
(select 1 from tenk2 B
1946+
where A.hundred in (select C.hundred FROM tenk2 C
1947+
WHERE c.odd = b.odd));
1948+
QUERY PLAN
1949+
---------------------------------
1950+
Nested Loop Semi Join
1951+
Join Filter: (SubPlan 1)
1952+
-> Seq Scan on tenk1 a
1953+
-> Materialize
1954+
-> Seq Scan on tenk2 b
1955+
SubPlan 1
1956+
-> Seq Scan on tenk2 c
1957+
Filter: (odd = b.odd)
1958+
(8 rows)
1959+
1960+
-- we should only try to pull up the sublink into RHS of a left join
1961+
-- but a.hundred is not available.
1962+
explain (costs off)
1963+
SELECT * FROM tenk1 A LEFT JOIN tenk2 B
1964+
ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
1965+
QUERY PLAN
1966+
---------------------------------
1967+
Nested Loop Left Join
1968+
Join Filter: (SubPlan 1)
1969+
-> Seq Scan on tenk1 a
1970+
-> Materialize
1971+
-> Seq Scan on tenk2 b
1972+
SubPlan 1
1973+
-> Seq Scan on tenk2 c
1974+
Filter: (odd = b.odd)
1975+
(8 rows)
1976+
1977+
-- we should only try to pull up the sublink into RHS of a left join
1978+
-- but a.odd is not available for this.
1979+
explain (costs off)
1980+
SELECT * FROM tenk1 A LEFT JOIN tenk2 B
1981+
ON B.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = a.odd);
1982+
QUERY PLAN
1983+
---------------------------------
1984+
Nested Loop Left Join
1985+
Join Filter: (SubPlan 1)
1986+
-> Seq Scan on tenk1 a
1987+
-> Materialize
1988+
-> Seq Scan on tenk2 b
1989+
SubPlan 1
1990+
-> Seq Scan on tenk2 c
1991+
Filter: (odd = a.odd)
1992+
(8 rows)
1993+
1994+
-- should be able to pull up since all the references are available
1995+
explain (costs off)
1996+
SELECT * FROM tenk1 A LEFT JOIN tenk2 B
1997+
ON B.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
1998+
QUERY PLAN
1999+
------------------------------------------------------------------------
2000+
Nested Loop Left Join
2001+
-> Seq Scan on tenk1 a
2002+
-> Materialize
2003+
-> Hash Join
2004+
Hash Cond: ((b.odd = c.odd) AND (b.hundred = c.hundred))
2005+
-> Seq Scan on tenk2 b
2006+
-> Hash
2007+
-> HashAggregate
2008+
Group Key: c.odd, c.hundred
2009+
-> Seq Scan on tenk2 c
2010+
(10 rows)
2011+
2012+
-- we can pull up the sublink into the inner JoinExpr.
2013+
explain (costs off)
2014+
SELECT * FROM tenk1 A INNER JOIN tenk2 B
2015+
ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
2016+
QUERY PLAN
2017+
-------------------------------------------------
2018+
Hash Join
2019+
Hash Cond: (c.odd = b.odd)
2020+
-> Hash Join
2021+
Hash Cond: (a.hundred = c.hundred)
2022+
-> Seq Scan on tenk1 a
2023+
-> Hash
2024+
-> HashAggregate
2025+
Group Key: c.odd, c.hundred
2026+
-> Seq Scan on tenk2 c
2027+
-> Hash
2028+
-> Seq Scan on tenk2 b
2029+
(11 rows)
2030+
2031+
-- we can pull up the aggregate sublink into RHS of a left join.
2032+
explain (costs off)
2033+
SELECT * FROM tenk1 A LEFT JOIN tenk2 B
2034+
ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd);
2035+
QUERY PLAN
2036+
---------------------------------------------------------------------------------------
2037+
Nested Loop Left Join
2038+
-> Seq Scan on tenk1 a
2039+
-> Materialize
2040+
-> Nested Loop
2041+
-> Seq Scan on tenk2 b
2042+
-> Memoize
2043+
Cache Key: b.hundred, b.odd
2044+
Cache Mode: binary
2045+
-> Subquery Scan on "ANY_subquery"
2046+
Filter: (b.hundred = "ANY_subquery".min)
2047+
-> Result
2048+
InitPlan 1 (returns $1)
2049+
-> Limit
2050+
-> Index Scan using tenk2_hundred on tenk2 c
2051+
Index Cond: (hundred IS NOT NULL)
2052+
Filter: (odd = b.odd)
2053+
(16 rows)
2054+

src/test/regress/sql/join.sql

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1864,11 +1864,11 @@ reset enable_nestloop;
18641864
explain (costs off)
18651865
select a.unique1, b.unique2
18661866
from onek a left join onek b on a.unique1 = b.unique2
1867-
where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1);
1867+
where (b.unique2, random() > 0) = any (select q1, random() > 0 from int8_tbl c where c.q1 < b.unique1);
18681868

18691869
select a.unique1, b.unique2
18701870
from onek a left join onek b on a.unique1 = b.unique2
1871-
where b.unique2 = any (select q1 from int8_tbl c where c.q1 < b.unique1);
1871+
where (b.unique2, random() > 0) = any (select q1, random() > 0 from int8_tbl c where c.q1 < b.unique1);
18721872

18731873
--
18741874
-- test full-join strength reduction
@@ -3038,7 +3038,7 @@ select * from (values (0), (1)) v(id),
30383038
lateral (select * from int8_tbl t1,
30393039
lateral (select * from
30403040
(select * from int8_tbl t2
3041-
where q1 = any (select q2 from int8_tbl t3
3041+
where (q1, random() > 0) = any (select q2, random() > 0 from int8_tbl t3
30423042
where q2 = (select greatest(t1.q1,t2.q2))
30433043
and (select v.id=0)) offset 0) ss2) ss
30443044
where t1.q1 = ss.q2) ss0;
@@ -3047,7 +3047,7 @@ select * from (values (0), (1)) v(id),
30473047
lateral (select * from int8_tbl t1,
30483048
lateral (select * from
30493049
(select * from int8_tbl t2
3050-
where q1 = any (select q2 from int8_tbl t3
3050+
where (q1, random() > 0) = any (select q2, random() > 0 from int8_tbl t3
30513051
where q2 = (select greatest(t1.q1,t2.q2))
30523052
and (select v.id=0)) offset 0) ss2) ss
30533053
where t1.q1 = ss.q2) ss0;

src/test/regress/sql/subselect.sql

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -968,3 +968,40 @@ select * from (with x as (select 2 as y) select * from x) ss;
968968
explain (verbose, costs off)
969969
with x as (select * from subselect_tbl)
970970
select * from x for update;
971+
972+
-- Pull-up the direct-correlated ANY_SUBLINK
973+
explain (costs off)
974+
select * from tenk1 A where hundred in (select hundred from tenk2 B where B.odd = A.odd);
975+
976+
explain (costs off)
977+
select * from tenk1 A where exists
978+
(select 1 from tenk2 B
979+
where A.hundred in (select C.hundred FROM tenk2 C
980+
WHERE c.odd = b.odd));
981+
982+
-- we should only try to pull up the sublink into RHS of a left join
983+
-- but a.hundred is not available.
984+
explain (costs off)
985+
SELECT * FROM tenk1 A LEFT JOIN tenk2 B
986+
ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
987+
988+
-- we should only try to pull up the sublink into RHS of a left join
989+
-- but a.odd is not available for this.
990+
explain (costs off)
991+
SELECT * FROM tenk1 A LEFT JOIN tenk2 B
992+
ON B.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = a.odd);
993+
994+
-- should be able to pull up since all the references are available
995+
explain (costs off)
996+
SELECT * FROM tenk1 A LEFT JOIN tenk2 B
997+
ON B.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
998+
999+
-- we can pull up the sublink into the inner JoinExpr.
1000+
explain (costs off)
1001+
SELECT * FROM tenk1 A INNER JOIN tenk2 B
1002+
ON A.hundred in (SELECT c.hundred FROM tenk2 C WHERE c.odd = b.odd);
1003+
1004+
-- we can pull up the aggregate sublink into RHS of a left join.
1005+
explain (costs off)
1006+
SELECT * FROM tenk1 A LEFT JOIN tenk2 B
1007+
ON B.hundred in (SELECT min(c.hundred) FROM tenk2 C WHERE c.odd = b.odd);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy