Skip to content

Commit 9c47574

Browse files
committed
Disallow partitionwise join when collations don't match
If the collation of any join key column doesn’t match the collation of the corresponding partition key, partitionwise joins can yield incorrect results. For example, rows that would match under the join key collation might be located in different partitions due to the partitioning collation. In such cases, a partitionwise join would yield different results from a non-partitionwise join, so disallow it in such cases. Reported-by: Tender Wang <tndrwang@gmail.com> Author: Jian He <jian.universality@gmail.com> Reviewed-by: Tender Wang <tndrwang@gmail.com> Reviewed-by: Junwang Zhao <zhjwpku@gmail.com> Discussion: https://postgr.es/m/CAHewXNno_HKiQ6PqyLYfuqDtwp7KKHZiH1J7Pqyz0nr+PS2Dwg@mail.gmail.com Backpatch-through: 12
1 parent 46d9be5 commit 9c47574

File tree

3 files changed

+144
-0
lines changed

3 files changed

+144
-0
lines changed

src/backend/optimizer/path/joinrels.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1645,6 +1645,10 @@ have_partkey_equi_join(RelOptInfo *joinrel,
16451645
if (ipk1 != ipk2)
16461646
continue;
16471647

1648+
/* Reject if the partition key collation differs from the clause's. */
1649+
if (rel1->part_scheme->partcollation[ipk1] != opexpr->inputcollid)
1650+
return false;
1651+
16481652
/*
16491653
* The clause allows partitionwise join if only it uses the same
16501654
* operator family as that specified by the partition key.

src/test/regress/expected/collate.icu.utf8.out

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2029,6 +2029,124 @@ SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1;
20292029
b | 5
20302030
(4 rows)
20312031

2032+
-- Partitionwise join should not be allowed too when the collation used by the
2033+
-- join keys doesn't match the partition key collation.
2034+
SET enable_partitionwise_join TO false;
2035+
EXPLAIN (COSTS OFF)
2036+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
2037+
QUERY PLAN
2038+
-------------------------------------------------------------
2039+
Sort
2040+
Sort Key: t1.c COLLATE "C"
2041+
-> HashAggregate
2042+
Group Key: t1.c
2043+
-> Hash Join
2044+
Hash Cond: (t1.c = t2.c)
2045+
-> Append
2046+
-> Seq Scan on pagg_tab3_p2 t1_1
2047+
-> Seq Scan on pagg_tab3_p1 t1_2
2048+
-> Hash
2049+
-> Append
2050+
-> Seq Scan on pagg_tab3_p2 t2_1
2051+
-> Seq Scan on pagg_tab3_p1 t2_2
2052+
(13 rows)
2053+
2054+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
2055+
c | count
2056+
---+-------
2057+
A | 100
2058+
B | 100
2059+
(2 rows)
2060+
2061+
SET enable_partitionwise_join TO true;
2062+
EXPLAIN (COSTS OFF)
2063+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
2064+
QUERY PLAN
2065+
-------------------------------------------------------------
2066+
Sort
2067+
Sort Key: t1.c COLLATE "C"
2068+
-> HashAggregate
2069+
Group Key: t1.c
2070+
-> Hash Join
2071+
Hash Cond: (t1.c = t2.c)
2072+
-> Append
2073+
-> Seq Scan on pagg_tab3_p2 t1_1
2074+
-> Seq Scan on pagg_tab3_p1 t1_2
2075+
-> Hash
2076+
-> Append
2077+
-> Seq Scan on pagg_tab3_p2 t2_1
2078+
-> Seq Scan on pagg_tab3_p1 t2_2
2079+
(13 rows)
2080+
2081+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
2082+
c | count
2083+
---+-------
2084+
A | 100
2085+
B | 100
2086+
(2 rows)
2087+
2088+
-- OK when the join clause uses the same collation as the partition key.
2089+
EXPLAIN (COSTS OFF)
2090+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
2091+
QUERY PLAN
2092+
------------------------------------------------------------------
2093+
Sort
2094+
Sort Key: ((t1.c)::text) COLLATE "C"
2095+
-> Append
2096+
-> HashAggregate
2097+
Group Key: (t1.c)::text
2098+
-> Hash Join
2099+
Hash Cond: ((t1.c)::text = (t2.c)::text)
2100+
-> Seq Scan on pagg_tab3_p2 t1
2101+
-> Hash
2102+
-> Seq Scan on pagg_tab3_p2 t2
2103+
-> HashAggregate
2104+
Group Key: (t1_1.c)::text
2105+
-> Hash Join
2106+
Hash Cond: ((t1_1.c)::text = (t2_1.c)::text)
2107+
-> Seq Scan on pagg_tab3_p1 t1_1
2108+
-> Hash
2109+
-> Seq Scan on pagg_tab3_p1 t2_1
2110+
(17 rows)
2111+
2112+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
2113+
c | count
2114+
---+-------
2115+
A | 25
2116+
B | 25
2117+
a | 25
2118+
b | 25
2119+
(4 rows)
2120+
2121+
SET enable_partitionwise_join TO false;
2122+
EXPLAIN (COSTS OFF)
2123+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
2124+
QUERY PLAN
2125+
-------------------------------------------------------------
2126+
Sort
2127+
Sort Key: ((t1.c)::text) COLLATE "C"
2128+
-> HashAggregate
2129+
Group Key: (t1.c)::text
2130+
-> Hash Join
2131+
Hash Cond: ((t1.c)::text = (t2.c)::text)
2132+
-> Append
2133+
-> Seq Scan on pagg_tab3_p2 t1_1
2134+
-> Seq Scan on pagg_tab3_p1 t1_2
2135+
-> Hash
2136+
-> Append
2137+
-> Seq Scan on pagg_tab3_p2 t2_1
2138+
-> Seq Scan on pagg_tab3_p1 t2_2
2139+
(13 rows)
2140+
2141+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
2142+
c | count
2143+
---+-------
2144+
A | 25
2145+
B | 25
2146+
a | 25
2147+
b | 25
2148+
(4 rows)
2149+
20322150
DROP TABLE pagg_tab3;
20332151
RESET enable_partitionwise_aggregate;
20342152
RESET max_parallel_workers_per_gather;

src/test/regress/sql/collate.icu.utf8.sql

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -771,6 +771,28 @@ EXPLAIN (COSTS OFF)
771771
SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1;
772772
SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1;
773773

774+
-- Partitionwise join should not be allowed too when the collation used by the
775+
-- join keys doesn't match the partition key collation.
776+
SET enable_partitionwise_join TO false;
777+
EXPLAIN (COSTS OFF)
778+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
779+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
780+
781+
SET enable_partitionwise_join TO true;
782+
EXPLAIN (COSTS OFF)
783+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
784+
SELECT t1.c, count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c GROUP BY 1 ORDER BY t1.c COLLATE "C";
785+
786+
-- OK when the join clause uses the same collation as the partition key.
787+
EXPLAIN (COSTS OFF)
788+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
789+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
790+
791+
SET enable_partitionwise_join TO false;
792+
EXPLAIN (COSTS OFF)
793+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
794+
SELECT t1.c COLLATE "C", count(t2.c) FROM pagg_tab3 t1 JOIN pagg_tab3 t2 ON t1.c = t2.c COLLATE "C" GROUP BY t1.c COLLATE "C" ORDER BY t1.c COLLATE "C";
795+
774796
DROP TABLE pagg_tab3;
775797

776798
RESET enable_partitionwise_aggregate;

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy