Skip to content

Commit 46d9be5

Browse files
committed
Disallow partitionwise grouping when collations don't match
If the collation of any grouping column doesn’t match the collation of the corresponding partition key, partitionwise grouping can yield incorrect results. For example, rows that would be grouped under the grouping collation may end up in different partitions under the partitioning collation. In such cases, full partitionwise grouping would produce results that differ from those without partitionwise grouping, so disallowed that. Partial partitionwise aggregation is still allowed, as the Finalize step reconciles partition-level aggregates with grouping requirements across all partitions, ensuring that the final output remains consistent. This commit also fixes group_by_has_partkey() by ensuring the RelabelType node is stripped from grouping expressions when matching them to partition key expressions to avoid false mismatches. Bug: #18568 Reported-by: Webbo Han <1105066510@qq.com> Author: Webbo Han <1105066510@qq.com> Reviewed-by: Tender Wang <tndrwang@gmail.com> Reviewed-by: Aleksander Alekseev <aleksander@timescale.com> Reviewed-by: Jian He <jian.universality@gmail.com> Discussion: https://postgr.es/m/18568-2a9afb6b9f7e6ed3@postgresql.org Discussion: https://postgr.es/m/tencent_9D9103CDA420C07768349CC1DFF88465F90A@qq.com Discussion: https://postgr.es/m/CAHewXNno_HKiQ6PqyLYfuqDtwp7KKHZiH1J7Pqyz0nr+PS2Dwg@mail.gmail.com Backpatch-through: 12
1 parent cae459d commit 46d9be5

File tree

3 files changed

+163
-8
lines changed

3 files changed

+163
-8
lines changed

src/backend/optimizer/plan/planner.c

Lines changed: 36 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -4074,9 +4074,10 @@ create_ordinary_grouping_paths(PlannerInfo *root, RelOptInfo *input_rel,
40744074
* If this is the topmost relation or if the parent relation is doing
40754075
* full partitionwise aggregation, then we can do full partitionwise
40764076
* aggregation provided that the GROUP BY clause contains all of the
4077-
* partitioning columns at this level. Otherwise, we can do at most
4078-
* partial partitionwise aggregation. But if partial aggregation is
4079-
* not supported in general then we can't use it for partitionwise
4077+
* partitioning columns at this level and the collation used by GROUP
4078+
* BY matches the partitioning collation. Otherwise, we can do at
4079+
* most partial partitionwise aggregation. But if partial aggregation
4080+
* is not supported in general then we can't use it for partitionwise
40804081
* aggregation either.
40814082
*/
40824083
if (extra->patype == PARTITIONWISE_AGGREGATE_FULL &&
@@ -7362,8 +7363,8 @@ create_partitionwise_grouping_paths(PlannerInfo *root,
73627363
/*
73637364
* group_by_has_partkey
73647365
*
7365-
* Returns true, if all the partition keys of the given relation are part of
7366-
* the GROUP BY clauses, false otherwise.
7366+
* Returns true if all the partition keys of the given relation are part of
7367+
* the GROUP BY clauses, including having matching collation, false otherwise.
73677368
*/
73687369
static bool
73697370
group_by_has_partkey(RelOptInfo *input_rel,
@@ -7391,13 +7392,40 @@ group_by_has_partkey(RelOptInfo *input_rel,
73917392

73927393
foreach(lc, partexprs)
73937394
{
7395+
ListCell *lg;
73947396
Expr *partexpr = lfirst(lc);
7397+
Oid partcoll = input_rel->part_scheme->partcollation[cnt];
73957398

7396-
if (list_member(groupexprs, partexpr))
7399+
foreach(lg, groupexprs)
73977400
{
7398-
found = true;
7399-
break;
7401+
Expr *groupexpr = lfirst(lg);
7402+
Oid groupcoll = exprCollation((Node *) groupexpr);
7403+
7404+
/*
7405+
* Note: we can assume there is at most one RelabelType node;
7406+
* eval_const_expressions() will have simplified if more than
7407+
* one.
7408+
*/
7409+
if (IsA(groupexpr, RelabelType))
7410+
groupexpr = ((RelabelType *) groupexpr)->arg;
7411+
7412+
if (equal(groupexpr, partexpr))
7413+
{
7414+
/*
7415+
* Reject a match if the grouping collation does not match
7416+
* the partitioning collation.
7417+
*/
7418+
if (OidIsValid(partcoll) && OidIsValid(groupcoll) &&
7419+
partcoll != groupcoll)
7420+
return false;
7421+
7422+
found = true;
7423+
break;
7424+
}
74007425
}
7426+
7427+
if (found)
7428+
break;
74017429
}
74027430

74037431
/*

src/test/regress/expected/collate.icu.utf8.out

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1943,6 +1943,96 @@ SELECT (SELECT count(*) FROM test33_0) <> (SELECT count(*) FROM test33_1);
19431943
t
19441944
(1 row)
19451945

1946+
--
1947+
-- Bug #18568
1948+
--
1949+
-- Partitionwise aggregate (full or partial) should not be used when a
1950+
-- partition key's collation doesn't match that of the GROUP BY column it is
1951+
-- matched with.
1952+
SET max_parallel_workers_per_gather TO 0;
1953+
SET enable_incremental_sort TO off;
1954+
CREATE TABLE pagg_tab3 (a text, c text collate case_insensitive) PARTITION BY LIST(c collate "C");
1955+
CREATE TABLE pagg_tab3_p1 PARTITION OF pagg_tab3 FOR VALUES IN ('a', 'b');
1956+
CREATE TABLE pagg_tab3_p2 PARTITION OF pagg_tab3 FOR VALUES IN ('B', 'A');
1957+
INSERT INTO pagg_tab3 SELECT i % 4 + 1, substr('abAB', (i % 4) + 1 , 1) FROM generate_series(0, 19) i;
1958+
ANALYZE pagg_tab3;
1959+
SET enable_partitionwise_aggregate TO false;
1960+
EXPLAIN (COSTS OFF)
1961+
SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1;
1962+
QUERY PLAN
1963+
-----------------------------------------------------------
1964+
Sort
1965+
Sort Key: (upper(pagg_tab3.c)) COLLATE case_insensitive
1966+
-> HashAggregate
1967+
Group Key: pagg_tab3.c
1968+
-> Append
1969+
-> Seq Scan on pagg_tab3_p2 pagg_tab3_1
1970+
-> Seq Scan on pagg_tab3_p1 pagg_tab3_2
1971+
(7 rows)
1972+
1973+
SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1;
1974+
upper | count
1975+
-------+-------
1976+
A | 10
1977+
B | 10
1978+
(2 rows)
1979+
1980+
-- No "full" partitionwise aggregation allowed, though "partial" is allowed.
1981+
SET enable_partitionwise_aggregate TO true;
1982+
EXPLAIN (COSTS OFF)
1983+
SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1;
1984+
QUERY PLAN
1985+
--------------------------------------------------------------
1986+
Sort
1987+
Sort Key: (upper(pagg_tab3.c)) COLLATE case_insensitive
1988+
-> Finalize HashAggregate
1989+
Group Key: pagg_tab3.c
1990+
-> Append
1991+
-> Partial HashAggregate
1992+
Group Key: pagg_tab3.c
1993+
-> Seq Scan on pagg_tab3_p2 pagg_tab3
1994+
-> Partial HashAggregate
1995+
Group Key: pagg_tab3_1.c
1996+
-> Seq Scan on pagg_tab3_p1 pagg_tab3_1
1997+
(11 rows)
1998+
1999+
SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1;
2000+
upper | count
2001+
-------+-------
2002+
A | 10
2003+
B | 10
2004+
(2 rows)
2005+
2006+
-- OK to use full partitionwise aggregate after changing the GROUP BY column's
2007+
-- collation to be the same as that of the partition key.
2008+
EXPLAIN (COSTS OFF)
2009+
SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1;
2010+
QUERY PLAN
2011+
--------------------------------------------------------
2012+
Sort
2013+
Sort Key: ((pagg_tab3.c)::text) COLLATE "C"
2014+
-> Append
2015+
-> HashAggregate
2016+
Group Key: (pagg_tab3.c)::text
2017+
-> Seq Scan on pagg_tab3_p2 pagg_tab3
2018+
-> HashAggregate
2019+
Group Key: (pagg_tab3_1.c)::text
2020+
-> Seq Scan on pagg_tab3_p1 pagg_tab3_1
2021+
(9 rows)
2022+
2023+
SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1;
2024+
c | count
2025+
---+-------
2026+
A | 5
2027+
B | 5
2028+
a | 5
2029+
b | 5
2030+
(4 rows)
2031+
2032+
DROP TABLE pagg_tab3;
2033+
RESET enable_partitionwise_aggregate;
2034+
RESET max_parallel_workers_per_gather;
2035+
RESET enable_incremental_sort;
19462036
-- cleanup
19472037
RESET search_path;
19482038
SET client_min_messages TO warning;

src/test/regress/sql/collate.icu.utf8.sql

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -739,6 +739,43 @@ INSERT INTO test33 VALUES (2, 'DEF');
739739
-- they end up in the same partition (but it's platform-dependent which one)
740740
SELECT (SELECT count(*) FROM test33_0) <> (SELECT count(*) FROM test33_1);
741741

742+
--
743+
-- Bug #18568
744+
--
745+
-- Partitionwise aggregate (full or partial) should not be used when a
746+
-- partition key's collation doesn't match that of the GROUP BY column it is
747+
-- matched with.
748+
SET max_parallel_workers_per_gather TO 0;
749+
SET enable_incremental_sort TO off;
750+
751+
CREATE TABLE pagg_tab3 (a text, c text collate case_insensitive) PARTITION BY LIST(c collate "C");
752+
CREATE TABLE pagg_tab3_p1 PARTITION OF pagg_tab3 FOR VALUES IN ('a', 'b');
753+
CREATE TABLE pagg_tab3_p2 PARTITION OF pagg_tab3 FOR VALUES IN ('B', 'A');
754+
INSERT INTO pagg_tab3 SELECT i % 4 + 1, substr('abAB', (i % 4) + 1 , 1) FROM generate_series(0, 19) i;
755+
ANALYZE pagg_tab3;
756+
757+
SET enable_partitionwise_aggregate TO false;
758+
EXPLAIN (COSTS OFF)
759+
SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1;
760+
SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1;
761+
762+
-- No "full" partitionwise aggregation allowed, though "partial" is allowed.
763+
SET enable_partitionwise_aggregate TO true;
764+
EXPLAIN (COSTS OFF)
765+
SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1;
766+
SELECT upper(c collate case_insensitive), count(c) FROM pagg_tab3 GROUP BY c collate case_insensitive ORDER BY 1;
767+
768+
-- OK to use full partitionwise aggregate after changing the GROUP BY column's
769+
-- collation to be the same as that of the partition key.
770+
EXPLAIN (COSTS OFF)
771+
SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1;
772+
SELECT c collate "C", count(c) FROM pagg_tab3 GROUP BY c collate "C" ORDER BY 1;
773+
774+
DROP TABLE pagg_tab3;
775+
776+
RESET enable_partitionwise_aggregate;
777+
RESET max_parallel_workers_per_gather;
778+
RESET enable_incremental_sort;
742779

743780
-- cleanup
744781
RESET search_path;

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy