Skip to content

Commit 2d26c4a

Browse files
committed
Fix get_useful_pathkeys_for_relation for volatile expressions
When considering Incremental Sort below a Gather Merge, we need to be a bit more careful when matching pathkeys to EC members. It's not enough to find a member whose Vars are all in the current relation's target; volatile expressions in particular need to be contained in the target, otherwise it's too early to use the pathkey. Reported-by: Jaime Casanova Author: James Coleman Reviewed-by: Tomas Vondra Backpatch-through: 13, where the incremental sort code was added Discussion: https://postgr.es/m/CAJGNTeNaxpXgBVcRhJX%2B2vSbq%2BF2kJqGBcvompmpvXb7pq%2BoFA%40mail.gmail.com
1 parent 936043c commit 2d26c4a

File tree

5 files changed

+207
-6
lines changed

5 files changed

+207
-6
lines changed

src/backend/optimizer/path/allpaths.c

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2756,7 +2756,8 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
27562756
/*
27572757
* Considering query_pathkeys is always worth it, because it might allow
27582758
* us to avoid a total sort when we have a partially presorted path
2759-
* available.
2759+
* available or to push the total sort into the parallel portion of the
2760+
* query.
27602761
*/
27612762
if (root->query_pathkeys)
27622763
{
@@ -2769,17 +2770,17 @@ get_useful_pathkeys_for_relation(PlannerInfo *root, RelOptInfo *rel)
27692770
EquivalenceClass *pathkey_ec = pathkey->pk_eclass;
27702771

27712772
/*
2772-
* We can only build an Incremental Sort for pathkeys which
2773-
* contain an EC member in the current relation, so ignore any
2774-
* suffix of the list as soon as we find a pathkey without an EC
2775-
* member the relation.
2773+
* We can only build a sort for pathkeys which contain an EC
2774+
* member in the current relation's target, so ignore any suffix
2775+
* of the list as soon as we find a pathkey without an EC member
2776+
* in the relation.
27762777
*
27772778
* By still returning the prefix of the pathkeys list that does
27782779
* meet criteria of EC membership in the current relation, we
27792780
* enable not just an incremental sort on the entirety of
27802781
* query_pathkeys but also incremental sort below a JOIN.
27812782
*/
2782-
if (!find_em_expr_for_rel(pathkey_ec, rel))
2783+
if (!find_em_expr_usable_for_sorting_rel(pathkey_ec, rel))
27832784
break;
27842785

27852786
npathkeys++;

src/backend/optimizer/path/equivclass.c

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -794,6 +794,76 @@ find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel)
794794
return NULL;
795795
}
796796

797+
/*
798+
* Find an equivalence class member expression that can be safely used by a
799+
* sort node on top of the provided relation. The rules here must match those
800+
* applied in prepare_sort_from_pathkeys.
801+
*/
802+
Expr *
803+
find_em_expr_usable_for_sorting_rel(EquivalenceClass *ec, RelOptInfo *rel)
804+
{
805+
ListCell *lc_em;
806+
807+
/*
808+
* If there is more than one equivalence member matching these
809+
* requirements we'll be content to choose any one of them.
810+
*/
811+
foreach(lc_em, ec->ec_members)
812+
{
813+
EquivalenceMember *em = lfirst(lc_em);
814+
Expr *em_expr = em->em_expr;
815+
PathTarget *target = rel->reltarget;
816+
ListCell *lc_target_expr;
817+
818+
/*
819+
* We shouldn't be trying to sort by an equivalence class that
820+
* contains a constant, so no need to consider such cases any further.
821+
*/
822+
if (em->em_is_const)
823+
continue;
824+
825+
/*
826+
* Any Vars in the equivalence class member need to come from this
827+
* relation. This is a superset of prepare_sort_from_pathkeys ignoring
828+
* child members unless they belong to the rel being sorted.
829+
*/
830+
if (!bms_is_subset(em->em_relids, rel->relids))
831+
continue;
832+
833+
/*
834+
* As long as the expression isn't volatile then
835+
* prepare_sort_from_pathkeys is able to generate a new target entry,
836+
* so there's no need to verify that one already exists.
837+
*/
838+
if (!ec->ec_has_volatile)
839+
return em->em_expr;
840+
841+
/*
842+
* If, however, it's volatile, we have to verify that the
843+
* equivalence member's expr is already generated in the
844+
* relation's target (we do strip relabels first from both
845+
* expressions, which is cheap and might allow us to match
846+
* more expressions).
847+
*/
848+
while (em_expr && IsA(em_expr, RelabelType))
849+
em_expr = ((RelabelType *) em_expr)->arg;
850+
851+
foreach(lc_target_expr, target->exprs)
852+
{
853+
Expr *target_expr = lfirst(lc_target_expr);
854+
855+
while (target_expr && IsA(target_expr, RelabelType))
856+
target_expr = ((RelabelType *) target_expr)->arg;
857+
858+
if (equal(target_expr, em_expr))
859+
return em->em_expr;
860+
}
861+
}
862+
863+
/* We didn't find any suitable equivalence class expression */
864+
return NULL;
865+
}
866+
797867
/*
798868
* generate_base_implied_equalities
799869
* Generate any restriction clauses that we can deduce from equivalence

src/include/optimizer/paths.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ extern EquivalenceClass *get_eclass_for_sort_expr(PlannerInfo *root,
135135
Relids rel,
136136
bool create_it);
137137
extern Expr *find_em_expr_for_rel(EquivalenceClass *ec, RelOptInfo *rel);
138+
extern Expr *find_em_expr_usable_for_sorting_rel(EquivalenceClass *ec, RelOptInfo *rel);
138139
extern void generate_base_implied_equalities(PlannerInfo *root);
139140
extern List *generate_join_implied_equalities(PlannerInfo *root,
140141
Relids join_relids,

src/test/regress/expected/incremental_sort.out

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1469,3 +1469,101 @@ explain (costs off) select * from t union select * from t order by 1,3;
14691469
(13 rows)
14701470

14711471
drop table t;
1472+
-- Sort pushdown can't go below where expressions are part of the rel target.
1473+
-- In particular this is interesting for volatile expressions which have to
1474+
-- go above joins since otherwise we'll incorrectly use expression evaluations
1475+
-- across multiple rows.
1476+
set enable_hashagg=off;
1477+
set enable_seqscan=off;
1478+
set enable_incremental_sort = off;
1479+
set parallel_tuple_cost=0;
1480+
set parallel_setup_cost=0;
1481+
set min_parallel_table_scan_size = 0;
1482+
set min_parallel_index_scan_size = 0;
1483+
-- Parallel sort below join.
1484+
explain (costs off) select distinct sub.unique1, stringu1
1485+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
1486+
QUERY PLAN
1487+
--------------------------------------------------------------------------
1488+
Unique
1489+
-> Nested Loop
1490+
-> Gather Merge
1491+
Workers Planned: 2
1492+
-> Sort
1493+
Sort Key: tenk1.unique1, tenk1.stringu1
1494+
-> Parallel Index Scan using tenk1_unique1 on tenk1
1495+
-> Function Scan on generate_series
1496+
(8 rows)
1497+
1498+
explain (costs off) select sub.unique1, stringu1
1499+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
1500+
order by 1, 2;
1501+
QUERY PLAN
1502+
--------------------------------------------------------------------
1503+
Nested Loop
1504+
-> Gather Merge
1505+
Workers Planned: 2
1506+
-> Sort
1507+
Sort Key: tenk1.unique1, tenk1.stringu1
1508+
-> Parallel Index Scan using tenk1_unique1 on tenk1
1509+
-> Function Scan on generate_series
1510+
(7 rows)
1511+
1512+
-- Parallel sort but with expression that can be safely generated at the base rel.
1513+
explain (costs off) select distinct sub.unique1, md5(stringu1)
1514+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
1515+
QUERY PLAN
1516+
----------------------------------------------------------------------------------------
1517+
Unique
1518+
-> Nested Loop
1519+
-> Gather Merge
1520+
Workers Planned: 2
1521+
-> Sort
1522+
Sort Key: tenk1.unique1, (md5((tenk1.stringu1)::text)) COLLATE "C"
1523+
-> Parallel Index Scan using tenk1_unique1 on tenk1
1524+
-> Function Scan on generate_series
1525+
(8 rows)
1526+
1527+
explain (costs off) select sub.unique1, md5(stringu1)
1528+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
1529+
order by 1, 2;
1530+
QUERY PLAN
1531+
----------------------------------------------------------------------------------
1532+
Nested Loop
1533+
-> Gather Merge
1534+
Workers Planned: 2
1535+
-> Sort
1536+
Sort Key: tenk1.unique1, (md5((tenk1.stringu1)::text)) COLLATE "C"
1537+
-> Parallel Index Scan using tenk1_unique1 on tenk1
1538+
-> Function Scan on generate_series
1539+
(7 rows)
1540+
1541+
-- Parallel sort but with expression not available until the upper rel.
1542+
explain (costs off) select distinct sub.unique1, stringu1 || random()::text
1543+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
1544+
QUERY PLAN
1545+
---------------------------------------------------------------------------------------------
1546+
Unique
1547+
-> Sort
1548+
Sort Key: tenk1.unique1, (((tenk1.stringu1)::text || (random())::text)) COLLATE "C"
1549+
-> Gather
1550+
Workers Planned: 2
1551+
-> Nested Loop
1552+
-> Parallel Index Scan using tenk1_unique1 on tenk1
1553+
-> Function Scan on generate_series
1554+
(8 rows)
1555+
1556+
explain (costs off) select sub.unique1, stringu1 || random()::text
1557+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
1558+
order by 1, 2;
1559+
QUERY PLAN
1560+
---------------------------------------------------------------------------------------
1561+
Sort
1562+
Sort Key: tenk1.unique1, (((tenk1.stringu1)::text || (random())::text)) COLLATE "C"
1563+
-> Gather
1564+
Workers Planned: 2
1565+
-> Nested Loop
1566+
-> Parallel Index Scan using tenk1_unique1 on tenk1
1567+
-> Function Scan on generate_series
1568+
(7 rows)
1569+

src/test/regress/sql/incremental_sort.sql

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,3 +221,34 @@ set enable_hashagg to off;
221221
explain (costs off) select * from t union select * from t order by 1,3;
222222

223223
drop table t;
224+
225+
-- Sort pushdown can't go below where expressions are part of the rel target.
226+
-- In particular this is interesting for volatile expressions which have to
227+
-- go above joins since otherwise we'll incorrectly use expression evaluations
228+
-- across multiple rows.
229+
set enable_hashagg=off;
230+
set enable_seqscan=off;
231+
set enable_incremental_sort = off;
232+
set parallel_tuple_cost=0;
233+
set parallel_setup_cost=0;
234+
set min_parallel_table_scan_size = 0;
235+
set min_parallel_index_scan_size = 0;
236+
237+
-- Parallel sort below join.
238+
explain (costs off) select distinct sub.unique1, stringu1
239+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
240+
explain (costs off) select sub.unique1, stringu1
241+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
242+
order by 1, 2;
243+
-- Parallel sort but with expression that can be safely generated at the base rel.
244+
explain (costs off) select distinct sub.unique1, md5(stringu1)
245+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
246+
explain (costs off) select sub.unique1, md5(stringu1)
247+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
248+
order by 1, 2;
249+
-- Parallel sort but with expression not available until the upper rel.
250+
explain (costs off) select distinct sub.unique1, stringu1 || random()::text
251+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub;
252+
explain (costs off) select sub.unique1, stringu1 || random()::text
253+
from tenk1, lateral (select tenk1.unique1 from generate_series(1, 1000)) as sub
254+
order by 1, 2;

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy