Skip to content

Commit de0dc1a

Browse files
committed
Fix cost_incremental_sort for expressions with varno 0
When estimating the number of pre-sorted groups in cost_incremental_sort we must not pass Vars with varno 0 to estimate_num_groups, which would cause failues in find_base_rel. This may happen when sorting output of set operations, thanks to generate_append_tlist. Unlike recurse_set_operations we can't easily access the original target list, so if we find any Vars with varno 0, we fall back to the default estimate DEFAULT_NUM_DISTINCT. Reported-by: Justin Pryzby Discussion: https://postgr.es/m/20200411214639.GK2228%40telsasoft.com
1 parent 92c12e4 commit de0dc1a

File tree

3 files changed

+75
-3
lines changed

3 files changed

+75
-3
lines changed

src/backend/optimizer/path/costsize.c

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1804,6 +1804,7 @@ cost_incremental_sort(Path *path,
18041804
List *presortedExprs = NIL;
18051805
ListCell *l;
18061806
int i = 0;
1807+
bool unknown_varno = false;
18071808

18081809
Assert(presorted_keys != 0);
18091810

@@ -1814,22 +1815,69 @@ cost_incremental_sort(Path *path,
18141815
if (input_tuples < 2.0)
18151816
input_tuples = 2.0;
18161817

1817-
/* Extract presorted keys as list of expressions */
1818+
/* Default estimate of number of groups, capped to one group per row. */
1819+
input_groups = Min(input_tuples, DEFAULT_NUM_DISTINCT);
1820+
1821+
/*
1822+
* Extract presorted keys as list of expressions.
1823+
*
1824+
* We need to be careful about Vars containing "varno 0" which might
1825+
* have been introduced by generate_append_tlist, which would confuse
1826+
* estimate_num_groups (in fact it'd fail for such expressions). See
1827+
* recurse_set_operations which has to deal with the same issue.
1828+
*
1829+
* Unlike recurse_set_operations we can't access the original target
1830+
* list here, and even if we could it's not very clear how useful would
1831+
* that be for a set operation combining multiple tables. So we simply
1832+
* detect if there are any expressions with "varno 0" and use the
1833+
* default DEFAULT_NUM_DISTINCT in that case.
1834+
*
1835+
* We might also use either 1.0 (a single group) or input_tuples (each
1836+
* row being a separate group), pretty much the worst and best case for
1837+
* incremental sort. But those are extreme cases and using something in
1838+
* between seems reasonable. Furthermore, generate_append_tlist is used
1839+
* for set operations, which are likely to produce mostly unique output
1840+
* anyway - from that standpoint the DEFAULT_NUM_DISTINCT is defensive
1841+
* while maintaining lower startup cost.
1842+
*/
18181843
foreach(l, pathkeys)
18191844
{
1845+
Node *expr;
1846+
Relids varnos;
1847+
18201848
PathKey *key = (PathKey *) lfirst(l);
18211849
EquivalenceMember *member = (EquivalenceMember *)
18221850
linitial(key->pk_eclass->ec_members);
18231851

1852+
/*
1853+
* Check if the expression contains Var with "varno 0" so that we
1854+
* don't call estimate_num_groups in that case.
1855+
*/
1856+
expr = (Node *) member->em_expr;
1857+
1858+
if (IsA(expr, RelabelType))
1859+
expr = (Node *) ((RelabelType *) expr)->arg;
1860+
1861+
varnos = pull_varnos(expr);
1862+
1863+
if (bms_is_member(0, varnos))
1864+
{
1865+
unknown_varno = true;
1866+
break;
1867+
}
1868+
1869+
/* expression not containing any Vars with "varno 0" */
18241870
presortedExprs = lappend(presortedExprs, member->em_expr);
18251871

18261872
i++;
18271873
if (i >= presorted_keys)
18281874
break;
18291875
}
18301876

1831-
/* Estimate number of groups with equal presorted keys */
1832-
input_groups = estimate_num_groups(root, presortedExprs, input_tuples, NULL);
1877+
/* Estimate number of groups with equal presorted keys. */
1878+
if (!unknown_varno)
1879+
input_groups = estimate_num_groups(root, presortedExprs, input_tuples, NULL);
1880+
18331881
group_tuples = input_tuples / input_groups;
18341882
group_input_run_cost = input_run_cost / input_groups;
18351883

src/test/regress/expected/incremental_sort.out

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1447,4 +1447,24 @@ explain (costs off) select a,b,sum(c) from t group by 1,2 order by 1,2,3 limit 1
14471447
-> Parallel Index Scan using t_a_idx on t
14481448
(12 rows)
14491449

1450+
-- Incremental sort vs. set operations with varno 0
1451+
set enable_hashagg to off;
1452+
explain (costs off) select * from t union select * from t order by 1,3;
1453+
QUERY PLAN
1454+
----------------------------------------------------------
1455+
Incremental Sort
1456+
Sort Key: t.a, t.c
1457+
Presorted Key: t.a
1458+
-> Unique
1459+
-> Sort
1460+
Sort Key: t.a, t.b, t.c
1461+
-> Append
1462+
-> Gather
1463+
Workers Planned: 2
1464+
-> Parallel Seq Scan on t
1465+
-> Gather
1466+
Workers Planned: 2
1467+
-> Parallel Seq Scan on t t_1
1468+
(13 rows)
1469+
14501470
drop table t;

src/test/regress/sql/incremental_sort.sql

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,4 +216,8 @@ explain (costs off) select a,b,sum(c) from t group by 1,2 order by 1,2,3 limit 1
216216
set enable_incrementalsort = on;
217217
explain (costs off) select a,b,sum(c) from t group by 1,2 order by 1,2,3 limit 1;
218218

219+
-- Incremental sort vs. set operations with varno 0
220+
set enable_hashagg to off;
221+
explain (costs off) select * from t union select * from t order by 1,3;
222+
219223
drop table t;

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy