Skip to content

Commit deb71fa

Browse files
committed
Fix costing for parallel aggregation.
The original patch kind of ignored the fact that we were doing something different from a costing point of view, but nobody noticed. This patch fixes that oversight. (Patch by David Rowley.)
1 parent 46d73e0 commit deb71fa

File tree

3 files changed

+92
-25
lines changed

3 files changed

+92
-25
lines changed

src/backend/optimizer/plan/planner.c

Lines changed: 31 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3262,6 +3262,8 @@ create_grouping_paths(PlannerInfo *root,
32623262
RelOptInfo *grouped_rel;
32633263
PathTarget *partial_grouping_target = NULL;
32643264
AggClauseCosts agg_costs;
3265+
AggClauseCosts agg_partial_costs; /* parallel only */
3266+
AggClauseCosts agg_final_costs; /* parallel only */
32653267
Size hashaggtablesize;
32663268
double dNumGroups;
32673269
double dNumPartialGroups = 0;
@@ -3346,8 +3348,10 @@ create_grouping_paths(PlannerInfo *root,
33463348
MemSet(&agg_costs, 0, sizeof(AggClauseCosts));
33473349
if (parse->hasAggs)
33483350
{
3349-
count_agg_clauses(root, (Node *) target->exprs, &agg_costs);
3350-
count_agg_clauses(root, parse->havingQual, &agg_costs);
3351+
count_agg_clauses(root, (Node *) target->exprs, &agg_costs, true,
3352+
false, false);
3353+
count_agg_clauses(root, parse->havingQual, &agg_costs, true, false,
3354+
false);
33513355
}
33523356

33533357
/*
@@ -3422,6 +3426,25 @@ create_grouping_paths(PlannerInfo *root,
34223426
NIL,
34233427
NIL);
34243428

3429+
/*
3430+
* Collect statistics about aggregates for estimating costs of
3431+
* performing aggregation in parallel.
3432+
*/
3433+
MemSet(&agg_partial_costs, 0, sizeof(AggClauseCosts));
3434+
MemSet(&agg_final_costs, 0, sizeof(AggClauseCosts));
3435+
if (parse->hasAggs)
3436+
{
3437+
/* partial phase */
3438+
count_agg_clauses(root, (Node *) partial_grouping_target->exprs,
3439+
&agg_partial_costs, false, false, true);
3440+
3441+
/* final phase */
3442+
count_agg_clauses(root, (Node *) target->exprs, &agg_final_costs,
3443+
true, true, true);
3444+
count_agg_clauses(root, parse->havingQual, &agg_final_costs, true,
3445+
true, true);
3446+
}
3447+
34253448
if (can_sort)
34263449
{
34273450
/* Checked in set_grouped_rel_consider_parallel() */
@@ -3457,7 +3480,7 @@ create_grouping_paths(PlannerInfo *root,
34573480
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
34583481
parse->groupClause,
34593482
NIL,
3460-
&agg_costs,
3483+
&agg_partial_costs,
34613484
dNumPartialGroups,
34623485
false,
34633486
false,
@@ -3482,7 +3505,7 @@ create_grouping_paths(PlannerInfo *root,
34823505

34833506
hashaggtablesize =
34843507
estimate_hashagg_tablesize(cheapest_partial_path,
3485-
&agg_costs,
3508+
&agg_partial_costs,
34863509
dNumPartialGroups);
34873510

34883511
/*
@@ -3499,7 +3522,7 @@ create_grouping_paths(PlannerInfo *root,
34993522
AGG_HASHED,
35003523
parse->groupClause,
35013524
NIL,
3502-
&agg_costs,
3525+
&agg_partial_costs,
35033526
dNumPartialGroups,
35043527
false,
35053528
false,
@@ -3631,7 +3654,7 @@ create_grouping_paths(PlannerInfo *root,
36313654
parse->groupClause ? AGG_SORTED : AGG_PLAIN,
36323655
parse->groupClause,
36333656
(List *) parse->havingQual,
3634-
&agg_costs,
3657+
&agg_final_costs,
36353658
dNumGroups,
36363659
true,
36373660
true,
@@ -3691,7 +3714,7 @@ create_grouping_paths(PlannerInfo *root,
36913714
Path *path = (Path *) linitial(grouped_rel->partial_pathlist);
36923715

36933716
hashaggtablesize = estimate_hashagg_tablesize(path,
3694-
&agg_costs,
3717+
&agg_final_costs,
36953718
dNumGroups);
36963719

36973720
if (hashaggtablesize < work_mem * 1024L)
@@ -3713,7 +3736,7 @@ create_grouping_paths(PlannerInfo *root,
37133736
AGG_HASHED,
37143737
parse->groupClause,
37153738
(List *) parse->havingQual,
3716-
&agg_costs,
3739+
&agg_final_costs,
37173740
dNumGroups,
37183741
true,
37193742
true,

src/backend/optimizer/util/clauses.c

Lines changed: 59 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ typedef struct
6161
{
6262
PlannerInfo *root;
6363
AggClauseCosts *costs;
64+
bool finalizeAggs;
65+
bool combineStates;
66+
bool serialStates;
6467
} count_agg_clauses_context;
6568

6669
typedef struct
@@ -540,12 +543,16 @@ contain_agg_clause_walker(Node *node, void *context)
540543
* are no subqueries. There mustn't be outer-aggregate references either.
541544
*/
542545
void
543-
count_agg_clauses(PlannerInfo *root, Node *clause, AggClauseCosts *costs)
546+
count_agg_clauses(PlannerInfo *root, Node *clause, AggClauseCosts *costs,
547+
bool finalizeAggs, bool combineStates, bool serialStates)
544548
{
545549
count_agg_clauses_context context;
546550

547551
context.root = root;
548552
context.costs = costs;
553+
context.finalizeAggs = finalizeAggs;
554+
context.combineStates = combineStates;
555+
context.serialStates = serialStates;
549556
(void) count_agg_clauses_walker(clause, &context);
550557
}
551558

@@ -562,6 +569,9 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
562569
Form_pg_aggregate aggform;
563570
Oid aggtransfn;
564571
Oid aggfinalfn;
572+
Oid aggcombinefn;
573+
Oid aggserialfn;
574+
Oid aggdeserialfn;
565575
Oid aggtranstype;
566576
int32 aggtransspace;
567577
QualCost argcosts;
@@ -583,6 +593,9 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
583593
aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
584594
aggtransfn = aggform->aggtransfn;
585595
aggfinalfn = aggform->aggfinalfn;
596+
aggcombinefn = aggform->aggcombinefn;
597+
aggserialfn = aggform->aggserialfn;
598+
aggdeserialfn = aggform->aggdeserialfn;
586599
aggtranstype = aggform->aggtranstype;
587600
aggtransspace = aggform->aggtransspace;
588601
ReleaseSysCache(aggTuple);
@@ -592,28 +605,58 @@ count_agg_clauses_walker(Node *node, count_agg_clauses_context *context)
592605
if (aggref->aggorder != NIL || aggref->aggdistinct != NIL)
593606
costs->numOrderedAggs++;
594607

595-
/* add component function execution costs to appropriate totals */
596-
costs->transCost.per_tuple += get_func_cost(aggtransfn) * cpu_operator_cost;
597-
if (OidIsValid(aggfinalfn))
598-
costs->finalCost += get_func_cost(aggfinalfn) * cpu_operator_cost;
608+
/*
609+
* Add the appropriate component function execution costs to
610+
* appropriate totals.
611+
*/
612+
if (context->combineStates)
613+
{
614+
/* charge for combining previously aggregated states */
615+
costs->transCost.per_tuple += get_func_cost(aggcombinefn) * cpu_operator_cost;
599616

600-
/* also add the input expressions' cost to per-input-row costs */
601-
cost_qual_eval_node(&argcosts, (Node *) aggref->args, context->root);
602-
costs->transCost.startup += argcosts.startup;
603-
costs->transCost.per_tuple += argcosts.per_tuple;
617+
/* charge for deserialization, when appropriate */
618+
if (context->serialStates && OidIsValid(aggdeserialfn))
619+
costs->transCost.per_tuple += get_func_cost(aggdeserialfn) * cpu_operator_cost;
620+
}
621+
else
622+
costs->transCost.per_tuple += get_func_cost(aggtransfn) * cpu_operator_cost;
623+
624+
if (context->finalizeAggs)
625+
{
626+
if (OidIsValid(aggfinalfn))
627+
costs->finalCost += get_func_cost(aggfinalfn) * cpu_operator_cost;
628+
}
629+
else if (context->serialStates)
630+
{
631+
if (OidIsValid(aggserialfn))
632+
costs->finalCost += get_func_cost(aggserialfn) * cpu_operator_cost;
633+
}
604634

605635
/*
606-
* Add any filter's cost to per-input-row costs.
607-
*
608-
* XXX Ideally we should reduce input expression costs according to
609-
* filter selectivity, but it's not clear it's worth the trouble.
636+
* Some costs will already have been incurred by the initial aggregate
637+
* node, so we mustn't include these again.
610638
*/
611-
if (aggref->aggfilter)
639+
if (!context->combineStates)
612640
{
613-
cost_qual_eval_node(&argcosts, (Node *) aggref->aggfilter,
614-
context->root);
641+
/* add the input expressions' cost to per-input-row costs */
642+
cost_qual_eval_node(&argcosts, (Node *) aggref->args, context->root);
615643
costs->transCost.startup += argcosts.startup;
616644
costs->transCost.per_tuple += argcosts.per_tuple;
645+
646+
/*
647+
* Add any filter's cost to per-input-row costs.
648+
*
649+
* XXX Ideally we should reduce input expression costs according
650+
* to filter selectivity, but it's not clear it's worth the
651+
* trouble.
652+
*/
653+
if (aggref->aggfilter)
654+
{
655+
cost_qual_eval_node(&argcosts, (Node *) aggref->aggfilter,
656+
context->root);
657+
costs->transCost.startup += argcosts.startup;
658+
costs->transCost.per_tuple += argcosts.per_tuple;
659+
}
617660
}
618661

619662
/*

src/include/optimizer/clauses.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,8 @@ extern List *make_ands_implicit(Expr *clause);
6767
extern PartialAggType aggregates_allow_partial(Node *clause);
6868
extern bool contain_agg_clause(Node *clause);
6969
extern void count_agg_clauses(PlannerInfo *root, Node *clause,
70-
AggClauseCosts *costs);
70+
AggClauseCosts *costs, bool finalizeAggs,
71+
bool combineStates, bool serialStates);
7172

7273
extern bool contain_window_function(Node *clause);
7374
extern WindowFuncLists *find_window_functions(Node *clause, Index maxWinRef);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy