Skip to content

Commit 8ed3f11

Browse files
committed
Perform only one projection to compute agg arguments.
Previously we did an ExecProject() for each individual aggregate argument. That turned out to be a performance bottleneck in queries with multiple aggregates. Doing all the argument computations in one ExecProject() is quite a bit cheaper because ExecProject's fastpath can do the work at once in a relatively tight loop, and because it can get all the required columns with a single slot_getsomeattrs() call and save some other redundant setup costs. Author: Andres Freund Reviewed-By: Heikki Linnakangas Discussion: https://postgr.es/m/20161103110721.h5i5t5saxfk5eeik@alap3.anarazel.de
1 parent 6d46f47 commit 8ed3f11

File tree

2 files changed

+119
-52
lines changed

2 files changed

+119
-52
lines changed

src/backend/executor/nodeAgg.c

Lines changed: 115 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -160,6 +160,7 @@
160160
#include "executor/executor.h"
161161
#include "executor/nodeAgg.h"
162162
#include "miscadmin.h"
163+
#include "nodes/makefuncs.h"
163164
#include "nodes/nodeFuncs.h"
164165
#include "optimizer/clauses.h"
165166
#include "optimizer/tlist.h"
@@ -213,6 +214,9 @@ typedef struct AggStatePerTransData
213214
*/
214215
int numInputs;
215216

217+
/* offset of input columns in AggState->evalslot */
218+
int inputoff;
219+
216220
/*
217221
* Number of aggregated input columns to pass to the transfn. This
218222
* includes the ORDER BY columns for ordered-set aggs, but not for plain
@@ -234,7 +238,6 @@ typedef struct AggStatePerTransData
234238

235239
/* ExprStates of the FILTER and argument expressions. */
236240
ExprState *aggfilter; /* state of FILTER expression, if any */
237-
List *args; /* states of aggregated-argument expressions */
238241
List *aggdirectargs; /* states of direct-argument expressions */
239242

240243
/*
@@ -291,19 +294,19 @@ typedef struct AggStatePerTransData
291294
transtypeByVal;
292295

293296
/*
294-
* Stuff for evaluation of inputs. We used to just use ExecEvalExpr, but
295-
* with the addition of ORDER BY we now need at least a slot for passing
296-
* data to the sort object, which requires a tupledesc, so we might as
297-
* well go whole hog and use ExecProject too.
297+
* Stuff for evaluation of aggregate inputs in cases where the aggregate
298+
* requires sorted input. The arguments themselves will be evaluated via
299+
* AggState->evalslot/evalproj for all aggregates at once, but we only
300+
* want to sort the relevant columns for individual aggregates.
298301
*/
299-
TupleDesc evaldesc; /* descriptor of input tuples */
300-
ProjectionInfo *evalproj; /* projection machinery */
302+
TupleDesc sortdesc; /* descriptor of input tuples */
301303

302304
/*
303305
* Slots for holding the evaluated input arguments. These are set up
304-
* during ExecInitAgg() and then used for each input row.
306+
* during ExecInitAgg() and then used for each input row requiring
307+
* processing besides what's done in AggState->evalproj.
305308
*/
306-
TupleTableSlot *evalslot; /* current input tuple */
309+
TupleTableSlot *sortslot; /* current input tuple */
307310
TupleTableSlot *uniqslot; /* used for multi-column DISTINCT */
308311

309312
/*
@@ -621,14 +624,14 @@ initialize_aggregate(AggState *aggstate, AggStatePerTrans pertrans,
621624
*/
622625
if (pertrans->numInputs == 1)
623626
pertrans->sortstates[aggstate->current_set] =
624-
tuplesort_begin_datum(pertrans->evaldesc->attrs[0]->atttypid,
627+
tuplesort_begin_datum(pertrans->sortdesc->attrs[0]->atttypid,
625628
pertrans->sortOperators[0],
626629
pertrans->sortCollations[0],
627630
pertrans->sortNullsFirst[0],
628631
work_mem, false);
629632
else
630633
pertrans->sortstates[aggstate->current_set] =
631-
tuplesort_begin_heap(pertrans->evaldesc,
634+
tuplesort_begin_heap(pertrans->sortdesc,
632635
pertrans->numSortCols,
633636
pertrans->sortColIdx,
634637
pertrans->sortOperators,
@@ -847,14 +850,19 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
847850
int setno = 0;
848851
int numGroupingSets = Max(aggstate->phase->numsets, 1);
849852
int numTrans = aggstate->numtrans;
853+
TupleTableSlot *slot = aggstate->evalslot;
854+
855+
/* compute input for all aggregates */
856+
if (aggstate->evalproj)
857+
aggstate->evalslot = ExecProject(aggstate->evalproj, NULL);
850858

851859
for (transno = 0; transno < numTrans; transno++)
852860
{
853861
AggStatePerTrans pertrans = &aggstate->pertrans[transno];
854862
ExprState *filter = pertrans->aggfilter;
855863
int numTransInputs = pertrans->numTransInputs;
856864
int i;
857-
TupleTableSlot *slot;
865+
int inputoff = pertrans->inputoff;
858866

859867
/* Skip anything FILTERed out */
860868
if (filter)
@@ -868,13 +876,10 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
868876
continue;
869877
}
870878

871-
/* Evaluate the current input expressions for this aggregate */
872-
slot = ExecProject(pertrans->evalproj, NULL);
873-
874879
if (pertrans->numSortCols > 0)
875880
{
876881
/* DISTINCT and/or ORDER BY case */
877-
Assert(slot->tts_nvalid == pertrans->numInputs);
882+
Assert(slot->tts_nvalid >= (pertrans->numInputs + inputoff));
878883

879884
/*
880885
* If the transfn is strict, we want to check for nullity before
@@ -887,7 +892,7 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
887892
{
888893
for (i = 0; i < numTransInputs; i++)
889894
{
890-
if (slot->tts_isnull[i])
895+
if (slot->tts_isnull[i + inputoff])
891896
break;
892897
}
893898
if (i < numTransInputs)
@@ -899,10 +904,25 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
899904
/* OK, put the tuple into the tuplesort object */
900905
if (pertrans->numInputs == 1)
901906
tuplesort_putdatum(pertrans->sortstates[setno],
902-
slot->tts_values[0],
903-
slot->tts_isnull[0]);
907+
slot->tts_values[inputoff],
908+
slot->tts_isnull[inputoff]);
904909
else
905-
tuplesort_puttupleslot(pertrans->sortstates[setno], slot);
910+
{
911+
/*
912+
* Copy slot contents, starting from inputoff, into sort
913+
* slot.
914+
*/
915+
ExecClearTuple(pertrans->sortslot);
916+
memcpy(pertrans->sortslot->tts_values,
917+
&slot->tts_values[inputoff],
918+
pertrans->numInputs * sizeof(Datum));
919+
memcpy(pertrans->sortslot->tts_isnull,
920+
&slot->tts_isnull[inputoff],
921+
pertrans->numInputs * sizeof(bool));
922+
pertrans->sortslot->tts_nvalid = pertrans->numInputs;
923+
ExecStoreVirtualTuple(pertrans->sortslot);
924+
tuplesort_puttupleslot(pertrans->sortstates[setno], pertrans->sortslot);
925+
}
906926
}
907927
}
908928
else
@@ -915,8 +935,8 @@ advance_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
915935
Assert(slot->tts_nvalid >= numTransInputs);
916936
for (i = 0; i < numTransInputs; i++)
917937
{
918-
fcinfo->arg[i + 1] = slot->tts_values[i];
919-
fcinfo->argnull[i + 1] = slot->tts_isnull[i];
938+
fcinfo->arg[i + 1] = slot->tts_values[i + inputoff];
939+
fcinfo->argnull[i + 1] = slot->tts_isnull[i + inputoff];
920940
}
921941

922942
for (setno = 0; setno < numGroupingSets; setno++)
@@ -943,20 +963,24 @@ combine_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
943963
{
944964
int transno;
945965
int numTrans = aggstate->numtrans;
966+
TupleTableSlot *slot = NULL;
946967

947968
/* combine not supported with grouping sets */
948969
Assert(aggstate->phase->numsets == 0);
949970

971+
/* compute input for all aggregates */
972+
if (aggstate->evalproj)
973+
slot = ExecProject(aggstate->evalproj, NULL);
974+
950975
for (transno = 0; transno < numTrans; transno++)
951976
{
952977
AggStatePerTrans pertrans = &aggstate->pertrans[transno];
953978
AggStatePerGroup pergroupstate = &pergroup[transno];
954-
TupleTableSlot *slot;
955979
FunctionCallInfo fcinfo = &pertrans->transfn_fcinfo;
980+
int inputoff = pertrans->inputoff;
956981

957-
/* Evaluate the current input expressions for this aggregate */
958-
slot = ExecProject(pertrans->evalproj, NULL);
959982
Assert(slot->tts_nvalid >= 1);
983+
Assert(slot->tts_nvalid + inputoff >= 1);
960984

961985
/*
962986
* deserialfn_oid will be set if we must deserialize the input state
@@ -965,18 +989,18 @@ combine_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
965989
if (OidIsValid(pertrans->deserialfn_oid))
966990
{
967991
/* Don't call a strict deserialization function with NULL input */
968-
if (pertrans->deserialfn.fn_strict && slot->tts_isnull[0])
992+
if (pertrans->deserialfn.fn_strict && slot->tts_isnull[inputoff])
969993
{
970-
fcinfo->arg[1] = slot->tts_values[0];
971-
fcinfo->argnull[1] = slot->tts_isnull[0];
994+
fcinfo->arg[1] = slot->tts_values[inputoff];
995+
fcinfo->argnull[1] = slot->tts_isnull[inputoff];
972996
}
973997
else
974998
{
975999
FunctionCallInfo dsinfo = &pertrans->deserialfn_fcinfo;
9761000
MemoryContext oldContext;
9771001

978-
dsinfo->arg[0] = slot->tts_values[0];
979-
dsinfo->argnull[0] = slot->tts_isnull[0];
1002+
dsinfo->arg[0] = slot->tts_values[inputoff];
1003+
dsinfo->argnull[0] = slot->tts_isnull[inputoff];
9801004
/* Dummy second argument for type-safety reasons */
9811005
dsinfo->arg[1] = PointerGetDatum(NULL);
9821006
dsinfo->argnull[1] = false;
@@ -995,8 +1019,8 @@ combine_aggregates(AggState *aggstate, AggStatePerGroup pergroup)
9951019
}
9961020
else
9971021
{
998-
fcinfo->arg[1] = slot->tts_values[0];
999-
fcinfo->argnull[1] = slot->tts_isnull[0];
1022+
fcinfo->arg[1] = slot->tts_values[inputoff];
1023+
fcinfo->argnull[1] = slot->tts_isnull[inputoff];
10001024
}
10011025

10021026
advance_combine_function(aggstate, pertrans, pergroupstate);
@@ -1233,7 +1257,7 @@ process_ordered_aggregate_multi(AggState *aggstate,
12331257
{
12341258
MemoryContext workcontext = aggstate->tmpcontext->ecxt_per_tuple_memory;
12351259
FunctionCallInfo fcinfo = &pertrans->transfn_fcinfo;
1236-
TupleTableSlot *slot1 = pertrans->evalslot;
1260+
TupleTableSlot *slot1 = pertrans->sortslot;
12371261
TupleTableSlot *slot2 = pertrans->uniqslot;
12381262
int numTransInputs = pertrans->numTransInputs;
12391263
int numDistinctCols = pertrans->numDistinctCols;
@@ -2343,10 +2367,12 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
23432367
transno,
23442368
aggno;
23452369
int phase;
2370+
List *combined_inputeval;
23462371
ListCell *l;
23472372
Bitmapset *all_grouped_cols = NULL;
23482373
int numGroupingSets = 1;
23492374
int numPhases;
2375+
int column_offset;
23502376
int i = 0;
23512377
int j = 0;
23522378

@@ -2928,6 +2954,53 @@ ExecInitAgg(Agg *node, EState *estate, int eflags)
29282954
aggstate->numaggs = aggno + 1;
29292955
aggstate->numtrans = transno + 1;
29302956

2957+
/*
2958+
* Build a single projection computing the aggregate arguments for all
2959+
* aggregates at once; that's considerably faster than doing it separately
2960+
* for each.
2961+
*
2962+
* First create a targetlist combining the targetlist of all the
2963+
* transitions.
2964+
*/
2965+
combined_inputeval = NIL;
2966+
column_offset = 0;
2967+
for (transno = 0; transno < aggstate->numtrans; transno++)
2968+
{
2969+
AggStatePerTrans pertrans = &pertransstates[transno];
2970+
ListCell *arg;
2971+
2972+
pertrans->inputoff = column_offset;
2973+
2974+
/*
2975+
* Adjust resno in the copied target entries, to point into the combined
2976+
* slot.
2977+
*/
2978+
foreach(arg, pertrans->aggref->args)
2979+
{
2980+
TargetEntry *source_tle = (TargetEntry *) lfirst(arg);
2981+
TargetEntry *tle;
2982+
2983+
Assert(IsA(source_tle, TargetEntry));
2984+
tle = flatCopyTargetEntry(source_tle);
2985+
tle->resno += column_offset;
2986+
2987+
combined_inputeval = lappend(combined_inputeval, tle);
2988+
}
2989+
2990+
column_offset += list_length(pertrans->aggref->args);
2991+
}
2992+
2993+
/* and then create a projection for that targetlist */
2994+
aggstate->evaldesc = ExecTypeFromTL(combined_inputeval, false);
2995+
aggstate->evalslot = ExecInitExtraTupleSlot(estate);
2996+
combined_inputeval = (List *) ExecInitExpr((Expr *) combined_inputeval,
2997+
(PlanState *) aggstate);
2998+
aggstate->evalproj = ExecBuildProjectionInfo(combined_inputeval,
2999+
aggstate->tmpcontext,
3000+
aggstate->evalslot,
3001+
NULL);
3002+
ExecSetSlotDescriptor(aggstate->evalslot, aggstate->evaldesc);
3003+
29313004
return aggstate;
29323005
}
29333006

@@ -3098,24 +3171,12 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
30983171

30993172
}
31003173

3101-
/*
3102-
* Get a tupledesc corresponding to the aggregated inputs (including sort
3103-
* expressions) of the agg.
3104-
*/
3105-
pertrans->evaldesc = ExecTypeFromTL(aggref->args, false);
3106-
3107-
/* Create slot we're going to do argument evaluation in */
3108-
pertrans->evalslot = ExecInitExtraTupleSlot(estate);
3109-
ExecSetSlotDescriptor(pertrans->evalslot, pertrans->evaldesc);
3110-
31113174
/* Initialize the input and FILTER expressions */
31123175
naggs = aggstate->numaggs;
31133176
pertrans->aggfilter = ExecInitExpr(aggref->aggfilter,
31143177
(PlanState *) aggstate);
31153178
pertrans->aggdirectargs = (List *) ExecInitExpr((Expr *) aggref->aggdirectargs,
31163179
(PlanState *) aggstate);
3117-
pertrans->args = (List *) ExecInitExpr((Expr *) aggref->args,
3118-
(PlanState *) aggstate);
31193180

31203181
/*
31213182
* Complain if the aggregate's arguments contain any aggregates; nested
@@ -3127,12 +3188,6 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
31273188
(errcode(ERRCODE_GROUPING_ERROR),
31283189
errmsg("aggregate function calls cannot be nested")));
31293190

3130-
/* Set up projection info for evaluation */
3131-
pertrans->evalproj = ExecBuildProjectionInfo(pertrans->args,
3132-
aggstate->tmpcontext,
3133-
pertrans->evalslot,
3134-
NULL);
3135-
31363191
/*
31373192
* If we're doing either DISTINCT or ORDER BY for a plain agg, then we
31383193
* have a list of SortGroupClause nodes; fish out the data in them and
@@ -3165,6 +3220,14 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
31653220

31663221
if (numSortCols > 0)
31673222
{
3223+
/*
3224+
* Get a tupledesc and slot corresponding to the aggregated inputs
3225+
* (including sort expressions) of the agg.
3226+
*/
3227+
pertrans->sortdesc = ExecTypeFromTL(aggref->args, false);
3228+
pertrans->sortslot = ExecInitExtraTupleSlot(estate);
3229+
ExecSetSlotDescriptor(pertrans->sortslot, pertrans->sortdesc);
3230+
31683231
/*
31693232
* We don't implement DISTINCT or ORDER BY aggs in the HASHED case
31703233
* (yet)
@@ -3183,7 +3246,7 @@ build_pertrans_for_aggref(AggStatePerTrans pertrans,
31833246
/* we will need an extra slot to store prior values */
31843247
pertrans->uniqslot = ExecInitExtraTupleSlot(estate);
31853248
ExecSetSlotDescriptor(pertrans->uniqslot,
3186-
pertrans->evaldesc);
3249+
pertrans->sortdesc);
31873250
}
31883251

31893252
/* Extract the sort information for use later */

src/include/nodes/execnodes.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1863,6 +1863,10 @@ typedef struct AggState
18631863
List *hash_needed; /* list of columns needed in hash table */
18641864
bool table_filled; /* hash table filled yet? */
18651865
TupleHashIterator hashiter; /* for iterating through hash table */
1866+
/* support for evaluation of agg inputs */
1867+
TupleTableSlot *evalslot; /* slot for agg inputs */
1868+
ProjectionInfo *evalproj; /* projection machinery */
1869+
TupleDesc evaldesc; /* descriptor of input tuples */
18661870
} AggState;
18671871

18681872
/* ----------------

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy