Skip to content

Commit 2c00fad

Browse files
committed
Fix improper repetition of previous results from a hashed aggregate.
ExecReScanAgg's check for whether it could re-use a previously calculated hashtable neglected the possibility that the Agg node might reference PARAM_EXEC Params that are not referenced by its input plan node. That's okay if the Params are in upper tlist or qual expressions; but if one appears in aggregate input expressions, then the hashtable contents need to be recomputed when the Param's value changes. To avoid unnecessary performance degradation in the case of a Param that isn't within an aggregate input, add logic to the planner to determine which Params are within aggregate inputs. This requires a new field in struct Agg, but fortunately we never write plans to disk, so this isn't an initdb-forcing change. Per report from Jeevan Chalke. This has been broken since forever, so back-patch to all supported branches. Andrew Gierth, with minor adjustments by me Report: <CAM2+6=VY8ykfLT5Q8vb9B6EbeBk-NGuLbT6seaQ+Fq4zXvrDcA@mail.gmail.com>
1 parent 5cd3864 commit 2c00fad

File tree

9 files changed

+156
-6
lines changed

9 files changed

+156
-6
lines changed

src/backend/executor/nodeAgg.c

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3425,11 +3425,13 @@ ExecReScanAgg(AggState *node)
34253425
return;
34263426

34273427
/*
3428-
* If we do have the hash table and the subplan does not have any
3429-
* parameter changes, then we can just rescan the existing hash table;
3430-
* no need to build it again.
3428+
* If we do have the hash table, and the subplan does not have any
3429+
* parameter changes, and none of our own parameter changes affect
3430+
* input expressions of the aggregated functions, then we can just
3431+
* rescan the existing hash table; no need to build it again.
34313432
*/
3432-
if (outerPlan->chgParam == NULL)
3433+
if (outerPlan->chgParam == NULL &&
3434+
!bms_overlap(node->ss.ps.chgParam, aggnode->aggParams))
34333435
{
34343436
ResetTupleHashIterator(node->hashtable, &node->hashiter);
34353437
return;

src/backend/nodes/copyfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -877,6 +877,7 @@ _copyAgg(const Agg *from)
877877
COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid));
878878
}
879879
COPY_SCALAR_FIELD(numGroups);
880+
COPY_BITMAPSET_FIELD(aggParams);
880881
COPY_NODE_FIELD(groupingSets);
881882
COPY_NODE_FIELD(chain);
882883

src/backend/nodes/outfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -716,6 +716,7 @@ _outAgg(StringInfo str, const Agg *node)
716716
appendStringInfo(str, " %u", node->grpOperators[i]);
717717

718718
WRITE_LONG_FIELD(numGroups);
719+
WRITE_BITMAPSET_FIELD(aggParams);
719720
WRITE_NODE_FIELD(groupingSets);
720721
WRITE_NODE_FIELD(chain);
721722
}

src/backend/nodes/readfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2007,6 +2007,7 @@ _readAgg(void)
20072007
READ_ATTRNUMBER_ARRAY(grpColIdx, local_node->numCols);
20082008
READ_OID_ARRAY(grpOperators, local_node->numCols);
20092009
READ_LONG_FIELD(numGroups);
2010+
READ_BITMAPSET_FIELD(aggParams);
20102011
READ_NODE_FIELD(groupingSets);
20112012
READ_NODE_FIELD(chain);
20122013

src/backend/optimizer/plan/createplan.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5664,6 +5664,7 @@ make_agg(List *tlist, List *qual,
56645664
node->grpColIdx = grpColIdx;
56655665
node->grpOperators = grpOperators;
56665666
node->numGroups = numGroups;
5667+
node->aggParams = NULL; /* SS_finalize_plan() will fill this */
56675668
node->groupingSets = groupingSets;
56685669
node->chain = chain;
56695670

src/backend/optimizer/plan/subselect.c

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,7 @@ static Bitmapset *finalize_plan(PlannerInfo *root,
8282
Bitmapset *valid_params,
8383
Bitmapset *scan_params);
8484
static bool finalize_primnode(Node *node, finalize_primnode_context *context);
85+
static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context);
8586

8687

8788
/*
@@ -2652,6 +2653,29 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
26522653
locally_added_param);
26532654
break;
26542655

2656+
case T_Agg:
2657+
{
2658+
Agg *agg = (Agg *) plan;
2659+
2660+
/*
2661+
* AGG_HASHED plans need to know which Params are referenced
2662+
* in aggregate calls. Do a separate scan to identify them.
2663+
*/
2664+
if (agg->aggstrategy == AGG_HASHED)
2665+
{
2666+
finalize_primnode_context aggcontext;
2667+
2668+
aggcontext.root = root;
2669+
aggcontext.paramids = NULL;
2670+
finalize_agg_primnode((Node *) agg->plan.targetlist,
2671+
&aggcontext);
2672+
finalize_agg_primnode((Node *) agg->plan.qual,
2673+
&aggcontext);
2674+
agg->aggParams = aggcontext.paramids;
2675+
}
2676+
}
2677+
break;
2678+
26552679
case T_WindowAgg:
26562680
finalize_primnode(((WindowAgg *) plan)->startOffset,
26572681
&context);
@@ -2660,7 +2684,6 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
26602684
break;
26612685

26622686
case T_Hash:
2663-
case T_Agg:
26642687
case T_Material:
26652688
case T_Sort:
26662689
case T_Unique:
@@ -2811,6 +2834,29 @@ finalize_primnode(Node *node, finalize_primnode_context *context)
28112834
(void *) context);
28122835
}
28132836

2837+
/*
2838+
* finalize_agg_primnode: find all Aggref nodes in the given expression tree,
2839+
* and add IDs of all PARAM_EXEC params appearing within their aggregated
2840+
* arguments to the result set.
2841+
*/
2842+
static bool
2843+
finalize_agg_primnode(Node *node, finalize_primnode_context *context)
2844+
{
2845+
if (node == NULL)
2846+
return false;
2847+
if (IsA(node, Aggref))
2848+
{
2849+
Aggref *agg = (Aggref *) node;
2850+
2851+
/* we should not consider the direct arguments, if any */
2852+
finalize_primnode((Node *) agg->args, context);
2853+
finalize_primnode((Node *) agg->aggfilter, context);
2854+
return false; /* there can't be any Aggrefs below here */
2855+
}
2856+
return expression_tree_walker(node, finalize_agg_primnode,
2857+
(void *) context);
2858+
}
2859+
28142860
/*
28152861
* SS_make_initplan_output_param - make a Param for an initPlan's output
28162862
*

src/include/nodes/plannodes.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -715,7 +715,8 @@ typedef struct Agg
715715
AttrNumber *grpColIdx; /* their indexes in the target list */
716716
Oid *grpOperators; /* equality operators to compare with */
717717
long numGroups; /* estimated number of groups in input */
718-
/* Note: the planner only provides numGroups in AGG_HASHED case */
718+
Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */
719+
/* Note: planner provides numGroups & aggParams only in AGG_HASHED case */
719720
List *groupingSets; /* grouping sets to use */
720721
List *chain; /* chained Agg/Sort nodes */
721722
} Agg;

src/test/regress/expected/aggregates.out

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,81 @@ from tenk1 o;
366366
9999
367367
(1 row)
368368

369+
-- Test handling of Params within aggregate arguments in hashed aggregation.
370+
-- Per bug report from Jeevan Chalke.
371+
explain (verbose, costs off)
372+
select s1, s2, sm
373+
from generate_series(1, 3) s1,
374+
lateral (select s2, sum(s1 + s2) sm
375+
from generate_series(1, 3) s2 group by s2) ss
376+
order by 1, 2;
377+
QUERY PLAN
378+
------------------------------------------------------------------
379+
Sort
380+
Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2)))
381+
Sort Key: s1.s1, s2.s2
382+
-> Nested Loop
383+
Output: s1.s1, s2.s2, (sum((s1.s1 + s2.s2)))
384+
-> Function Scan on pg_catalog.generate_series s1
385+
Output: s1.s1
386+
Function Call: generate_series(1, 3)
387+
-> HashAggregate
388+
Output: s2.s2, sum((s1.s1 + s2.s2))
389+
Group Key: s2.s2
390+
-> Function Scan on pg_catalog.generate_series s2
391+
Output: s2.s2
392+
Function Call: generate_series(1, 3)
393+
(14 rows)
394+
395+
select s1, s2, sm
396+
from generate_series(1, 3) s1,
397+
lateral (select s2, sum(s1 + s2) sm
398+
from generate_series(1, 3) s2 group by s2) ss
399+
order by 1, 2;
400+
s1 | s2 | sm
401+
----+----+----
402+
1 | 1 | 2
403+
1 | 2 | 3
404+
1 | 3 | 4
405+
2 | 1 | 3
406+
2 | 2 | 4
407+
2 | 3 | 5
408+
3 | 1 | 4
409+
3 | 2 | 5
410+
3 | 3 | 6
411+
(9 rows)
412+
413+
explain (verbose, costs off)
414+
select array(select sum(x+y) s
415+
from generate_series(1,3) y group by y order by s)
416+
from generate_series(1,3) x;
417+
QUERY PLAN
418+
-------------------------------------------------------------------
419+
Function Scan on pg_catalog.generate_series x
420+
Output: (SubPlan 1)
421+
Function Call: generate_series(1, 3)
422+
SubPlan 1
423+
-> Sort
424+
Output: (sum((x.x + y.y))), y.y
425+
Sort Key: (sum((x.x + y.y)))
426+
-> HashAggregate
427+
Output: sum((x.x + y.y)), y.y
428+
Group Key: y.y
429+
-> Function Scan on pg_catalog.generate_series y
430+
Output: y.y
431+
Function Call: generate_series(1, 3)
432+
(13 rows)
433+
434+
select array(select sum(x+y) s
435+
from generate_series(1,3) y group by y order by s)
436+
from generate_series(1,3) x;
437+
array
438+
---------
439+
{2,3,4}
440+
{3,4,5}
441+
{4,5,6}
442+
(3 rows)
443+
369444
--
370445
-- test for bitwise integer aggregates
371446
--

src/test/regress/sql/aggregates.sql

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,28 @@ select
9898
(select max((select i.unique2 from tenk1 i where i.unique1 = o.unique1)))
9999
from tenk1 o;
100100

101+
-- Test handling of Params within aggregate arguments in hashed aggregation.
102+
-- Per bug report from Jeevan Chalke.
103+
explain (verbose, costs off)
104+
select s1, s2, sm
105+
from generate_series(1, 3) s1,
106+
lateral (select s2, sum(s1 + s2) sm
107+
from generate_series(1, 3) s2 group by s2) ss
108+
order by 1, 2;
109+
select s1, s2, sm
110+
from generate_series(1, 3) s1,
111+
lateral (select s2, sum(s1 + s2) sm
112+
from generate_series(1, 3) s2 group by s2) ss
113+
order by 1, 2;
114+
115+
explain (verbose, costs off)
116+
select array(select sum(x+y) s
117+
from generate_series(1,3) y group by y order by s)
118+
from generate_series(1,3) x;
119+
select array(select sum(x+y) s
120+
from generate_series(1,3) y group by y order by s)
121+
from generate_series(1,3) x;
122+
101123
--
102124
-- test for bitwise integer aggregates
103125
--

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy