Skip to content

Commit 3570ea4

Browse files
committed
Fix improper repetition of previous results from a hashed aggregate.
ExecReScanAgg's check for whether it could re-use a previously calculated hashtable neglected the possibility that the Agg node might reference PARAM_EXEC Params that are not referenced by its input plan node. That's okay if the Params are in upper tlist or qual expressions; but if one appears in aggregate input expressions, then the hashtable contents need to be recomputed when the Param's value changes. To avoid unnecessary performance degradation in the case of a Param that isn't within an aggregate input, add logic to the planner to determine which Params are within aggregate inputs. This requires a new field in struct Agg, but fortunately we never write plans to disk, so this isn't an initdb-forcing change. Per report from Jeevan Chalke. This has been broken since forever, so back-patch to all supported branches. Andrew Gierth, with minor adjustments by me Report: <CAM2+6=VY8ykfLT5Q8vb9B6EbeBk-NGuLbT6seaQ+Fq4zXvrDcA@mail.gmail.com>
1 parent 9942376 commit 3570ea4

File tree

8 files changed

+102
-7
lines changed

8 files changed

+102
-7
lines changed

src/backend/executor/nodeAgg.c

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1905,13 +1905,14 @@ void
19051905
ExecReScanAgg(AggState *node)
19061906
{
19071907
ExprContext *econtext = node->ss.ps.ps_ExprContext;
1908+
Agg *aggnode = (Agg *) node->ss.ps.plan;
19081909
int aggno;
19091910

19101911
node->agg_done = false;
19111912

19121913
node->ss.ps.ps_TupFromTlist = false;
19131914

1914-
if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
1915+
if (aggnode->aggstrategy == AGG_HASHED)
19151916
{
19161917
/*
19171918
* In the hashed case, if we haven't yet built the hash table then we
@@ -1923,11 +1924,13 @@ ExecReScanAgg(AggState *node)
19231924
return;
19241925

19251926
/*
1926-
* If we do have the hash table and the subplan does not have any
1927-
* parameter changes, then we can just rescan the existing hash table;
1928-
* no need to build it again.
1927+
* If we do have the hash table, and the subplan does not have any
1928+
* parameter changes, and none of our own parameter changes affect
1929+
* input expressions of the aggregated functions, then we can just
1930+
* rescan the existing hash table; no need to build it again.
19291931
*/
1930-
if (node->ss.ps.lefttree->chgParam == NULL)
1932+
if (node->ss.ps.lefttree->chgParam == NULL &&
1933+
!bms_overlap(node->ss.ps.chgParam, aggnode->aggParams))
19311934
{
19321935
ResetTupleHashIterator(node->hashtable, &node->hashiter);
19331936
return;
@@ -1964,7 +1967,7 @@ ExecReScanAgg(AggState *node)
19641967
*/
19651968
MemoryContextResetAndDeleteChildren(node->aggcontext);
19661969

1967-
if (((Agg *) node->ss.ps.plan)->aggstrategy == AGG_HASHED)
1970+
if (aggnode->aggstrategy == AGG_HASHED)
19681971
{
19691972
/* Rebuild an empty hash table */
19701973
build_hash_table(node);

src/backend/nodes/copyfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -769,6 +769,7 @@ _copyAgg(Agg *from)
769769
COPY_POINTER_FIELD(grpOperators, from->numCols * sizeof(Oid));
770770
}
771771
COPY_SCALAR_FIELD(numGroups);
772+
COPY_BITMAPSET_FIELD(aggParams);
772773

773774
return newnode;
774775
}

src/backend/nodes/outfuncs.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,7 @@ _outAgg(StringInfo str, Agg *node)
642642
appendStringInfo(str, " %u", node->grpOperators[i]);
643643

644644
WRITE_LONG_FIELD(numGroups);
645+
WRITE_BITMAPSET_FIELD(aggParams);
645646
}
646647

647648
static void

src/backend/optimizer/plan/createplan.c

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3940,6 +3940,7 @@ make_agg(PlannerInfo *root, List *tlist, List *qual,
39403940
node->grpColIdx = grpColIdx;
39413941
node->grpOperators = grpOperators;
39423942
node->numGroups = numGroups;
3943+
node->aggParams = NULL; /* SS_finalize_plan() will fill this */
39433944

39443945
copy_plan_costsize(plan, lefttree); /* only care about copying size */
39453946
cost_agg(&agg_path, root,

src/backend/optimizer/plan/subselect.c

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ static Bitmapset *finalize_plan(PlannerInfo *root,
8181
Bitmapset *valid_params,
8282
Bitmapset *scan_params);
8383
static bool finalize_primnode(Node *node, finalize_primnode_context *context);
84+
static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context);
8485

8586

8687
/*
@@ -2351,6 +2352,29 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
23512352
locally_added_param);
23522353
break;
23532354

2355+
case T_Agg:
2356+
{
2357+
Agg *agg = (Agg *) plan;
2358+
2359+
/*
2360+
* AGG_HASHED plans need to know which Params are referenced
2361+
* in aggregate calls. Do a separate scan to identify them.
2362+
*/
2363+
if (agg->aggstrategy == AGG_HASHED)
2364+
{
2365+
finalize_primnode_context aggcontext;
2366+
2367+
aggcontext.root = root;
2368+
aggcontext.paramids = NULL;
2369+
finalize_agg_primnode((Node *) agg->plan.targetlist,
2370+
&aggcontext);
2371+
finalize_agg_primnode((Node *) agg->plan.qual,
2372+
&aggcontext);
2373+
agg->aggParams = aggcontext.paramids;
2374+
}
2375+
}
2376+
break;
2377+
23542378
case T_WindowAgg:
23552379
finalize_primnode(((WindowAgg *) plan)->startOffset,
23562380
&context);
@@ -2359,7 +2383,6 @@ finalize_plan(PlannerInfo *root, Plan *plan, Bitmapset *valid_params,
23592383
break;
23602384

23612385
case T_Hash:
2362-
case T_Agg:
23632386
case T_Material:
23642387
case T_Sort:
23652388
case T_Unique:
@@ -2503,6 +2526,28 @@ finalize_primnode(Node *node, finalize_primnode_context *context)
25032526
(void *) context);
25042527
}
25052528

2529+
/*
2530+
* finalize_agg_primnode: find all Aggref nodes in the given expression tree,
2531+
* and add IDs of all PARAM_EXEC params appearing within their aggregated
2532+
* arguments to the result set.
2533+
*/
2534+
static bool
2535+
finalize_agg_primnode(Node *node, finalize_primnode_context *context)
2536+
{
2537+
if (node == NULL)
2538+
return false;
2539+
if (IsA(node, Aggref))
2540+
{
2541+
Aggref *agg = (Aggref *) node;
2542+
2543+
/* we should not consider the direct arguments, if any */
2544+
finalize_primnode((Node *) agg->args, context);
2545+
return false; /* there can't be any Aggrefs below here */
2546+
}
2547+
return expression_tree_walker(node, finalize_agg_primnode,
2548+
(void *) context);
2549+
}
2550+
25062551
/*
25072552
* SS_make_initplan_from_plan - given a plan tree, make it an InitPlan
25082553
*

src/include/nodes/plannodes.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -604,6 +604,8 @@ typedef struct Agg
604604
AttrNumber *grpColIdx; /* their indexes in the target list */
605605
Oid *grpOperators; /* equality operators to compare with */
606606
long numGroups; /* estimated number of groups in input */
607+
Bitmapset *aggParams; /* IDs of Params used in Aggref inputs */
608+
/* Note: planner provides numGroups & aggParams only in AGG_HASHED case */
607609
} Agg;
608610

609611
/* ----------------

src/test/regress/expected/aggregates.out

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,38 @@ from tenk1 o;
305305
9999
306306
(1 row)
307307

308+
-- Test handling of Params within aggregate arguments in hashed aggregation.
309+
-- Per bug report from Jeevan Chalke.
310+
explain (verbose, costs off)
311+
select array(select sum(x+y) s
312+
from generate_series(1,3) y group by y order by s)
313+
from generate_series(1,3) x;
314+
QUERY PLAN
315+
-------------------------------------------------------------------
316+
Function Scan on pg_catalog.generate_series x
317+
Output: (SubPlan 1)
318+
Function Call: generate_series(1, 3)
319+
SubPlan 1
320+
-> Sort
321+
Output: (sum(($0 + y.y))), y.y
322+
Sort Key: (sum(($0 + y.y)))
323+
-> HashAggregate
324+
Output: sum((x.x + y.y)), y.y
325+
-> Function Scan on pg_catalog.generate_series y
326+
Output: y.y
327+
Function Call: generate_series(1, 3)
328+
(12 rows)
329+
330+
select array(select sum(x+y) s
331+
from generate_series(1,3) y group by y order by s)
332+
from generate_series(1,3) x;
333+
?column?
334+
----------
335+
{2,3,4}
336+
{3,4,5}
337+
{4,5,6}
338+
(3 rows)
339+
308340
--
309341
-- test for bitwise integer aggregates
310342
--

src/test/regress/sql/aggregates.sql

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,16 @@ select
8686
(select max((select i.unique2 from tenk1 i where i.unique1 = o.unique1)))
8787
from tenk1 o;
8888

89+
-- Test handling of Params within aggregate arguments in hashed aggregation.
90+
-- Per bug report from Jeevan Chalke.
91+
explain (verbose, costs off)
92+
select array(select sum(x+y) s
93+
from generate_series(1,3) y group by y order by s)
94+
from generate_series(1,3) x;
95+
select array(select sum(x+y) s
96+
from generate_series(1,3) y group by y order by s)
97+
from generate_series(1,3) x;
98+
8999
--
90100
-- test for bitwise integer aggregates
91101
--

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy