Skip to content

Commit 7351bfe

Browse files
committed
Fix costing for disk-based hash aggregation.
Report and suggestions from Richard Guo and Tomas Vondra. Discussion: https://postgr.es/m/CAMbWs4_W8fYbAn8KxgidAaZHON_Oo08OYn9ze=7remJymLqo5g@mail.gmail.com
1 parent: 4083f44 · commit: 7351bfe

File tree

2 files changed: 16 additions (+16) and 16 deletions (-16)

src/backend/executor/nodeAgg.c

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1728,6 +1728,8 @@ hash_agg_set_limits(double hashentrysize, uint64 input_groups, int used_bits,
17281728
/* if not expected to spill, use all of work_mem */
17291729
if (input_groups * hashentrysize < work_mem * 1024L)
17301730
{
1731+
if (num_partitions != NULL)
1732+
*num_partitions = 0;
17311733
*mem_limit = work_mem * 1024L;
17321734
*ngroups_limit = *mem_limit / hashentrysize;
17331735
return;

src/backend/optimizer/path/costsize.c

Lines changed: 14 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -2257,14 +2257,15 @@ cost_agg(Path *path, PlannerInfo *root,
22572257
*/
22582258
if (aggstrategy == AGG_HASHED || aggstrategy == AGG_MIXED)
22592259
{
2260+
double pages;
22602261
double pages_written = 0.0;
22612262
double pages_read = 0.0;
22622263
double hashentrysize;
22632264
double nbatches;
22642265
Size mem_limit;
22652266
uint64 ngroups_limit;
22662267
int num_partitions;
2267-
2268+
int depth;
22682269

22692270
/*
22702271
* Estimate number of batches based on the computed limits. If less
@@ -2279,25 +2280,22 @@ cost_agg(Path *path, PlannerInfo *root,
22792280
nbatches = Max( (numGroups * hashentrysize) / mem_limit,
22802281
numGroups / ngroups_limit );
22812282

2283+
nbatches = Max(ceil(nbatches), 1.0);
2284+
num_partitions = Max(num_partitions, 2);
2285+
2286+
/*
2287+
* The number of partitions can change at different levels of
2288+
* recursion; but for the purposes of this calculation assume it stays
2289+
* constant.
2290+
*/
2291+
depth = ceil( log(nbatches) / log(num_partitions) );
2292+
22822293
/*
22832294
* Estimate number of pages read and written. For each level of
22842295
* recursion, a tuple must be written and then later read.
22852296
*/
2286-
if (nbatches > 1.0)
2287-
{
2288-
double depth;
2289-
double pages;
2290-
2291-
pages = relation_byte_size(input_tuples, input_width) / BLCKSZ;
2292-
2293-
/*
2294-
* The number of partitions can change at different levels of
2295-
* recursion; but for the purposes of this calculation assume it
2296-
* stays constant.
2297-
*/
2298-
depth = ceil( log(nbatches - 1) / log(num_partitions) );
2299-
pages_written = pages_read = pages * depth;
2300-
}
2297+
pages = relation_byte_size(input_tuples, input_width) / BLCKSZ;
2298+
pages_written = pages_read = pages * depth;
23012299

23022300
startup_cost += pages_written * random_page_cost;
23032301
total_cost += pages_written * random_page_cost;

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy