Skip to content

Commit 6985592

Browse files
committed
Split out into a separate function the code in grouping_planner() that
decides whether to use hashed grouping instead of sort-plus-uniq grouping. The function needs an annoyingly large number of parameters, but this still seems like a win for legibility, since it removes over a hundred lines from grouping_planner (which is still too big :-().
1 parent 313de22 commit 6985592

File tree

1 file changed

+156
-140
lines changed

1 file changed

+156
-140
lines changed

src/backend/optimizer/plan/planner.c

Lines changed: 156 additions & 140 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.182 2005/04/06 16:34:05 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/optimizer/plan/planner.c,v 1.183 2005/04/10 19:50:08 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -58,6 +58,10 @@ static Node *preprocess_expression(Query *parse, Node *expr, int kind);
5858
static void preprocess_qual_conditions(Query *parse, Node *jtnode);
5959
static Plan *inheritance_planner(Query *parse, List *inheritlist);
6060
static Plan *grouping_planner(Query *parse, double tuple_fraction);
61+
static bool choose_hashed_grouping(Query *parse, double tuple_fraction,
62+
Path *cheapest_path, Path *sorted_path,
63+
List *sort_pathkeys, List *group_pathkeys,
64+
double dNumGroups, AggClauseCounts *agg_counts);
6165
static bool hash_safe_grouping(Query *parse);
6266
static List *make_subplanTargetList(Query *parse, List *tlist,
6367
AttrNumber **groupColIdx, bool *need_tlist_eval);
@@ -920,34 +924,25 @@ grouping_planner(Query *parse, double tuple_fraction)
920924
sort_pathkeys = canonicalize_pathkeys(parse, sort_pathkeys);
921925

922926
/*
923-
* Consider whether we might want to use hashed grouping.
927+
* If grouping, estimate the number of groups. (We can't do this
928+
* until after running query_planner(), either.) Then decide
929+
* whether we want to use hashed grouping.
924930
*/
925931
if (parse->groupClause)
926932
{
927933
List *groupExprs;
928934
double cheapest_path_rows;
929-
int cheapest_path_width;
930935

931936
/*
932-
* Beware in this section of the possibility that
933-
* cheapest_path->parent is NULL. This could happen if user
934-
* does something silly like SELECT 'foo' GROUP BY 1;
937+
* Beware of the possibility that cheapest_path->parent is NULL.
938+
* This could happen if user does something silly like
939+
* SELECT 'foo' GROUP BY 1;
935940
*/
936941
if (cheapest_path->parent)
937-
{
938942
cheapest_path_rows = cheapest_path->parent->rows;
939-
cheapest_path_width = cheapest_path->parent->width;
940-
}
941943
else
942-
{
943944
cheapest_path_rows = 1; /* assume non-set result */
944-
cheapest_path_width = 100; /* arbitrary */
945-
}
946945

947-
/*
948-
* Always estimate the number of groups. We can't do this
949-
* until after running query_planner(), either.
950-
*/
951946
groupExprs = get_sortgrouplist_exprs(parse->groupClause,
952947
parse->targetList);
953948
dNumGroups = estimate_num_groups(parse,
@@ -956,130 +951,11 @@ grouping_planner(Query *parse, double tuple_fraction)
956951
/* Also want it as a long int --- but 'ware overflow! */
957952
numGroups = (long) Min(dNumGroups, (double) LONG_MAX);
958953

959-
/*
960-
* Check can't-do-it conditions, including whether the
961-
* grouping operators are hashjoinable.
962-
*
963-
* Executor doesn't support hashed aggregation with DISTINCT
964-
* aggregates. (Doing so would imply storing *all* the input
965-
* values in the hash table, which seems like a certain
966-
* loser.)
967-
*/
968-
if (!enable_hashagg || !hash_safe_grouping(parse))
969-
use_hashed_grouping = false;
970-
else if (agg_counts.numDistinctAggs != 0)
971-
use_hashed_grouping = false;
972-
else
973-
{
974-
/*
975-
* Use hashed grouping if (a) we think we can fit the
976-
* hashtable into work_mem, *and* (b) the estimated cost
977-
* is no more than doing it the other way. While avoiding
978-
* the need for sorted input is usually a win, the fact
979-
* that the output won't be sorted may be a loss; so we
980-
* need to do an actual cost comparison.
981-
*/
982-
Size hashentrysize;
983-
984-
/* Estimate per-hash-entry space at tuple width... */
985-
hashentrysize = cheapest_path_width;
986-
/* plus space for pass-by-ref transition values... */
987-
hashentrysize += agg_counts.transitionSpace;
988-
/* plus the per-hash-entry overhead */
989-
hashentrysize += hash_agg_entry_size(agg_counts.numAggs);
990-
991-
if (hashentrysize * dNumGroups <= work_mem * 1024L)
992-
{
993-
/*
994-
* Okay, do the cost comparison. We need to consider
995-
* cheapest_path + hashagg [+ final sort] versus
996-
* either cheapest_path [+ sort] + group or agg [+
997-
* final sort] or presorted_path + group or agg [+
998-
* final sort] where brackets indicate a step that may
999-
* not be needed. We assume query_planner() will have
1000-
* returned a presorted path only if it's a winner
1001-
* compared to cheapest_path for this purpose.
1002-
*
1003-
* These path variables are dummies that just hold cost
1004-
* fields; we don't make actual Paths for these steps.
1005-
*/
1006-
Path hashed_p;
1007-
Path sorted_p;
1008-
1009-
cost_agg(&hashed_p, parse,
1010-
AGG_HASHED, agg_counts.numAggs,
1011-
numGroupCols, dNumGroups,
1012-
cheapest_path->startup_cost,
1013-
cheapest_path->total_cost,
1014-
cheapest_path_rows);
1015-
/* Result of hashed agg is always unsorted */
1016-
if (sort_pathkeys)
1017-
cost_sort(&hashed_p, parse, sort_pathkeys,
1018-
hashed_p.total_cost,
1019-
dNumGroups,
1020-
cheapest_path_width);
1021-
1022-
if (sorted_path)
1023-
{
1024-
sorted_p.startup_cost = sorted_path->startup_cost;
1025-
sorted_p.total_cost = sorted_path->total_cost;
1026-
current_pathkeys = sorted_path->pathkeys;
1027-
}
1028-
else
1029-
{
1030-
sorted_p.startup_cost = cheapest_path->startup_cost;
1031-
sorted_p.total_cost = cheapest_path->total_cost;
1032-
current_pathkeys = cheapest_path->pathkeys;
1033-
}
1034-
if (!pathkeys_contained_in(group_pathkeys,
1035-
current_pathkeys))
1036-
{
1037-
cost_sort(&sorted_p, parse, group_pathkeys,
1038-
sorted_p.total_cost,
1039-
cheapest_path_rows,
1040-
cheapest_path_width);
1041-
current_pathkeys = group_pathkeys;
1042-
}
1043-
if (parse->hasAggs)
1044-
cost_agg(&sorted_p, parse,
1045-
AGG_SORTED, agg_counts.numAggs,
1046-
numGroupCols, dNumGroups,
1047-
sorted_p.startup_cost,
1048-
sorted_p.total_cost,
1049-
cheapest_path_rows);
1050-
else
1051-
cost_group(&sorted_p, parse,
1052-
numGroupCols, dNumGroups,
1053-
sorted_p.startup_cost,
1054-
sorted_p.total_cost,
1055-
cheapest_path_rows);
1056-
/* The Agg or Group node will preserve ordering */
1057-
if (sort_pathkeys &&
1058-
!pathkeys_contained_in(sort_pathkeys,
1059-
current_pathkeys))
1060-
{
1061-
cost_sort(&sorted_p, parse, sort_pathkeys,
1062-
sorted_p.total_cost,
1063-
dNumGroups,
1064-
cheapest_path_width);
1065-
}
1066-
1067-
/*
1068-
* Now make the decision using the top-level tuple
1069-
* fraction. First we have to convert an absolute
1070-
* count (LIMIT) into fractional form.
1071-
*/
1072-
if (tuple_fraction >= 1.0)
1073-
tuple_fraction /= dNumGroups;
1074-
1075-
if (compare_fractional_path_costs(&hashed_p, &sorted_p,
1076-
tuple_fraction) < 0)
1077-
{
1078-
/* Hashed is cheaper, so use it */
1079-
use_hashed_grouping = true;
1080-
}
1081-
}
1082-
}
954+
use_hashed_grouping =
955+
choose_hashed_grouping(parse, tuple_fraction,
956+
cheapest_path, sorted_path,
957+
sort_pathkeys, group_pathkeys,
958+
dNumGroups, &agg_counts);
1083959
}
1084960

1085961
/*
@@ -1331,6 +1207,146 @@ grouping_planner(Query *parse, double tuple_fraction)
13311207
return result_plan;
13321208
}
13331209

1210+
/*
1211+
* choose_hashed_grouping - should we use hashed grouping?
1212+
*/
1213+
static bool
1214+
choose_hashed_grouping(Query *parse, double tuple_fraction,
1215+
Path *cheapest_path, Path *sorted_path,
1216+
List *sort_pathkeys, List *group_pathkeys,
1217+
double dNumGroups, AggClauseCounts *agg_counts)
1218+
{
1219+
int numGroupCols = list_length(parse->groupClause);
1220+
double cheapest_path_rows;
1221+
int cheapest_path_width;
1222+
Size hashentrysize;
1223+
List *current_pathkeys;
1224+
Path hashed_p;
1225+
Path sorted_p;
1226+
1227+
/*
1228+
* Check can't-do-it conditions, including whether the grouping operators
1229+
* are hashjoinable.
1230+
*
1231+
* Executor doesn't support hashed aggregation with DISTINCT aggregates.
1232+
* (Doing so would imply storing *all* the input values in the hash table,
1233+
* which seems like a certain loser.)
1234+
*/
1235+
if (!enable_hashagg)
1236+
return false;
1237+
if (agg_counts->numDistinctAggs != 0)
1238+
return false;
1239+
if (!hash_safe_grouping(parse))
1240+
return false;
1241+
1242+
/*
1243+
* Don't do it if it doesn't look like the hashtable will fit into
1244+
* work_mem.
1245+
*
1246+
* Beware here of the possibility that cheapest_path->parent is NULL.
1247+
* This could happen if user does something silly like
1248+
* SELECT 'foo' GROUP BY 1;
1249+
*/
1250+
if (cheapest_path->parent)
1251+
{
1252+
cheapest_path_rows = cheapest_path->parent->rows;
1253+
cheapest_path_width = cheapest_path->parent->width;
1254+
}
1255+
else
1256+
{
1257+
cheapest_path_rows = 1; /* assume non-set result */
1258+
cheapest_path_width = 100; /* arbitrary */
1259+
}
1260+
1261+
/* Estimate per-hash-entry space at tuple width... */
1262+
hashentrysize = cheapest_path_width;
1263+
/* plus space for pass-by-ref transition values... */
1264+
hashentrysize += agg_counts->transitionSpace;
1265+
/* plus the per-hash-entry overhead */
1266+
hashentrysize += hash_agg_entry_size(agg_counts->numAggs);
1267+
1268+
if (hashentrysize * dNumGroups > work_mem * 1024L)
1269+
return false;
1270+
1271+
/*
1272+
* See if the estimated cost is no more than doing it the other way.
1273+
* While avoiding the need for sorted input is usually a win, the fact
1274+
* that the output won't be sorted may be a loss; so we need to do an
1275+
* actual cost comparison.
1276+
*
1277+
* We need to consider
1278+
* cheapest_path + hashagg [+ final sort]
1279+
* versus either
1280+
* cheapest_path [+ sort] + group or agg [+ final sort]
1281+
* or
1282+
* presorted_path + group or agg [+ final sort]
1283+
* where brackets indicate a step that may not be needed. We assume
1284+
* query_planner() will have returned a presorted path only if it's a
1285+
* winner compared to cheapest_path for this purpose.
1286+
*
1287+
* These path variables are dummies that just hold cost fields; we don't
1288+
* make actual Paths for these steps.
1289+
*/
1290+
cost_agg(&hashed_p, parse, AGG_HASHED, agg_counts->numAggs,
1291+
numGroupCols, dNumGroups,
1292+
cheapest_path->startup_cost, cheapest_path->total_cost,
1293+
cheapest_path_rows);
1294+
/* Result of hashed agg is always unsorted */
1295+
if (sort_pathkeys)
1296+
cost_sort(&hashed_p, parse, sort_pathkeys, hashed_p.total_cost,
1297+
dNumGroups, cheapest_path_width);
1298+
1299+
if (sorted_path)
1300+
{
1301+
sorted_p.startup_cost = sorted_path->startup_cost;
1302+
sorted_p.total_cost = sorted_path->total_cost;
1303+
current_pathkeys = sorted_path->pathkeys;
1304+
}
1305+
else
1306+
{
1307+
sorted_p.startup_cost = cheapest_path->startup_cost;
1308+
sorted_p.total_cost = cheapest_path->total_cost;
1309+
current_pathkeys = cheapest_path->pathkeys;
1310+
}
1311+
if (!pathkeys_contained_in(group_pathkeys,
1312+
current_pathkeys))
1313+
{
1314+
cost_sort(&sorted_p, parse, group_pathkeys, sorted_p.total_cost,
1315+
cheapest_path_rows, cheapest_path_width);
1316+
current_pathkeys = group_pathkeys;
1317+
}
1318+
1319+
if (parse->hasAggs)
1320+
cost_agg(&sorted_p, parse, AGG_SORTED, agg_counts->numAggs,
1321+
numGroupCols, dNumGroups,
1322+
sorted_p.startup_cost, sorted_p.total_cost,
1323+
cheapest_path_rows);
1324+
else
1325+
cost_group(&sorted_p, parse, numGroupCols, dNumGroups,
1326+
sorted_p.startup_cost, sorted_p.total_cost,
1327+
cheapest_path_rows);
1328+
/* The Agg or Group node will preserve ordering */
1329+
if (sort_pathkeys &&
1330+
!pathkeys_contained_in(sort_pathkeys, current_pathkeys))
1331+
cost_sort(&sorted_p, parse, sort_pathkeys, sorted_p.total_cost,
1332+
dNumGroups, cheapest_path_width);
1333+
1334+
/*
1335+
* Now make the decision using the top-level tuple fraction. First we
1336+
* have to convert an absolute count (LIMIT) into fractional form.
1337+
*/
1338+
if (tuple_fraction >= 1.0)
1339+
tuple_fraction /= dNumGroups;
1340+
1341+
if (compare_fractional_path_costs(&hashed_p, &sorted_p,
1342+
tuple_fraction) < 0)
1343+
{
1344+
/* Hashed is cheaper, so use it */
1345+
return true;
1346+
}
1347+
return false;
1348+
}
1349+
13341350
/*
13351351
* hash_safe_grouping - are grouping operators hashable?
13361352
*

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy