Skip to content

Commit e211481

Browse files
committed
Implement choice between hash-based and sort-based grouping for doing
DISTINCT processing on the output of an IN sub-select.
1 parent a4482f4 commit e211481

File tree

2 files changed

+105
-12
lines changed

2 files changed

+105
-12
lines changed

src/backend/optimizer/plan/createplan.c

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -10,12 +10,13 @@
1010
*
1111
*
1212
* IDENTIFICATION
13-
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.132 2003/01/20 18:54:52 tgl Exp $
13+
* $Header: /cvsroot/pgsql/src/backend/optimizer/plan/createplan.c,v 1.133 2003/01/22 00:07:00 tgl Exp $
1414
*
1515
*-------------------------------------------------------------------------
1616
*/
1717
#include "postgres.h"
1818

19+
#include <limits.h>
1920

2021
#include "nodes/makefuncs.h"
2122
#include "nodes/nodeFuncs.h"
@@ -418,6 +419,7 @@ create_unique_plan(Query *root, UniquePath *best_path)
418419
Plan *plan;
419420
Plan *subplan;
420421
List *sub_targetlist;
422+
List *my_tlist;
421423
List *l;
422424

423425
subplan = create_plan(root, best_path->subpath);
@@ -474,21 +476,39 @@ create_unique_plan(Query *root, UniquePath *best_path)
474476
subplan->targetlist = newtlist;
475477
}
476478

479+
my_tlist = new_unsorted_tlist(subplan->targetlist);
480+
477481
if (best_path->use_hash)
478482
{
479-
elog(ERROR, "create_unique_plan: hash case not implemented yet");
480-
plan = NULL;
483+
int numGroupCols = length(my_tlist);
484+
long numGroups;
485+
AttrNumber *groupColIdx;
486+
int i;
487+
488+
numGroups = (long) Min(best_path->rows, (double) LONG_MAX);
489+
490+
groupColIdx = (AttrNumber *) palloc(numGroupCols * sizeof(AttrNumber));
491+
for (i = 0; i < numGroupCols; i++)
492+
groupColIdx[i] = i+1;
493+
494+
plan = (Plan *) make_agg(root,
495+
my_tlist,
496+
NIL,
497+
AGG_HASHED,
498+
numGroupCols,
499+
groupColIdx,
500+
numGroups,
501+
0,
502+
subplan);
481503
}
482504
else
483505
{
484-
List *sort_tlist;
485506
List *sortList;
486507

487-
sort_tlist = new_unsorted_tlist(subplan->targetlist);
488-
sortList = addAllTargetsToSortList(NIL, sort_tlist);
489-
plan = (Plan *) make_sort_from_sortclauses(root, sort_tlist,
508+
sortList = addAllTargetsToSortList(NIL, my_tlist);
509+
plan = (Plan *) make_sort_from_sortclauses(root, my_tlist,
490510
subplan, sortList);
491-
plan = (Plan *) make_unique(sort_tlist, plan, sortList);
511+
plan = (Plan *) make_unique(my_tlist, plan, sortList);
492512
}
493513

494514
plan->plan_rows = best_path->rows;

src/backend/optimizer/util/pathnode.c

Lines changed: 77 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,22 +8,30 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.84 2003/01/20 18:54:56 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/optimizer/util/pathnode.c,v 1.85 2003/01/22 00:07:00 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
1515
#include "postgres.h"
1616

1717
#include <math.h>
1818

19+
#include "catalog/pg_operator.h"
1920
#include "executor/executor.h"
21+
#include "miscadmin.h"
2022
#include "nodes/plannodes.h"
2123
#include "optimizer/cost.h"
2224
#include "optimizer/pathnode.h"
2325
#include "optimizer/paths.h"
2426
#include "optimizer/restrictinfo.h"
27+
#include "parser/parse_expr.h"
28+
#include "parser/parse_oper.h"
2529
#include "utils/memutils.h"
2630
#include "utils/selfuncs.h"
31+
#include "utils/syscache.h"
32+
33+
34+
static bool hash_safe_tlist(List *tlist);
2735

2836

2937
/*****************************************************************************
@@ -506,6 +514,7 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
506514
{
507515
UniquePath *pathnode;
508516
Path sort_path; /* dummy for result of cost_sort */
517+
Path agg_path; /* dummy for result of cost_agg */
509518
MemoryContext oldcontext;
510519
List *sub_targetlist;
511520
List *l;
@@ -587,16 +596,80 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
587596
*/
588597
sort_path.total_cost += cpu_operator_cost * rel->rows * numCols;
589598

590-
pathnode->use_hash = false; /* for now */
599+
/*
600+
* Is it safe to use a hashed implementation? If so, estimate and
601+
* compare costs. We only try this if we know the targetlist for
602+
* sure (else we can't be sure about the datatypes involved).
603+
*/
604+
pathnode->use_hash = false;
605+
if (enable_hashagg && sub_targetlist && hash_safe_tlist(sub_targetlist))
606+
{
607+
/*
608+
* Estimate the overhead per hashtable entry at 64 bytes (same
609+
* as in planner.c).
610+
*/
611+
int hashentrysize = rel->width + 64;
591612

592-
pathnode->path.startup_cost = sort_path.startup_cost;
593-
pathnode->path.total_cost = sort_path.total_cost;
613+
if (hashentrysize * pathnode->rows <= SortMem * 1024L)
614+
{
615+
cost_agg(&agg_path, root,
616+
AGG_HASHED, 0,
617+
numCols, pathnode->rows,
618+
subpath->startup_cost,
619+
subpath->total_cost,
620+
rel->rows);
621+
if (agg_path.total_cost < sort_path.total_cost)
622+
pathnode->use_hash = true;
623+
}
624+
}
625+
626+
if (pathnode->use_hash)
627+
{
628+
pathnode->path.startup_cost = agg_path.startup_cost;
629+
pathnode->path.total_cost = agg_path.total_cost;
630+
}
631+
else
632+
{
633+
pathnode->path.startup_cost = sort_path.startup_cost;
634+
pathnode->path.total_cost = sort_path.total_cost;
635+
}
594636

595637
rel->cheapest_unique_path = (Path *) pathnode;
596638

597639
return pathnode;
598640
}
599641

642+
/*
643+
* hash_safe_tlist - can datatypes of given tlist be hashed?
644+
*
645+
* We assume hashed aggregation will work if the datatype's equality operator
646+
* is marked hashjoinable.
647+
*
648+
* XXX this probably should be somewhere else. See also hash_safe_grouping
649+
* in plan/planner.c.
650+
*/
651+
static bool
652+
hash_safe_tlist(List *tlist)
653+
{
654+
List *tl;
655+
656+
foreach(tl, tlist)
657+
{
658+
Node *expr = (Node *) lfirst(tl);
659+
Operator optup;
660+
bool oprcanhash;
661+
662+
optup = equality_oper(exprType(expr), true);
663+
if (!optup)
664+
return false;
665+
oprcanhash = ((Form_pg_operator) GETSTRUCT(optup))->oprcanhash;
666+
ReleaseSysCache(optup);
667+
if (!oprcanhash)
668+
return false;
669+
}
670+
return true;
671+
}
672+
600673
/*
601674
* create_subqueryscan_path
602675
* Creates a path corresponding to a sequential scan of a subquery,

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy