Skip to content

Commit 5c74ce2

Browse files
committed
Improve UniquePath logic to detect the case where the input is already
known to be unique (e.g., it is a SELECT DISTINCT ... subquery), and skip the redundant unique-ification step.
1 parent cce442d commit 5c74ce2

File tree

6 files changed

+72
-15
lines changed

6 files changed

+72
-15
lines changed

src/backend/nodes/outfuncs.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.226 2004/01/05 05:07:35 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/nodes/outfuncs.c,v 1.227 2004/01/05 18:04:38 tgl Exp $
1212
*
1313
* NOTES
1414
* Every node type that can appear in stored rules' parsetrees *must*
@@ -1023,7 +1023,7 @@ _outUniquePath(StringInfo str, UniquePath *node)
10231023
_outPathInfo(str, (Path *) node);
10241024

10251025
WRITE_NODE_FIELD(subpath);
1026-
WRITE_BOOL_FIELD(use_hash);
1026+
WRITE_ENUM_FIELD(umethod, UniquePathMethod);
10271027
WRITE_FLOAT_FIELD(rows, "%.0f");
10281028
}
10291029

src/backend/optimizer/plan/createplan.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
*
1111
*
1212
* IDENTIFICATION
13-
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.162 2004/01/05 05:07:35 tgl Exp $
13+
* $PostgreSQL: pgsql/src/backend/optimizer/plan/createplan.c,v 1.163 2004/01/05 18:04:38 tgl Exp $
1414
*
1515
*-------------------------------------------------------------------------
1616
*/
@@ -605,10 +605,14 @@ create_unique_plan(Query *root, UniquePath *best_path)
605605
subplan->targetlist = newtlist;
606606
}
607607

608+
/* Done if we don't need to do any actual unique-ifying */
609+
if (best_path->umethod == UNIQUE_PATH_NOOP)
610+
return subplan;
611+
608612
/* Copy tlist again to make one we can put sorting labels on */
609613
my_tlist = copyObject(subplan->targetlist);
610614

611-
if (best_path->use_hash)
615+
if (best_path->umethod == UNIQUE_PATH_HASH)
612616
{
613617
long numGroups;
614618

src/backend/optimizer/util/clauses.c

Lines changed: 16 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.159 2004/01/04 03:51:52 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/optimizer/util/clauses.c,v 1.160 2004/01/05 18:04:39 tgl Exp $
1212
*
1313
* HISTORY
1414
* AUTHOR DATE MAJOR EVENT
@@ -921,6 +921,21 @@ has_distinct_on_clause(Query *query)
921921
return false;
922922
}
923923

924+
/*
925+
* Test whether a query uses simple DISTINCT, ie, has a distinct-list that
926+
* is the same as the set of output columns.
927+
*/
928+
bool
929+
has_distinct_clause(Query *query)
930+
{
931+
/* Is there a DISTINCT clause at all? */
932+
if (query->distinctClause == NIL)
933+
return false;
934+
935+
/* It's DISTINCT if it's not DISTINCT ON */
936+
return !has_distinct_on_clause(query);
937+
}
938+
924939

925940
/*****************************************************************************
926941
* *

src/backend/optimizer/util/pathnode.c

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.97 2004/01/05 05:07:35 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/optimizer/util/pathnode.c,v 1.98 2004/01/05 18:04:39 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -20,12 +20,14 @@
2020
#include "executor/executor.h"
2121
#include "miscadmin.h"
2222
#include "nodes/plannodes.h"
23+
#include "optimizer/clauses.h"
2324
#include "optimizer/cost.h"
2425
#include "optimizer/pathnode.h"
2526
#include "optimizer/paths.h"
2627
#include "optimizer/restrictinfo.h"
2728
#include "parser/parse_expr.h"
2829
#include "parser/parse_oper.h"
30+
#include "parser/parsetree.h"
2931
#include "utils/memutils.h"
3032
#include "utils/selfuncs.h"
3133
#include "utils/syscache.h"
@@ -546,6 +548,30 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
546548

547549
pathnode->subpath = subpath;
548550

551+
/*
552+
* If the input is a subquery that uses DISTINCT, we don't need to do
553+
* anything; its output is already unique. (Are there any other cases
554+
* in which we can easily prove the input must be distinct?)
555+
*/
556+
if (rel->rtekind == RTE_SUBQUERY)
557+
{
558+
RangeTblEntry *rte = rt_fetch(rel->relid, root->rtable);
559+
Query *subquery = rte->subquery;
560+
561+
if (has_distinct_clause(subquery))
562+
{
563+
pathnode->umethod = UNIQUE_PATH_NOOP;
564+
pathnode->rows = rel->rows;
565+
pathnode->path.startup_cost = subpath->startup_cost;
566+
pathnode->path.total_cost = subpath->total_cost;
567+
pathnode->path.pathkeys = subpath->pathkeys;
568+
569+
rel->cheapest_unique_path = (Path *) pathnode;
570+
571+
return pathnode;
572+
}
573+
}
574+
549575
/*
550576
* Try to identify the targetlist that will actually be unique-ified.
551577
* In current usage, this routine is only used for sub-selects of IN
@@ -599,7 +625,7 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
599625
* compare costs. We only try this if we know the targetlist for sure
600626
* (else we can't be sure about the datatypes involved).
601627
*/
602-
pathnode->use_hash = false;
628+
pathnode->umethod = UNIQUE_PATH_SORT;
603629
if (enable_hashagg && sub_targetlist && hash_safe_tlist(sub_targetlist))
604630
{
605631
/*
@@ -617,11 +643,11 @@ create_unique_path(Query *root, RelOptInfo *rel, Path *subpath)
617643
subpath->total_cost,
618644
rel->rows);
619645
if (agg_path.total_cost < sort_path.total_cost)
620-
pathnode->use_hash = true;
646+
pathnode->umethod = UNIQUE_PATH_HASH;
621647
}
622648
}
623649

624-
if (pathnode->use_hash)
650+
if (pathnode->umethod == UNIQUE_PATH_HASH)
625651
{
626652
pathnode->path.startup_cost = agg_path.startup_cost;
627653
pathnode->path.total_cost = agg_path.total_cost;

src/include/nodes/relation.h

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.91 2004/01/05 05:07:36 tgl Exp $
10+
* $PostgreSQL: pgsql/src/include/nodes/relation.h,v 1.92 2004/01/05 18:04:39 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -442,15 +442,26 @@ typedef struct MaterialPath
442442
* its subpath.
443443
*
444444
* This is unlike the other Path nodes in that it can actually generate
445-
* two different plans: either hash-based or sort-based implementation.
446-
* The decision is sufficiently localized that it's not worth having two
447-
* separate Path node types.
445+
* different plans: either hash-based or sort-based implementation, or a
446+
* no-op if the input path can be proven distinct already. The decision
447+
* is sufficiently localized that it's not worth having separate Path node
448+
* types. (Note: in the no-op case, we could eliminate the UniquePath node
449+
* entirely and just return the subpath; but it's convenient to have a
450+
* UniquePath in the path tree to signal upper-level routines that the input
451+
* is known distinct.)
448452
*/
453+
typedef enum
454+
{
455+
UNIQUE_PATH_NOOP, /* input is known unique already */
456+
UNIQUE_PATH_HASH, /* use hashing */
457+
UNIQUE_PATH_SORT /* use sorting */
458+
} UniquePathMethod;
459+
449460
typedef struct UniquePath
450461
{
451462
Path path;
452463
Path *subpath;
453-
bool use_hash;
464+
UniquePathMethod umethod;
454465
double rows; /* estimated number of result tuples */
455466
} UniquePath;
456467

src/include/optimizer/clauses.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.71 2004/01/04 03:51:52 tgl Exp $
10+
* $PostgreSQL: pgsql/src/include/optimizer/clauses.h,v 1.72 2004/01/05 18:04:39 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -57,6 +57,7 @@ extern bool is_pseudo_constant_clause(Node *clause);
5757
extern bool is_pseudo_constant_clause_relids(Node *clause, Relids relids);
5858
extern List *pull_constant_clauses(List *quals, List **constantQual);
5959

60+
extern bool has_distinct_clause(Query *query);
6061
extern bool has_distinct_on_clause(Query *query);
6162

6263
extern int NumRelids(Node *clause);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy