Skip to content

Commit 003dd96

Browse files
committed
Apply the heuristic proposed by Taral (see pgsql-general archives for
2-Oct-98 or TODO.detail/cnfify) to decide whether we want to reduce WHERE clause to CNF form, DNF form, or neither. This is a HUGE win. The heuristic conditions could probably still use a little tweaking to make sure we don't pick CNF when DNF would be better, or vice versa, but the risk of exponential explosion in cnfify() is gone. I was able to run ten-thousand-AND-subclause queries through the planner in a reasonable amount of time.
1 parent b53955f commit 003dd96

File tree

1 file changed

+106
-53
lines changed

1 file changed

+106
-53
lines changed

src/backend/optimizer/prep/prepqual.c

Lines changed: 106 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepqual.c,v 1.21 2000/01/26 05:56:39 momjian Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/optimizer/prep/prepqual.c,v 1.22 2000/01/28 03:22:36 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -32,7 +32,8 @@ static Expr *find_ands(Expr *qual);
3232
static Expr *and_normalize(List *andlist);
3333
static Expr *qual_cleanup(Expr *qual);
3434
static List *remove_duplicates(List *list);
35-
static int count_bool_nodes(Expr *qual);
35+
static void count_bool_nodes(Expr *qual, double *nodes,
36+
double *cnfnodes, double *dnfnodes);
3637

3738
/*****************************************************************************
3839
*
@@ -84,12 +85,12 @@ static int count_bool_nodes(Expr *qual);
8485
List *
8586
canonicalize_qual(Expr *qual, bool removeAndFlag)
8687
{
87-
Expr *newqual,
88-
*cnfqual,
89-
*dnfqual;
90-
int qualcnt,
91-
cnfcnt,
92-
dnfcnt;
88+
Expr *newqual;
89+
double nodes,
90+
cnfnodes,
91+
dnfnodes;
92+
bool cnfok,
93+
dnfok;
9394

9495
if (qual == NULL)
9596
return NIL;
@@ -98,57 +99,64 @@ canonicalize_qual(Expr *qual, bool removeAndFlag)
9899
* This improvement is always worthwhile, so do it unconditionally.
99100
*/
100101
qual = flatten_andors(qual);
102+
101103
/* Push down NOTs. We do this only in the top-level boolean
102104
* expression, without examining arguments of operators/functions.
103105
* Even so, it might not be a win if we are unable to find negators
104-
* for all the operators involved; so we keep the flattened-but-not-
105-
* NOT-pushed qual as the reference point for comparsions.
106+
* for all the operators involved; perhaps we should compare before-
107+
* and-after tree sizes?
106108
*/
107109
newqual = find_nots(qual);
108-
/*
109-
* Generate both CNF and DNF forms from newqual.
110-
*/
111-
/* Normalize into conjunctive normal form, and clean up the result. */
112-
cnfqual = qual_cleanup(find_ors(newqual));
113-
/* Likewise for DNF */
114-
dnfqual = qual_cleanup(find_ands(newqual));
115110

116111
/*
117-
* Now, choose whether to return qual, cnfqual, or dnfqual.
112+
* Choose whether to convert to CNF, or DNF, or leave well enough alone.
118113
*
119-
* First heuristic is to forget about either CNF or DNF if it shows
114+
* We make an approximate estimate of the number of bottom-level nodes
115+
* that will appear in the CNF and DNF forms of the query.
116+
*/
117+
count_bool_nodes(newqual, &nodes, &cnfnodes, &dnfnodes);
118+
/*
119+
* First heuristic is to forget about *both* normal forms if there are
120+
* a huge number of terms in the qual clause. This would only happen
121+
* with machine-generated queries, presumably; and most likely such
122+
* a query is already in either CNF or DNF.
123+
*/
124+
cnfok = dnfok = true;
125+
if (nodes >= 500.0)
126+
cnfok = dnfok = false;
127+
/*
128+
* Second heuristic is to forget about either CNF or DNF if it shows
120129
* unreasonable growth compared to the original form of the qual,
121130
* where we define "unreasonable" a tad arbitrarily as 4x more
122131
* operators.
123132
*/
124-
qualcnt = count_bool_nodes(qual);
125-
cnfcnt = count_bool_nodes(cnfqual);
126-
dnfcnt = count_bool_nodes(dnfqual);
127-
if (cnfcnt >= 4 * qualcnt)
128-
cnfqual = NULL; /* mark CNF not usable */
129-
if (dnfcnt >= 4 * qualcnt)
130-
dnfqual = NULL; /* mark DNF not usable */
131-
133+
if (cnfnodes >= 4.0 * nodes)
134+
cnfok = false;
135+
if (dnfnodes >= 4.0 * nodes)
136+
dnfok = false;
132137
/*
133-
* Second heuristic is to prefer DNF if only one relation is mentioned
134-
* and it is smaller than the CNF representation.
138+
* Third heuristic is to prefer DNF if top level is already an OR,
139+
* and only one relation is mentioned, and DNF is no larger than
140+
* the CNF representation. (Pretty shaky; can we improve on this?)
135141
*/
136-
if (dnfqual && dnfcnt < cnfcnt && NumRelids((Node *) dnfqual) == 1)
137-
cnfqual = NULL;
138-
142+
if (dnfok && dnfnodes <= cnfnodes && or_clause((Node *) newqual) &&
143+
NumRelids((Node *) newqual) == 1)
144+
cnfok = false;
139145
/*
140146
* Otherwise, we prefer CNF.
141147
*
142148
* XXX obviously, these rules could be improved upon.
143149
*/
144-
145-
/* pick preferred survivor */
146-
if (cnfqual)
147-
newqual = cnfqual;
148-
else if (dnfqual)
149-
newqual = dnfqual;
150-
else
151-
newqual = qual;
150+
if (cnfok)
151+
{
152+
/* Normalize into conjunctive normal form, and clean up the result. */
153+
newqual = qual_cleanup(find_ors(newqual));
154+
}
155+
else if (dnfok)
156+
{
157+
/* Normalize into disjunctive normal form, and clean up the result. */
158+
newqual = qual_cleanup(find_ands(newqual));
159+
}
152160

153161
/* Convert to implicit-AND list if requested */
154162
if (removeAndFlag)
@@ -828,27 +836,72 @@ remove_duplicates(List *list)
828836
/*
829837
* count_bool_nodes
830838
* Support for heuristics in canonicalize_qual(): count the
831-
* number of nodes in the top level AND/OR/NOT part of a qual tree
839+
* number of nodes that are inputs to the top level AND/OR/NOT
840+
* part of a qual tree, and estimate how many nodes will appear
841+
* in the CNF'ified or DNF'ified equivalent of the expression.
842+
*
843+
* This is just an approximate calculation; it doesn't deal with NOTs
844+
* very well, and of course it cannot detect possible simplifications
845+
* from eliminating duplicate subclauses. The idea is just to cheaply
846+
* determine whether CNF will be markedly worse than DNF or vice versa.
847+
*
848+
* The counts/estimates are represented as doubles to avoid risk of overflow.
832849
*/
833-
static int
834-
count_bool_nodes(Expr *qual)
850+
static void
851+
count_bool_nodes(Expr *qual,
852+
double *nodes,
853+
double *cnfnodes,
854+
double *dnfnodes)
835855
{
836-
if (qual == NULL)
837-
return 0;
856+
List *temp;
857+
double subnodes, subcnfnodes, subdnfnodes;
838858

839-
if (and_clause((Node *) qual) ||
840-
or_clause((Node *) qual))
859+
if (and_clause((Node *) qual))
841860
{
842-
int sum = 1; /* for the and/or itself */
843-
List *temp;
861+
*nodes = *cnfnodes = 0.0;
862+
*dnfnodes = 1.0; /* DNF nodes will be product of sub-counts */
844863

845864
foreach(temp, qual->args)
846-
sum += count_bool_nodes(lfirst(temp));
865+
{
866+
count_bool_nodes(lfirst(temp),
867+
&subnodes, &subcnfnodes, &subdnfnodes);
868+
*nodes += subnodes;
869+
*cnfnodes += subcnfnodes;
870+
*dnfnodes *= subdnfnodes;
871+
}
872+
/* we could get dnfnodes < cnfnodes here, if all the sub-nodes are
873+
* simple ones with count 1. Make sure dnfnodes isn't too small.
874+
*/
875+
if (*dnfnodes < *cnfnodes)
876+
*dnfnodes = *cnfnodes;
877+
}
878+
else if (or_clause((Node *) qual))
879+
{
880+
*nodes = *dnfnodes = 0.0;
881+
*cnfnodes = 1.0; /* CNF nodes will be product of sub-counts */
847882

848-
return sum;
883+
foreach(temp, qual->args)
884+
{
885+
count_bool_nodes(lfirst(temp),
886+
&subnodes, &subcnfnodes, &subdnfnodes);
887+
*nodes += subnodes;
888+
*cnfnodes *= subcnfnodes;
889+
*dnfnodes += subdnfnodes;
890+
}
891+
/* we could get cnfnodes < dnfnodes here, if all the sub-nodes are
892+
* simple ones with count 1. Make sure cnfnodes isn't too small.
893+
*/
894+
if (*cnfnodes < *dnfnodes)
895+
*cnfnodes = *dnfnodes;
849896
}
850897
else if (not_clause((Node *) qual))
851-
return count_bool_nodes(get_notclausearg(qual)) + 1;
898+
{
899+
count_bool_nodes(get_notclausearg(qual),
900+
nodes, cnfnodes, dnfnodes);
901+
}
852902
else
853-
return 1; /* anything else counts 1 for my purposes */
903+
{
904+
/* anything else counts 1 for my purposes */
905+
*nodes = *cnfnodes = *dnfnodes = 1.0;
906+
}
854907
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy