Skip to content

Commit 0dbffa7

Browse files
committed
First cut at making useful selectivity estimates for range queries
(ie, WHERE x > lowbound AND x < highbound). It's not very bright yet but it does something useful. Also, rename intltsel/intgtsel to scalarltsel/scalargtsel to reflect usage better. Extend convert_to_scalar to do something a little bit useful with string data types. Still need to make it do something with date/time datatypes, but I'll wait for Thomas's datetime unification dust to settle first. Eventually the routine ought not have any type-specific knowledge at all; it ought to be calling a type-dependent routine found via a pg_type column; but that's a task for another day.
1 parent 8bcac56 commit 0dbffa7

File tree

9 files changed

+528
-247
lines changed

9 files changed

+528
-247
lines changed

doc/src/sgml/xindex.sgml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
<!--
2-
$Header: /cvsroot/pgsql/doc/src/sgml/xindex.sgml,v 1.6 2000/01/22 23:50:08 tgl Exp $
2+
$Header: /cvsroot/pgsql/doc/src/sgml/xindex.sgml,v 1.7 2000/01/24 07:16:49 tgl Exp $
33
Postgres documentation
44
-->
55

@@ -542,25 +542,25 @@ CREATE OPERATOR = (
542542
oprleft = (SELECT oid FROM pg_type WHERE typname = 'complex_abs');
543543

544544
UPDATE pg_operator
545-
SET oprrest = 'intltsel'::regproc, oprjoin = 'intltjoinsel'
545+
SET oprrest = 'scalarltsel'::regproc, oprjoin = 'scalarltjoinsel'
546546
WHERE oprname = '<' AND
547547
oprleft = oprright AND
548548
oprleft = (SELECT oid FROM pg_type WHERE typname = 'complex_abs');
549549

550550
UPDATE pg_operator
551-
SET oprrest = 'intltsel'::regproc, oprjoin = 'intltjoinsel'
551+
SET oprrest = 'scalarltsel'::regproc, oprjoin = 'scalarltjoinsel'
552552
WHERE oprname = '<=' AND
553553
oprleft = oprright AND
554554
oprleft = (SELECT oid FROM pg_type WHERE typname = 'complex_abs');
555555

556556
UPDATE pg_operator
557-
SET oprrest = 'intgtsel'::regproc, oprjoin = 'intgtjoinsel'
557+
SET oprrest = 'scalargtsel'::regproc, oprjoin = 'scalargtjoinsel'
558558
WHERE oprname = '>' AND
559559
oprleft = oprright AND
560560
oprleft = (SELECT oid FROM pg_type WHERE typname = 'complex_abs');
561561

562562
UPDATE pg_operator
563-
SET oprrest = 'intgtsel'::regproc, oprjoin = 'intgtjoinsel'
563+
SET oprrest = 'scalargtsel'::regproc, oprjoin = 'scalargtjoinsel'
564564
WHERE oprname = '>=' AND
565565
oprleft = oprright AND
566566
oprleft = (SELECT oid FROM pg_type WHERE typname = 'complex_abs');</filename></filename>

doc/src/sgml/xoper.sgml

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -231,8 +231,8 @@ SELECT (a + b) AS c FROM test_complex;
231231
<ProgramListing>
232232
eqsel for =
233233
neqsel for &lt;&gt;
234-
intltsel for &lt; or &lt;=
235-
intgtsel for &gt; or &gt;=
234+
scalarltsel for &lt; or &lt;=
235+
scalargtsel for &gt; or &gt;=
236236
</ProgramListing>
237237
It might seem a little odd that these are the categories, but they
238238
make sense if you think about it. '=' will typically accept only
@@ -254,6 +254,17 @@ SELECT (a + b) AS c FROM test_complex;
254254
matching operators (~, ~*, etc) use eqsel on the assumption that they'll
255255
usually only match a small fraction of the entries in a table.
256256
</para>
257+
258+
<para>
259+
You can use scalarltsel and scalargtsel for comparisons on datatypes that
260+
have some sensible means of being converted into numeric scalars for
261+
range comparisons. If possible, add the datatype to those understood
262+
by the routine convert_to_scalar() in src/backend/utils/adt/selfuncs.c.
263+
(Eventually, this routine should be replaced by per-datatype functions
264+
identified through a column of the pg_type table; but that hasn't happened
265+
yet.) If you do not do this, things will still work, but the optimizer's
266+
estimates won't be as good as they could be.
267+
</para>
257268
</sect2>
258269

259270
<sect2>
@@ -281,8 +292,8 @@ SELECT (a + b) AS c FROM test_complex;
281292
<ProgramListing>
282293
eqjoinsel for =
283294
neqjoinsel for &lt;&gt;
284-
intltjoinsel for &lt; or &lt;=
285-
intgtjoinsel for &gt; or &gt;=
295+
scalarltjoinsel for &lt; or &lt;=
296+
scalargtjoinsel for &gt; or &gt;=
286297
</ProgramListing>
287298
</para>
288299
</sect2>

src/backend/optimizer/path/clausesel.c

Lines changed: 235 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.28 2000/01/23 02:06:58 tgl Exp $
10+
* $Header: /cvsroot/pgsql/src/backend/optimizer/path/clausesel.c,v 1.29 2000/01/24 07:16:46 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -23,6 +23,23 @@
2323
#include "utils/lsyscache.h"
2424

2525

26+
/*
27+
* Data structure for accumulating info about possible range-query
28+
* clause pairs in clauselist_selectivity.
29+
*/
30+
typedef struct RangeQueryClause {
31+
struct RangeQueryClause *next; /* next in linked list */
32+
Node *var; /* The common variable of the clauses */
33+
bool have_lobound; /* found a low-bound clause yet? */
34+
bool have_hibound; /* found a high-bound clause yet? */
35+
Selectivity lobound; /* Selectivity of a var > something clause */
36+
Selectivity hibound; /* Selectivity of a var < something clause */
37+
} RangeQueryClause;
38+
39+
static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
40+
int flag, bool isLTsel, Selectivity s2);
41+
42+
2643
/****************************************************************************
2744
* ROUTINES TO COMPUTE SELECTIVITIES
2845
****************************************************************************/
@@ -55,29 +72,237 @@ restrictlist_selectivity(Query *root,
5572
* must be returned.
5673
*
5774
* See clause_selectivity() for the meaning of the varRelid parameter.
75+
*
76+
* Our basic approach is to take the product of the selectivities of the
77+
* subclauses. However, that's only right if the subclauses have independent
78+
* probabilities, and in reality they are often NOT independent. So,
79+
* we want to be smarter where we can.
80+
81+
* Currently, the only extra smarts we have is to recognize "range queries",
82+
* such as "x > 34 AND x < 42". Clauses are recognized as possible range
83+
* query components if they are restriction opclauses whose operators have
84+
* scalarltsel() or scalargtsel() as their restriction selectivity estimator.
85+
* We pair up clauses of this form that refer to the same variable. An
86+
* unpairable clause of this kind is simply multiplied into the selectivity
87+
* product in the normal way. But when we find a pair, we know that the
88+
* selectivities represent the relative positions of the low and high bounds
89+
* within the column's range, so instead of figuring the selectivity as
90+
* hisel * losel, we can figure it as hisel + losel - 1. (To visualize this,
91+
* see that hisel is the fraction of the range below the high bound, while
92+
* losel is the fraction above the low bound; so hisel can be interpreted
93+
* directly as a 0..1 value but we need to convert losel to 1-losel before
94+
* interpreting it as a value. Then the available range is 1-losel to hisel.)
95+
* If the calculation yields zero or negative, however, we chicken out and
96+
* use the default interpretation; that probably means that one or both
97+
* selectivities is a default estimate rather than an actual range value.
98+
* Of course this is all very dependent on the behavior of
99+
* scalarltsel/scalargtsel; perhaps some day we can generalize the approach.
58100
*/
59101
Selectivity
60102
clauselist_selectivity(Query *root,
61103
List *clauses,
62104
int varRelid)
63105
{
64-
Selectivity s1 = 1.0;
65-
List *clause;
106+
Selectivity s1 = 1.0;
107+
RangeQueryClause *rqlist = NULL;
108+
List *clist;
66109

67-
/* Use the product of the selectivities of the subclauses.
68-
* XXX this is too optimistic, since the subclauses
69-
* are very likely not independent...
110+
/*
111+
* Initial scan over clauses. Anything that doesn't look like a
112+
* potential rangequery clause gets multiplied into s1 and forgotten.
113+
* Anything that does gets inserted into an rqlist entry.
70114
*/
71-
foreach(clause, clauses)
115+
foreach(clist, clauses)
72116
{
73-
Selectivity s2 = clause_selectivity(root,
74-
(Node *) lfirst(clause),
75-
varRelid);
117+
Node *clause = (Node *) lfirst(clist);
118+
Selectivity s2;
119+
120+
/*
121+
* See if it looks like a restriction clause with a constant.
122+
* (If it's not a constant we can't really trust the selectivity!)
123+
* NB: for consistency of results, this fragment of code had
124+
* better match what clause_selectivity() would do.
125+
*/
126+
if (varRelid != 0 || NumRelids(clause) == 1)
127+
{
128+
int relidx;
129+
AttrNumber attno;
130+
Datum constval;
131+
int flag;
132+
133+
get_relattval(clause, varRelid,
134+
&relidx, &attno, &constval, &flag);
135+
if (relidx != 0 && (flag & SEL_CONSTANT))
136+
{
137+
/* if get_relattval succeeded, it must be an opclause */
138+
Oid opno = ((Oper *) ((Expr *) clause)->oper)->opno;
139+
RegProcedure oprrest = get_oprrest(opno);
140+
141+
if (!oprrest)
142+
s2 = (Selectivity) 0.5;
143+
else
144+
s2 = restriction_selectivity(oprrest, opno,
145+
getrelid(relidx,
146+
root->rtable),
147+
attno,
148+
constval, flag);
149+
/*
150+
* If we reach here, we have computed the same result
151+
* that clause_selectivity would, so we can just use s2
152+
* if it's the wrong oprrest. But if it's the right
153+
* oprrest, add the clause to rqlist for later processing.
154+
*/
155+
switch (oprrest)
156+
{
157+
case F_SCALARLTSEL:
158+
addRangeClause(&rqlist, clause, flag, true, s2);
159+
break;
160+
case F_SCALARGTSEL:
161+
addRangeClause(&rqlist, clause, flag, false, s2);
162+
break;
163+
default:
164+
/* Just merge the selectivity in generically */
165+
s1 = s1 * s2;
166+
break;
167+
}
168+
continue; /* drop to loop bottom */
169+
}
170+
}
171+
/* Not the right form, so treat it generically. */
172+
s2 = clause_selectivity(root, clause, varRelid);
76173
s1 = s1 * s2;
77174
}
175+
176+
/*
177+
* Now scan the rangequery pair list.
178+
*/
179+
while (rqlist != NULL)
180+
{
181+
RangeQueryClause *rqnext;
182+
183+
if (rqlist->have_lobound && rqlist->have_hibound)
184+
{
185+
/* Successfully matched a pair of range clauses */
186+
Selectivity s2 = rqlist->hibound + rqlist->lobound - 1.0;
187+
188+
if (s2 > 0.0)
189+
{
190+
/* All our hard work has paid off! */
191+
s1 *= s2;
192+
}
193+
else
194+
{
195+
/* One or both is probably a default estimate,
196+
* so punt and just merge them in generically.
197+
*/
198+
s1 *= rqlist->hibound * rqlist->lobound;
199+
}
200+
}
201+
else
202+
{
203+
/* Only found one of a pair, merge it in generically */
204+
if (rqlist->have_lobound)
205+
s1 *= rqlist->lobound;
206+
else
207+
s1 *= rqlist->hibound;
208+
}
209+
/* release storage and advance */
210+
rqnext = rqlist->next;
211+
pfree(rqlist);
212+
rqlist = rqnext;
213+
}
214+
78215
return s1;
79216
}
80217

218+
/*
219+
* addRangeClause --- add a new range clause for clauselist_selectivity
220+
*
221+
* Here is where we try to match up pairs of range-query clauses
222+
*/
223+
static void
224+
addRangeClause(RangeQueryClause **rqlist, Node *clause,
225+
int flag, bool isLTsel, Selectivity s2)
226+
{
227+
RangeQueryClause *rqelem;
228+
Node *var;
229+
bool is_lobound;
230+
231+
/* get_relattval sets flag&SEL_RIGHT if the var is on the LEFT. */
232+
if (flag & SEL_RIGHT)
233+
{
234+
var = (Node *) get_leftop((Expr *) clause);
235+
is_lobound = ! isLTsel; /* x < something is high bound */
236+
}
237+
else
238+
{
239+
var = (Node *) get_rightop((Expr *) clause);
240+
is_lobound = isLTsel; /* something < x is low bound */
241+
}
242+
243+
for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
244+
{
245+
/* We use full equal() here because the "var" might be a function
246+
* of one or more attributes of the same relation...
247+
*/
248+
if (! equal(var, rqelem->var))
249+
continue;
250+
/* Found the right group to put this clause in */
251+
if (is_lobound)
252+
{
253+
if (! rqelem->have_lobound)
254+
{
255+
rqelem->have_lobound = true;
256+
rqelem->lobound = s2;
257+
}
258+
else
259+
{
260+
/* We have found two similar clauses, such as
261+
* x < y AND x < z. Keep only the more restrictive one.
262+
*/
263+
if (rqelem->lobound > s2)
264+
rqelem->lobound = s2;
265+
}
266+
}
267+
else
268+
{
269+
if (! rqelem->have_hibound)
270+
{
271+
rqelem->have_hibound = true;
272+
rqelem->hibound = s2;
273+
}
274+
else
275+
{
276+
/* We have found two similar clauses, such as
277+
* x > y AND x > z. Keep only the more restrictive one.
278+
*/
279+
if (rqelem->hibound > s2)
280+
rqelem->hibound = s2;
281+
}
282+
}
283+
return;
284+
}
285+
286+
/* No matching var found, so make a new clause-pair data structure */
287+
rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
288+
rqelem->var = var;
289+
if (is_lobound)
290+
{
291+
rqelem->have_lobound = true;
292+
rqelem->have_hibound = false;
293+
rqelem->lobound = s2;
294+
}
295+
else
296+
{
297+
rqelem->have_lobound = false;
298+
rqelem->have_hibound = true;
299+
rqelem->hibound = s2;
300+
}
301+
rqelem->next = *rqlist;
302+
*rqlist = rqelem;
303+
}
304+
305+
81306
/*
82307
* clause_selectivity -
83308
* Compute the selectivity of a general boolean expression clause.

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy