Skip to content

Commit b8c798e

Browse files
committed
Tweak tsmatchsel() so that it examines the structure of the tsquery whenever
possible (i.e., whenever the tsquery is a constant), even when no statistics are available for the tsvector. For example, foo @@ 'a & b'::tsquery can be expected to be more selective than foo @@ 'a'::tsquery, whether or not we know anything about foo. We use DEFAULT_TS_MATCH_SEL as the assumed selectivity of individual query terms when no stats are available, then combine the terms according to the query's AND/OR structure as usual. Per experimentation with Artur Dabrowski's example. (The fact that there are no stats available in that example is a problem in itself, but nonetheless tsmatchsel should be smarter about the case.) Back-patch to 8.4 to keep all versions of tsmatchsel() in sync.
1 parent 2ab57e0 commit b8c798e

File tree

1 file changed

+25
-16
lines changed

1 file changed

+25
-16
lines changed

src/backend/tsearch/ts_selfuncs.c

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/tsearch/ts_selfuncs.c,v 1.7 2010/01/04 02:44:39 tgl Exp $
10+
* $PostgreSQL: pgsql/src/backend/tsearch/ts_selfuncs.c,v 1.8 2010/07/31 03:27:40 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -52,6 +52,9 @@ static Selectivity tsquery_opr_selec(QueryItem *item, char *operand,
5252
TextFreq *lookup, int length, float4 minfreq);
5353
static int compare_lexeme_textfreq(const void *e1, const void *e2);
5454

55+
#define tsquery_opr_selec_no_stats(query) \
56+
tsquery_opr_selec(GETQUERY(query), GETOPERAND(query), NULL, 0, 0)
57+
5558

5659
/*
5760
* tsmatchsel -- Selectivity of "@@"
@@ -101,21 +104,20 @@ tsmatchsel(PG_FUNCTION_ARGS)
101104
}
102105

103106
/*
104-
* OK, there's a Var and a Const we're dealing with here. We need the Var
105-
* to be a TSVector (or else we don't have any useful statistic for it).
106-
* We have to check this because the Var might be the TSQuery not the
107-
* TSVector.
107+
* OK, there's a Var and a Const we're dealing with here. We need the
108+
* Const to be a TSQuery, else we can't do anything useful. We have to
109+
* check this because the Var might be the TSQuery not the TSVector.
108110
*/
109-
if (vardata.vartype == TSVECTOROID)
111+
if (((Const *) other)->consttype == TSQUERYOID)
110112
{
111113
/* tsvector @@ tsquery or the other way around */
112-
Assert(((Const *) other)->consttype == TSQUERYOID);
114+
Assert(vardata.vartype == TSVECTOROID);
113115

114116
selec = tsquerysel(&vardata, ((Const *) other)->constvalue);
115117
}
116118
else
117119
{
118-
/* The Var is something we don't have useful statistics for */
120+
/* If we can't see the query structure, must punt */
119121
selec = DEFAULT_TS_MATCH_SEL;
120122
}
121123

@@ -184,14 +186,14 @@ tsquerysel(VariableStatData *vardata, Datum constval)
184186
}
185187
else
186188
{
187-
/* No most-common-elements info, so we must punt */
188-
selec = (Selectivity) DEFAULT_TS_MATCH_SEL;
189+
/* No most-common-elements info, so do without */
190+
selec = tsquery_opr_selec_no_stats(query);
189191
}
190192
}
191193
else
192194
{
193-
/* No stats at all, so we must punt */
194-
selec = (Selectivity) DEFAULT_TS_MATCH_SEL;
195+
/* No stats at all, so do without */
196+
selec = tsquery_opr_selec_no_stats(query);
195197
}
196198

197199
return selec;
@@ -214,7 +216,7 @@ mcelem_tsquery_selec(TSQuery query, Datum *mcelem, int nmcelem,
214216
* cells are taken for minimal and maximal frequency. Punt if not.
215217
*/
216218
if (nnumbers != nmcelem + 2)
217-
return DEFAULT_TS_MATCH_SEL;
219+
return tsquery_opr_selec_no_stats(query);
218220

219221
/*
220222
* Transpose the data into a single array so we can use bsearch().
@@ -258,9 +260,12 @@ mcelem_tsquery_selec(TSQuery query, Datum *mcelem, int nmcelem,
258260
* freq[val] in VAL nodes, if the value is in MCELEM
259261
* min(freq[MCELEM]) / 2 in VAL nodes, if it is not
260262
*
261-
*
262263
* The MCELEM array is already sorted (see ts_typanalyze.c), so we can use
263264
* binary search for determining freq[MCELEM].
265+
*
266+
* If we don't have stats for the tsvector, we still use this logic,
267+
* except we always use DEFAULT_TS_MATCH_SEL for VAL nodes. This case
268+
* is signaled by lookup == NULL.
264269
*/
265270
static Selectivity
266271
tsquery_opr_selec(QueryItem *item, char *operand,
@@ -279,6 +284,10 @@ tsquery_opr_selec(QueryItem *item, char *operand,
279284
{
280285
QueryOperand *oper = (QueryOperand *) item;
281286

287+
/* If no stats for the variable, use DEFAULT_TS_MATCH_SEL */
288+
if (lookup == NULL)
289+
return (Selectivity) DEFAULT_TS_MATCH_SEL;
290+
282291
/*
283292
* Prepare the key for bsearch().
284293
*/
@@ -292,15 +301,15 @@ tsquery_opr_selec(QueryItem *item, char *operand,
292301
if (searchres)
293302
{
294303
/*
295-
* The element is in MCELEM. Return precise selectivity (or at
304+
* The element is in MCELEM. Return precise selectivity (or at
296305
* least as precise as ANALYZE could find out).
297306
*/
298307
return (Selectivity) searchres->frequency;
299308
}
300309
else
301310
{
302311
/*
303-
* The element is not in MCELEM. Punt, but assert that the
312+
* The element is not in MCELEM. Punt, but assume that the
304313
* selectivity cannot be more than minfreq / 2.
305314
*/
306315
return (Selectivity) Min(DEFAULT_TS_MATCH_SEL, minfreq / 2);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy