Skip to content

Commit 38bb3ae

Browse files
committed
Convert tsginidx.c's GIN indexing logic to fully ternary operation.
Commit 2f2007f did this partially, but there were two remaining warts. checkcondition_gin handled some uncertain cases by setting the out-of-band recheck flag, some by returning TS_MAYBE, and some by doing both. Meanwhile, TS_execute arbitrarily converted a TS_MAYBE result to TS_YES. Thus, if checkcondition_gin chose to only return TS_MAYBE, the outcome would be TS_YES with no recheck flag, potentially resulting in wrong query outputs. The case where this'd happen is if there were GIN_MAYBE entries in the indexscan results passed to gin_tsquery_[tri]consistent, which so far as I can see would only happen if the tidbitmap used to accumulate indexscan results grew large enough to become lossy. I initially thought of fixing this by ensuring we always set the recheck flag as well as returning TS_MAYBE in uncertain cases. But that errs in the other direction, potentially forcing rechecks of rows that provably match the query (since the recheck flag remains set even if TS_execute later finds that the answer must be TS_YES). Instead, let's get rid of the out-of-band recheck flag altogether and rely on returning TS_MAYBE. This requires exporting a version of TS_execute that will actually return the full ternary result of the evaluation ... but we likely should have done that to start with. Unfortunately it doesn't seem practical to add a regression test case that covers this: the amount of data needed to cause the GIN bitmap to become lossy results in a longer runtime than I think we want to have in the tests. (I'm wondering about allowing smaller work_mem settings to ameliorate that, but it'd be a matter for a separate patch.) Per bug #16865 from Dimitri Nüscheler. Back-patch to v13 where the faulty commit came in. Discussion: https://postgr.es/m/16865-4ffdc3e682e6d75b@postgresql.org
1 parent f672df5 commit 38bb3ae

File tree

3 files changed

+44
-28
lines changed

3 files changed

+44
-28
lines changed

src/backend/utils/adt/tsginidx.c

Lines changed: 29 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -175,7 +175,6 @@ typedef struct
175175
QueryItem *first_item;
176176
GinTernaryValue *check;
177177
int *map_item_operand;
178-
bool *need_recheck;
179178
} GinChkVal;
180179

181180
/*
@@ -186,33 +185,30 @@ checkcondition_gin(void *checkval, QueryOperand *val, ExecPhraseData *data)
186185
{
187186
GinChkVal *gcv = (GinChkVal *) checkval;
188187
int j;
189-
190-
/*
191-
* if any val requiring a weight is used or caller needs position
192-
* information then set recheck flag
193-
*/
194-
if (val->weight != 0 || data != NULL)
195-
*(gcv->need_recheck) = true;
188+
GinTernaryValue result;
196189

197190
/* convert item's number to corresponding entry's (operand's) number */
198191
j = gcv->map_item_operand[((QueryItem *) val) - gcv->first_item];
199192

193+
/* determine presence of current entry in indexed value */
194+
result = gcv->check[j];
195+
200196
/*
201-
* return presence of current entry in indexed value; but TRUE becomes
202-
* MAYBE in the presence of a query requiring recheck
197+
* If any val requiring a weight is used or caller needs position
198+
* information then we must recheck, so replace TRUE with MAYBE.
203199
*/
204-
if (gcv->check[j] == GIN_TRUE)
200+
if (result == GIN_TRUE)
205201
{
206202
if (val->weight != 0 || data != NULL)
207-
return TS_MAYBE;
203+
result = GIN_MAYBE;
208204
}
209205

210206
/*
211207
* We rely on GinTernaryValue and TSTernaryValue using equivalent value
212208
* assignments. We could use a switch statement to map the values if that
213209
* ever stops being true, but it seems unlikely to happen.
214210
*/
215-
return (TSTernaryValue) gcv->check[j];
211+
return (TSTernaryValue) result;
216212
}
217213

218214
Datum
@@ -244,12 +240,23 @@ gin_tsquery_consistent(PG_FUNCTION_ARGS)
244240
"sizes of GinTernaryValue and bool are not equal");
245241
gcv.check = (GinTernaryValue *) check;
246242
gcv.map_item_operand = (int *) (extra_data[0]);
247-
gcv.need_recheck = recheck;
248243

249-
res = TS_execute(GETQUERY(query),
250-
&gcv,
251-
TS_EXEC_PHRASE_NO_POS,
252-
checkcondition_gin);
244+
switch (TS_execute_ternary(GETQUERY(query),
245+
&gcv,
246+
TS_EXEC_PHRASE_NO_POS,
247+
checkcondition_gin))
248+
{
249+
case TS_NO:
250+
res = false;
251+
break;
252+
case TS_YES:
253+
res = true;
254+
break;
255+
case TS_MAYBE:
256+
res = true;
257+
*recheck = true;
258+
break;
259+
}
253260
}
254261

255262
PG_RETURN_BOOL(res);
@@ -266,10 +273,6 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
266273
/* int32 nkeys = PG_GETARG_INT32(3); */
267274
Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4);
268275
GinTernaryValue res = GIN_FALSE;
269-
bool recheck;
270-
271-
/* Initially assume query doesn't require recheck */
272-
recheck = false;
273276

274277
if (query->size > 0)
275278
{
@@ -282,13 +285,11 @@ gin_tsquery_triconsistent(PG_FUNCTION_ARGS)
282285
gcv.first_item = GETQUERY(query);
283286
gcv.check = check;
284287
gcv.map_item_operand = (int *) (extra_data[0]);
285-
gcv.need_recheck = &recheck;
286288

287-
if (TS_execute(GETQUERY(query),
288-
&gcv,
289-
TS_EXEC_PHRASE_NO_POS,
290-
checkcondition_gin))
291-
res = recheck ? GIN_MAYBE : GIN_TRUE;
289+
res = TS_execute_ternary(GETQUERY(query),
290+
&gcv,
291+
TS_EXEC_PHRASE_NO_POS,
292+
checkcondition_gin);
292293
}
293294

294295
PG_RETURN_GIN_TERNARY_VALUE(res);

src/backend/utils/adt/tsvector_op.c

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1854,6 +1854,18 @@ TS_execute(QueryItem *curitem, void *arg, uint32 flags,
18541854
return TS_execute_recurse(curitem, arg, flags, chkcond) != TS_NO;
18551855
}
18561856

1857+
/*
1858+
* Evaluate tsquery boolean expression.
1859+
*
1860+
* This is the same as TS_execute except that TS_MAYBE is returned as-is.
1861+
*/
1862+
TSTernaryValue
1863+
TS_execute_ternary(QueryItem *curitem, void *arg, uint32 flags,
1864+
TSExecuteCallback chkcond)
1865+
{
1866+
return TS_execute_recurse(curitem, arg, flags, chkcond);
1867+
}
1868+
18571869
/*
18581870
* TS_execute recursion for operators above any phrase operator. Here we do
18591871
* not need to worry about lexeme positions. As soon as we hit an OP_PHRASE

src/include/tsearch/ts_utils.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,9 @@ typedef TSTernaryValue (*TSExecuteCallback) (void *arg, QueryOperand *val,
199199

200200
extern bool TS_execute(QueryItem *curitem, void *arg, uint32 flags,
201201
TSExecuteCallback chkcond);
202+
extern TSTernaryValue TS_execute_ternary(QueryItem *curitem, void *arg,
203+
uint32 flags,
204+
TSExecuteCallback chkcond);
202205
extern bool tsquery_requires_match(QueryItem *curitem);
203206

204207
/*

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy