Skip to content

Commit eb08605

Browse files
committed
Make websearch_to_tsquery() parse text in quotes as a single token
websearch_to_tsquery() splits text in quotes into tokens and connects them with the phrase operator on its own. However, that leads to surprising results when a token contains no words. For instance, websearch_to_tsquery('"aaa: bbb"') is 'aaa <2> bbb', because it is equivalent to to_tsquery(E'aaa <-> \':\' <-> bbb'). But websearch_to_tsquery('"aaa: bbb"') has to be 'aaa <-> bbb' in order to match to_tsvector('aaa: bbb'). Since 0c4f355, we connect lexemes of complex tokens with phrase operators anyway. Thus, let's just make websearch_to_tsquery() parse text in quotes as a single token. As a result, websearch_to_tsquery() processes the quoted text in the same way phraseto_tsquery() does. This solution is exactly what we need and also simplifies the code. This commit is an incompatible change, so we don't backpatch it. Reported-by: Valentin Gatien-Baron Discussion: https://postgr.es/m/CA%2B0DEqiZs7gdOd4ikmg%3D0UWG%2BSwWOLxPsk_JW-sx9WNOyrb0KQ%40mail.gmail.com Author: Alexander Korotkov Reviewed-by: Tom Lane, Zhihong Yu
1 parent 651d005 commit eb08605

File tree

3 files changed

+39
-67
lines changed

3 files changed

+39
-67
lines changed

src/backend/utils/adt/tsquery.c

Lines changed: 23 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,6 @@ struct TSQueryParserStateData
7777
char *buf; /* current scan point */
7878
int count; /* nesting count, incremented by (,
7979
* decremented by ) */
80-
bool in_quotes; /* phrase in quotes "" */
8180
ts_parserstate state;
8281

8382
/* polish (prefix) notation in list, filled in by push* functions */
@@ -235,9 +234,6 @@ parse_or_operator(TSQueryParserState pstate)
235234
{
236235
char *ptr = pstate->buf;
237236

238-
if (pstate->in_quotes)
239-
return false;
240-
241237
/* it should begin with "OR" literal */
242238
if (pg_strncasecmp(ptr, "or", 2) != 0)
243239
return false;
@@ -398,38 +394,29 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
398394
state->buf++;
399395
state->state = WAITOPERAND;
400396

401-
if (state->in_quotes)
402-
continue;
403-
404397
*operator = OP_NOT;
405398
return PT_OPR;
406399
}
407400
else if (t_iseq(state->buf, '"'))
408401
{
402+
/* Everything in quotes is processed as a single token */
403+
404+
/* skip opening quote */
409405
state->buf++;
406+
*strval = state->buf;
410407

411-
if (!state->in_quotes)
412-
{
413-
state->state = WAITOPERAND;
408+
/* iterate to the closing quote or end of the string */
409+
while (*state->buf != '\0' && !t_iseq(state->buf, '"'))
410+
state->buf++;
411+
*lenval = state->buf - *strval;
414412

415-
if (strchr(state->buf, '"'))
416-
{
417-
/* quoted text should be ordered <-> */
418-
state->in_quotes = true;
419-
return PT_OPEN;
420-
}
413+
/* skip closing quote if not end of the string */
414+
if (*state->buf != '\0')
415+
state->buf++;
421416

422-
/* web search tolerates missing quotes */
423-
continue;
424-
}
425-
else
426-
{
427-
/* we have to provide an operand */
428-
state->in_quotes = false;
429-
state->state = WAITOPERATOR;
430-
pushStop(state);
431-
return PT_CLOSE;
432-
}
417+
state->state = WAITOPERATOR;
418+
state->count++;
419+
return PT_VAL;
433420
}
434421
else if (ISOPERATOR(state->buf))
435422
{
@@ -467,24 +454,13 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
467454
case WAITOPERATOR:
468455
if (t_iseq(state->buf, '"'))
469456
{
470-
if (!state->in_quotes)
471-
{
472-
/*
473-
* put implicit AND after an operand and handle this
474-
* quote in WAITOPERAND
475-
*/
476-
state->state = WAITOPERAND;
477-
*operator = OP_AND;
478-
return PT_OPR;
479-
}
480-
else
481-
{
482-
state->buf++;
483-
484-
/* just close quotes */
485-
state->in_quotes = false;
486-
return PT_CLOSE;
487-
}
457+
/*
458+
* put implicit AND after an operand and handle this quote
459+
* in WAITOPERAND
460+
*/
461+
state->state = WAITOPERAND;
462+
*operator = OP_AND;
463+
return PT_OPR;
488464
}
489465
else if (parse_or_operator(state))
490466
{
@@ -498,18 +474,8 @@ gettoken_query_websearch(TSQueryParserState state, int8 *operator,
498474
}
499475
else if (!t_isspace(state->buf))
500476
{
501-
if (state->in_quotes)
502-
{
503-
/* put implicit <-> after an operand */
504-
*operator = OP_PHRASE;
505-
*weight = 1;
506-
}
507-
else
508-
{
509-
/* put implicit AND after an operand */
510-
*operator = OP_AND;
511-
}
512-
477+
/* put implicit AND after an operand */
478+
*operator = OP_AND;
513479
state->state = WAITOPERAND;
514480
return PT_OPR;
515481
}
@@ -846,7 +812,6 @@ parse_tsquery(char *buf,
846812
state.buffer = buf;
847813
state.buf = buf;
848814
state.count = 0;
849-
state.in_quotes = false;
850815
state.state = WAITFIRSTOPERAND;
851816
state.polstr = NIL;
852817

src/test/regress/expected/tsearch.out

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2678,9 +2678,9 @@ select websearch_to_tsquery('simple', 'abc OR_abc');
26782678

26792679
-- test quotes
26802680
select websearch_to_tsquery('english', '"pg_class pg');
2681-
websearch_to_tsquery
2682-
-------------------------
2683-
'pg' <-> 'class' & 'pg'
2681+
websearch_to_tsquery
2682+
---------------------------
2683+
'pg' <-> 'class' <-> 'pg'
26842684
(1 row)
26852685

26862686
select websearch_to_tsquery('english', 'pg_class pg"');
@@ -2695,6 +2695,12 @@ select websearch_to_tsquery('english', '"pg_class pg"');
26952695
'pg' <-> 'class' <-> 'pg'
26962696
(1 row)
26972697

2698+
select websearch_to_tsquery('english', '"pg_class : pg"');
2699+
websearch_to_tsquery
2700+
---------------------------
2701+
'pg' <-> 'class' <-> 'pg'
2702+
(1 row)
2703+
26982704
select websearch_to_tsquery('english', 'abc "pg_class pg"');
26992705
websearch_to_tsquery
27002706
-----------------------------------
@@ -2708,15 +2714,15 @@ select websearch_to_tsquery('english', '"pg_class pg" def');
27082714
(1 row)
27092715

27102716
select websearch_to_tsquery('english', 'abc "pg pg_class pg" def');
2711-
websearch_to_tsquery
2712-
--------------------------------------------------------
2713-
'abc' & 'pg' <-> ( 'pg' <-> 'class' ) <-> 'pg' & 'def'
2717+
websearch_to_tsquery
2718+
----------------------------------------------------
2719+
'abc' & 'pg' <-> 'pg' <-> 'class' <-> 'pg' & 'def'
27142720
(1 row)
27152721

27162722
select websearch_to_tsquery('english', ' or "pg pg_class pg" or ');
2717-
websearch_to_tsquery
2718-
----------------------------------------
2719-
'pg' <-> ( 'pg' <-> 'class' ) <-> 'pg'
2723+
websearch_to_tsquery
2724+
------------------------------------
2725+
'pg' <-> 'pg' <-> 'class' <-> 'pg'
27202726
(1 row)
27212727

27222728
select websearch_to_tsquery('english', '""pg pg_class pg""');

src/test/regress/sql/tsearch.sql

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -759,6 +759,7 @@ select websearch_to_tsquery('simple', 'abc OR_abc');
759759
select websearch_to_tsquery('english', '"pg_class pg');
760760
select websearch_to_tsquery('english', 'pg_class pg"');
761761
select websearch_to_tsquery('english', '"pg_class pg"');
762+
select websearch_to_tsquery('english', '"pg_class : pg"');
762763
select websearch_to_tsquery('english', 'abc "pg_class pg"');
763764
select websearch_to_tsquery('english', '"pg_class pg" def');
764765
select websearch_to_tsquery('english', 'abc "pg pg_class pg" def');

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy