Skip to content

Commit 6734a1c

Browse files
committed
Change predecence of phrase operator.
<-> operator now have higher predecence than & (AND) operator. This change was motivated by unexpected difference of similar queries: 'a & b <-> c'::tsquery and 'b <-> c & a'. Before first query means (a & b) <-> c and second one - '(b <-> c) & a', now phrase operator evaluates first. Per suggestion from Tom Lane 32260.1465402409@sss.pgh.pa.us
1 parent 3dbbd0f commit 6734a1c

File tree

6 files changed

+121
-134
lines changed

6 files changed

+121
-134
lines changed

src/backend/utils/adt/tsquery.c

Lines changed: 52 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,10 @@
2626
/* FTS operator priorities, see ts_type.h */
2727
const int tsearch_op_priority[OP_COUNT] =
2828
{
29-
3, /* OP_NOT */
29+
4, /* OP_NOT */
3030
2, /* OP_AND */
3131
1, /* OP_OR */
32-
4 /* OP_PHRASE */
32+
3 /* OP_PHRASE */
3333
};
3434

3535
struct TSQueryParserStateData
@@ -430,6 +430,40 @@ pushStop(TSQueryParserState state)
430430

431431
#define STACKDEPTH 32
432432

433+
typedef struct OperatorElement {
434+
int8 op;
435+
int16 distance;
436+
} OperatorElement;
437+
438+
static void
439+
pushOpStack(OperatorElement *stack, int *lenstack, int8 op, int16 distance)
440+
{
441+
if (*lenstack == STACKDEPTH) /* internal error */
442+
elog(ERROR, "tsquery stack too small");
443+
444+
stack[*lenstack].op = op;
445+
stack[*lenstack].distance = distance;
446+
447+
(*lenstack)++;
448+
}
449+
450+
static void
451+
cleanOpStack(TSQueryParserState state,
452+
OperatorElement *stack, int *lenstack, int8 op)
453+
{
454+
int opPriority = OP_PRIORITY(op);
455+
456+
while(*lenstack)
457+
{
458+
if (opPriority > OP_PRIORITY(stack[*lenstack - 1].op))
459+
break;
460+
461+
(*lenstack)--;
462+
pushOperator(state, stack[*lenstack].op,
463+
stack[*lenstack].distance);
464+
}
465+
}
466+
433467
/*
434468
* Make polish (prefix) notation of query.
435469
*
@@ -444,11 +478,7 @@ makepol(TSQueryParserState state,
444478
ts_tokentype type;
445479
int lenval = 0;
446480
char *strval = NULL;
447-
struct
448-
{
449-
int8 op;
450-
int16 distance;
451-
} opstack[STACKDEPTH];
481+
OperatorElement opstack[STACKDEPTH];
452482
int lenstack = 0;
453483
int16 weight = 0;
454484
bool prefix;
@@ -462,49 +492,16 @@ makepol(TSQueryParserState state,
462492
{
463493
case PT_VAL:
464494
pushval(opaque, state, strval, lenval, weight, prefix);
465-
while (lenstack && (opstack[lenstack - 1].op == OP_AND ||
466-
opstack[lenstack - 1].op == OP_PHRASE ||
467-
opstack[lenstack - 1].op == OP_NOT))
468-
{
469-
lenstack--;
470-
pushOperator(state,
471-
opstack[lenstack].op,
472-
opstack[lenstack].distance);
473-
}
474495
break;
475496
case PT_OPR:
476-
if (lenstack && operator == OP_OR)
477-
pushOperator(state, OP_OR, 0);
478-
else
479-
{
480-
if (lenstack == STACKDEPTH) /* internal error */
481-
elog(ERROR, "tsquery stack too small");
482-
opstack[lenstack].op = operator;
483-
opstack[lenstack].distance = weight;
484-
lenstack++;
485-
}
497+
cleanOpStack(state, opstack, &lenstack, operator);
498+
pushOpStack(opstack, &lenstack, operator, weight);
486499
break;
487500
case PT_OPEN:
488501
makepol(state, pushval, opaque);
489-
490-
while (lenstack && (opstack[lenstack - 1].op == OP_AND ||
491-
opstack[lenstack - 1].op == OP_PHRASE ||
492-
opstack[lenstack - 1].op == OP_NOT))
493-
{
494-
lenstack--;
495-
pushOperator(state,
496-
opstack[lenstack].op,
497-
opstack[lenstack].distance);
498-
}
499502
break;
500503
case PT_CLOSE:
501-
while (lenstack)
502-
{
503-
lenstack--;
504-
pushOperator(state,
505-
opstack[lenstack].op,
506-
opstack[lenstack].distance);
507-
};
504+
cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */);
508505
return;
509506
case PT_ERR:
510507
default:
@@ -514,13 +511,8 @@ makepol(TSQueryParserState state,
514511
state->buffer)));
515512
}
516513
}
517-
while (lenstack)
518-
{
519-
lenstack--;
520-
pushOperator(state,
521-
opstack[lenstack].op,
522-
opstack[lenstack].distance);
523-
}
514+
515+
cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */);
524516
}
525517

526518
static void
@@ -750,7 +742,7 @@ while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
750742
* print it in infix (human-readable) form
751743
*/
752744
static void
753-
infix(INFIX *in, int parentPriority)
745+
infix(INFIX *in, int parentPriority, bool rightPhraseOp)
754746
{
755747
/* since this function recurses, it could be driven to stack overflow. */
756748
check_stack_depth();
@@ -819,7 +811,7 @@ infix(INFIX *in, int parentPriority)
819811
}
820812
else if (in->curpol->qoperator.oper == OP_NOT)
821813
{
822-
int priority = PRINT_PRIORITY(in->curpol);
814+
int priority = QO_PRIORITY(in->curpol);
823815

824816
if (priority < parentPriority)
825817
{
@@ -833,7 +825,7 @@ infix(INFIX *in, int parentPriority)
833825
*(in->cur) = '\0';
834826
in->curpol++;
835827

836-
infix(in, priority);
828+
infix(in, priority, false);
837829
if (priority < parentPriority)
838830
{
839831
RESIZEBUF(in, 2);
@@ -844,17 +836,15 @@ infix(INFIX *in, int parentPriority)
844836
else
845837
{
846838
int8 op = in->curpol->qoperator.oper;
847-
int priority = PRINT_PRIORITY(in->curpol);
839+
int priority = QO_PRIORITY(in->curpol);
848840
int16 distance = in->curpol->qoperator.distance;
849841
INFIX nrm;
850842
bool needParenthesis = false;
851843

852844
in->curpol++;
853845
if (priority < parentPriority ||
854-
(op == OP_PHRASE &&
855-
(priority == parentPriority || /* phrases are not
856-
* commutative! */
857-
parentPriority == OP_PRIORITY(OP_AND))))
846+
/* phrase operator depends on order */
847+
(op == OP_PHRASE && rightPhraseOp))
858848
{
859849
needParenthesis = true;
860850
RESIZEBUF(in, 2);
@@ -868,11 +858,11 @@ infix(INFIX *in, int parentPriority)
868858
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
869859

870860
/* get right operand */
871-
infix(&nrm, priority);
861+
infix(&nrm, priority, (op == OP_PHRASE));
872862

873863
/* get & print left operand */
874864
in->curpol = nrm.curpol;
875-
infix(in, priority);
865+
infix(in, priority, false);
876866

877867
/* print operator & right operand */
878868
RESIZEBUF(in, 3 + (2 + 10 /* distance */ ) + (nrm.cur - nrm.buf));
@@ -924,7 +914,7 @@ tsqueryout(PG_FUNCTION_ARGS)
924914
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
925915
*(nrm.cur) = '\0';
926916
nrm.op = GETOPERAND(query);
927-
infix(&nrm, -1 /* lowest priority */ );
917+
infix(&nrm, -1 /* lowest priority */, false);
928918

929919
PG_FREE_IF_COPY(query, 0);
930920
PG_RETURN_CSTRING(nrm.buf);
@@ -1151,7 +1141,7 @@ tsquerytree(PG_FUNCTION_ARGS)
11511141
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
11521142
*(nrm.cur) = '\0';
11531143
nrm.op = GETOPERAND(query);
1154-
infix(&nrm, true);
1144+
infix(&nrm, -1, false);
11551145
res = cstring_to_text_with_len(nrm.buf, nrm.cur - nrm.buf);
11561146
pfree(q);
11571147
}

src/backend/utils/adt/tsquery_cleanup.c

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,11 +25,18 @@ typedef struct NODE
2525
QueryItem *valnode;
2626
} NODE;
2727

28-
/* Non-operator nodes have fake (but highest) priority */
28+
/*
29+
* To simplify walking on query tree and pushing down of phrase operator
30+
* we define some fake priority here: phrase operator has highest priority
31+
* of any other operators (and we believe here that OP_PHRASE is a highest
32+
* code of operations) and value node has ever highest priority.
33+
* Priority values of other operations don't matter until they are less than
34+
* phrase operator and value node.
35+
*/
36+
#define VALUE_PRIORITY (OP_COUNT + 1)
2937
#define NODE_PRIORITY(x) \
3038
( ((x)->valnode->qoperator.type == QI_OPR) ? \
31-
QO_PRIORITY((x)->valnode) : \
32-
TOP_PRIORITY )
39+
(x)->valnode->qoperator.oper : VALUE_PRIORITY )
3340

3441
/*
3542
* make query tree from plain view of query
@@ -416,6 +423,10 @@ normalize_phrase_tree(NODE *node)
416423
node->left = normalize_phrase_tree(node->left);
417424
node->right = normalize_phrase_tree(node->right);
418425

426+
/*
427+
* if subtree contains only nodes with higher "priority" then
428+
* we are done. See comment near NODE_PRIORITY()
429+
*/
419430
if (NODE_PRIORITY(node) <= NODE_PRIORITY(node->right) &&
420431
NODE_PRIORITY(node) <= NODE_PRIORITY(node->left))
421432
return node;

src/include/tsearch/ts_type.h

Lines changed: 1 addition & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -217,33 +217,19 @@ typedef struct
217217

218218
/*
219219
* Legal values for QueryOperator.operator.
220-
* They should be ordered by priority! We assume that phrase
221-
* has highest priority, but this agreement is only
222-
* for query transformation! That's need to simplify
223-
* algorithm of query transformation.
224220
*/
225221
#define OP_NOT 1
226222
#define OP_AND 2
227223
#define OP_OR 3
228-
#define OP_PHRASE 4
224+
#define OP_PHRASE 4 /* highest code, tsquery_cleanup.c */
229225
#define OP_COUNT 4
230226

231227
extern const int tsearch_op_priority[OP_COUNT];
232228

233-
#define NOT_PHRASE_P 5 /* OP_PHRASE negation operations must have
234-
* greater priority in order to force infix()
235-
* to surround the whole OP_PHRASE expression
236-
* with parentheses. */
237-
238-
#define TOP_PRIORITY 6 /* highest priority for val nodes */
239-
240229
/* get operation priority by its code*/
241230
#define OP_PRIORITY(x) ( tsearch_op_priority[(x) - 1] )
242231
/* get QueryOperator priority */
243232
#define QO_PRIORITY(x) OP_PRIORITY(((QueryOperator *) (x))->oper)
244-
/* special case: get QueryOperator priority for correct printing !(a <-> b>) */
245-
#define PRINT_PRIORITY(x) \
246-
( (((QueryOperator *) (x))->oper == OP_NOT) ? NOT_PHRASE_P : QO_PRIORITY(x) )
247233

248234
typedef struct
249235
{

src/test/regress/expected/tsdicts.out

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -470,15 +470,15 @@ SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
470470
(1 row)
471471

472472
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b <-> sky');
473-
to_tsquery
474-
-----------------------------------------------------------------------------
475-
( 'foot':B <-> 'sky' ) & ( 'ball':B <-> 'sky' ) & ( 'klubber':B <-> 'sky' )
473+
to_tsquery
474+
-----------------------------------------------------------------
475+
'foot':B <-> 'sky' & 'ball':B <-> 'sky' & 'klubber':B <-> 'sky'
476476
(1 row)
477477

478478
SELECT phraseto_tsquery('hunspell_tst', 'footballyklubber sky');
479-
phraseto_tsquery
480-
-----------------------------------------------------------------------
481-
( 'foot' <-> 'sky' ) & ( 'ball' <-> 'sky' ) & ( 'klubber' <-> 'sky' )
479+
phraseto_tsquery
480+
-----------------------------------------------------------
481+
'foot' <-> 'sky' & 'ball' <-> 'sky' & 'klubber' <-> 'sky'
482482
(1 row)
483483

484484
-- Test ispell dictionary with hunspell affix with FLAG long in configuration

src/test/regress/expected/tsearch.out

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -778,9 +778,9 @@ SELECT to_tsquery('english', 'foo <-> a <-> the <-> bar');
778778
(1 row)
779779

780780
SELECT phraseto_tsquery('english', 'PostgreSQL can be extended by the user in many ways');
781-
phraseto_tsquery
782-
-----------------------------------------------------------------------
783-
( ( ( 'postgresql' <3> 'extend' ) <3> 'user' ) <2> 'mani' ) <-> 'way'
781+
phraseto_tsquery
782+
-----------------------------------------------------------
783+
'postgresql' <3> 'extend' <3> 'user' <2> 'mani' <-> 'way'
784784
(1 row)
785785

786786
SELECT ts_rank_cd(to_tsvector('english', '
@@ -1215,15 +1215,15 @@ SELECT ts_rewrite('1 & (2 <-> 3)', 'SELECT keyword, sample FROM test_tsquery'::t
12151215
(1 row)
12161216

12171217
SELECT ts_rewrite('1 & (2 <2> 3)', 'SELECT keyword, sample FROM test_tsquery'::text );
1218-
ts_rewrite
1219-
-----------------------
1220-
'1' & ( '2' <2> '3' )
1218+
ts_rewrite
1219+
-------------------
1220+
'1' & '2' <2> '3'
12211221
(1 row)
12221222

12231223
SELECT ts_rewrite('5 <-> (1 & (2 <-> 3))', 'SELECT keyword, sample FROM test_tsquery'::text );
1224-
ts_rewrite
1225-
-----------------------------------------------
1226-
( '5' <-> '1' ) & ( '5' <-> ( '2' <-> '3' ) )
1224+
ts_rewrite
1225+
---------------------------------------
1226+
'5' <-> '1' & '5' <-> ( '2' <-> '3' )
12271227
(1 row)
12281228

12291229
SELECT ts_rewrite('5 <-> (6 | 8)', 'SELECT keyword, sample FROM test_tsquery'::text );

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy