Skip to content

Commit dcabf52

Browse files
feodordmpgpro
authored andcommitted
Change predecence of phrase operator.
<-> operator now have higher predecence than & (AND) operator. This change was motivated by unexpected difference of similar queries: 'a & b <-> c'::tsquery and 'b <-> c & a'. Before first query means (a & b) <-> c and second one - '(b <-> c) & a', now phrase operator evaluates first. Per suggestion from Tom Lane 32260.1465402409@sss.pgh.pa.us Conflicts: src/backend/utils/adt/tsquery.c src/include/tsearch/ts_type.h
1 parent 4eecbb0 commit dcabf52

File tree

6 files changed

+130
-143
lines changed

6 files changed

+130
-143
lines changed

src/backend/utils/adt/tsquery.c

Lines changed: 61 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,10 @@
2727
/* FTS operator priorities, see ts_type.h */
2828
const int tsearch_op_priority[OP_COUNT] =
2929
{
30-
3, /* OP_NOT */
31-
2, /* OP_AND */
32-
1, /* OP_OR */
33-
4 /* OP_PHRASE */
30+
4, /* OP_NOT */
31+
2, /* OP_AND */
32+
1, /* OP_OR */
33+
3 /* OP_PHRASE */
3434
};
3535

3636
struct TSQueryParserStateData
@@ -431,6 +431,40 @@ pushStop(TSQueryParserState state)
431431

432432
#define STACKDEPTH 32
433433

434+
typedef struct OperatorElement {
435+
int8 op;
436+
int16 distance;
437+
} OperatorElement;
438+
439+
static void
440+
pushOpStack(OperatorElement *stack, int *lenstack, int8 op, int16 distance)
441+
{
442+
if (*lenstack == STACKDEPTH) /* internal error */
443+
elog(ERROR, "tsquery stack too small");
444+
445+
stack[*lenstack].op = op;
446+
stack[*lenstack].distance = distance;
447+
448+
(*lenstack)++;
449+
}
450+
451+
static void
452+
cleanOpStack(TSQueryParserState state,
453+
OperatorElement *stack, int *lenstack, int8 op)
454+
{
455+
int opPriority = OP_PRIORITY(op);
456+
457+
while(*lenstack)
458+
{
459+
if (opPriority > OP_PRIORITY(stack[*lenstack - 1].op))
460+
break;
461+
462+
(*lenstack)--;
463+
pushOperator(state, stack[*lenstack].op,
464+
stack[*lenstack].distance);
465+
}
466+
}
467+
434468
/*
435469
* Make polish (prefix) notation of query.
436470
*
@@ -441,18 +475,14 @@ makepol(TSQueryParserState state,
441475
PushFunction pushval,
442476
Datum opaque)
443477
{
444-
int8 operator = 0;
445-
ts_tokentype type;
446-
int lenval = 0;
447-
char *strval = NULL;
448-
struct
449-
{
450-
int8 op;
451-
int16 distance;
452-
} opstack[STACKDEPTH];
453-
int lenstack = 0;
454-
int16 weight = 0;
455-
bool prefix;
478+
int8 operator = 0;
479+
ts_tokentype type;
480+
int lenval = 0;
481+
char *strval = NULL;
482+
OperatorElement opstack[STACKDEPTH];
483+
int lenstack = 0;
484+
int16 weight = 0;
485+
bool prefix;
456486

457487
/* since this function recurses, it could be driven to stack overflow */
458488
check_stack_depth();
@@ -463,49 +493,16 @@ makepol(TSQueryParserState state,
463493
{
464494
case PT_VAL:
465495
pushval(opaque, state, strval, lenval, weight, prefix);
466-
while (lenstack && (opstack[lenstack - 1].op == OP_AND ||
467-
opstack[lenstack - 1].op == OP_PHRASE ||
468-
opstack[lenstack - 1].op == OP_NOT))
469-
{
470-
lenstack--;
471-
pushOperator(state,
472-
opstack[lenstack].op,
473-
opstack[lenstack].distance);
474-
}
475496
break;
476497
case PT_OPR:
477-
if (lenstack && operator == OP_OR)
478-
pushOperator(state, OP_OR, 0);
479-
else
480-
{
481-
if (lenstack == STACKDEPTH) /* internal error */
482-
elog(ERROR, "tsquery stack too small");
483-
opstack[lenstack].op = operator;
484-
opstack[lenstack].distance = weight;
485-
lenstack++;
486-
}
498+
cleanOpStack(state, opstack, &lenstack, operator);
499+
pushOpStack(opstack, &lenstack, operator, weight);
487500
break;
488501
case PT_OPEN:
489502
makepol(state, pushval, opaque);
490-
491-
while (lenstack && (opstack[lenstack - 1].op == OP_AND ||
492-
opstack[lenstack - 1].op == OP_PHRASE ||
493-
opstack[lenstack - 1].op == OP_NOT))
494-
{
495-
lenstack--;
496-
pushOperator(state,
497-
opstack[lenstack].op,
498-
opstack[lenstack].distance);
499-
}
500503
break;
501504
case PT_CLOSE:
502-
while (lenstack)
503-
{
504-
lenstack--;
505-
pushOperator(state,
506-
opstack[lenstack].op,
507-
opstack[lenstack].distance);
508-
};
505+
cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */);
509506
return;
510507
case PT_ERR:
511508
default:
@@ -515,13 +512,8 @@ makepol(TSQueryParserState state,
515512
state->buffer)));
516513
}
517514
}
518-
while (lenstack)
519-
{
520-
lenstack--;
521-
pushOperator(state,
522-
opstack[lenstack].op,
523-
opstack[lenstack].distance);
524-
}
515+
516+
cleanOpStack(state, opstack, &lenstack, OP_OR /* lowest */);
525517
}
526518

527519
static void
@@ -751,7 +743,7 @@ while( ( (inf)->cur - (inf)->buf ) + (addsize) + 1 >= (inf)->buflen ) \
751743
* print it in infix (human-readable) form
752744
*/
753745
static void
754-
infix(INFIX *in, int parentPriority)
746+
infix(INFIX *in, int parentPriority, bool rightPhraseOp)
755747
{
756748
/* since this function recurses, it could be driven to stack overflow. */
757749
check_stack_depth();
@@ -820,7 +812,7 @@ infix(INFIX *in, int parentPriority)
820812
}
821813
else if (in->curpol->qoperator.oper == OP_NOT)
822814
{
823-
int priority = PRINT_PRIORITY(in->curpol);
815+
int priority = QO_PRIORITY(in->curpol);
824816

825817
if (priority < parentPriority)
826818
{
@@ -834,7 +826,7 @@ infix(INFIX *in, int parentPriority)
834826
*(in->cur) = '\0';
835827
in->curpol++;
836828

837-
infix(in, priority);
829+
infix(in, priority, false);
838830
if (priority < parentPriority)
839831
{
840832
RESIZEBUF(in, 2);
@@ -845,16 +837,15 @@ infix(INFIX *in, int parentPriority)
845837
else
846838
{
847839
int8 op = in->curpol->qoperator.oper;
848-
int priority = PRINT_PRIORITY(in->curpol);
840+
int priority = QO_PRIORITY(in->curpol);
849841
int16 distance = in->curpol->qoperator.distance;
850842
INFIX nrm;
851843
bool needParenthesis = false;
852844

853845
in->curpol++;
854846
if (priority < parentPriority ||
855-
(op == OP_PHRASE &&
856-
(priority == parentPriority || /* phrases are not commutative! */
857-
parentPriority == OP_PRIORITY(OP_AND))))
847+
/* phrase operator depends on order */
848+
(op == OP_PHRASE && rightPhraseOp))
858849
{
859850
needParenthesis = true;
860851
RESIZEBUF(in, 2);
@@ -868,11 +859,11 @@ infix(INFIX *in, int parentPriority)
868859
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
869860

870861
/* get right operand */
871-
infix(&nrm, priority);
862+
infix(&nrm, priority, (op == OP_PHRASE));
872863

873864
/* get & print left operand */
874865
in->curpol = nrm.curpol;
875-
infix(in, priority);
866+
infix(in, priority, false);
876867

877868
/* print operator & right operand */
878869
RESIZEBUF(in, 3 + (2 + 10 /* distance */) + (nrm.cur - nrm.buf));
@@ -924,7 +915,7 @@ tsqueryout(PG_FUNCTION_ARGS)
924915
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
925916
*(nrm.cur) = '\0';
926917
nrm.op = GETOPERAND(query);
927-
infix(&nrm, -1 /* lowest priority */);
918+
infix(&nrm, -1 /* lowest priority */, false);
928919

929920
PG_FREE_IF_COPY(query, 0);
930921
PG_RETURN_CSTRING(nrm.buf);
@@ -1151,7 +1142,7 @@ tsquerytree(PG_FUNCTION_ARGS)
11511142
nrm.cur = nrm.buf = (char *) palloc(sizeof(char) * nrm.buflen);
11521143
*(nrm.cur) = '\0';
11531144
nrm.op = GETOPERAND(query);
1154-
infix(&nrm, true);
1145+
infix(&nrm, -1, false);
11551146
res = cstring_to_text_with_len(nrm.buf, nrm.cur - nrm.buf);
11561147
pfree(q);
11571148
}

src/backend/utils/adt/tsquery_cleanup.c

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,11 +26,18 @@ typedef struct NODE
2626
QueryItem *valnode;
2727
} NODE;
2828

29-
/* Non-operator nodes have fake (but highest) priority */
29+
/*
30+
* To simplify walking on query tree and pushing down of phrase operator
31+
* we define some fake priority here: phrase operator has highest priority
32+
* of any other operators (and we believe here that OP_PHRASE is a highest
33+
* code of operations) and value node has ever highest priority.
34+
* Priority values of other operations don't matter until they are less than
35+
* phrase operator and value node.
36+
*/
37+
#define VALUE_PRIORITY (OP_COUNT + 1)
3038
#define NODE_PRIORITY(x) \
3139
( ((x)->valnode->qoperator.type == QI_OPR) ? \
32-
QO_PRIORITY((x)->valnode) : \
33-
TOP_PRIORITY )
40+
(x)->valnode->qoperator.oper : VALUE_PRIORITY )
3441

3542
/*
3643
* make query tree from plain view of query
@@ -413,6 +420,10 @@ normalize_phrase_tree(NODE *node)
413420
node->left = normalize_phrase_tree(node->left);
414421
node->right = normalize_phrase_tree(node->right);
415422

423+
/*
424+
* if subtree contains only nodes with higher "priority" then
425+
* we are done. See comment near NODE_PRIORITY()
426+
*/
416427
if (NODE_PRIORITY(node) <= NODE_PRIORITY(node->right) &&
417428
NODE_PRIORITY(node) <= NODE_PRIORITY(node->left))
418429
return node;

src/include/tsearch/ts_type.h

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -211,34 +211,19 @@ typedef struct
211211

212212
/*
213213
* Legal values for QueryOperator.operator.
214-
* They should be ordered by priority! We assume that phrase
215-
* has highest priority, but this agreement is only
216-
* for query transformation! That's need to simplify
217-
* algorithm of query transformation.
218214
*/
219215
#define OP_NOT 1
220216
#define OP_AND 2
221217
#define OP_OR 3
222-
#define OP_PHRASE 4
218+
#define OP_PHRASE 4 /* highest code, tsquery_cleanup.c */
223219
#define OP_COUNT 4
224220

225221
extern const int tsearch_op_priority[OP_COUNT];
226222

227-
#define NOT_PHRASE_P 5 /*
228-
* OP_PHRASE negation operations must have greater
229-
* priority in order to force infix() to surround
230-
* the whole OP_PHRASE expression with parentheses.
231-
*/
232-
233-
#define TOP_PRIORITY 6 /* highest priority for val nodes */
234-
235223
/* get operation priority by its code*/
236224
#define OP_PRIORITY(x) ( tsearch_op_priority[(x) - 1] )
237225
/* get QueryOperator priority */
238226
#define QO_PRIORITY(x) OP_PRIORITY(((QueryOperator *) (x))->oper)
239-
/* special case: get QueryOperator priority for correct printing !(a <-> b>) */
240-
#define PRINT_PRIORITY(x) \
241-
( (((QueryOperator *) (x))->oper == OP_NOT) ? NOT_PHRASE_P : QO_PRIORITY(x) )
242227

243228
typedef struct
244229
{

src/test/regress/expected/tsdicts.out

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -470,15 +470,15 @@ SELECT to_tsquery('hunspell_tst', 'footballyklubber:b & rebookings:A & sky');
470470
(1 row)
471471

472472
SELECT to_tsquery('hunspell_tst', 'footballyklubber:b <-> sky');
473-
to_tsquery
474-
-----------------------------------------------------------------------------
475-
( 'foot':B <-> 'sky' ) & ( 'ball':B <-> 'sky' ) & ( 'klubber':B <-> 'sky' )
473+
to_tsquery
474+
-----------------------------------------------------------------
475+
'foot':B <-> 'sky' & 'ball':B <-> 'sky' & 'klubber':B <-> 'sky'
476476
(1 row)
477477

478478
SELECT phraseto_tsquery('hunspell_tst', 'footballyklubber sky');
479-
phraseto_tsquery
480-
-----------------------------------------------------------------------
481-
( 'foot' <-> 'sky' ) & ( 'ball' <-> 'sky' ) & ( 'klubber' <-> 'sky' )
479+
phraseto_tsquery
480+
-----------------------------------------------------------
481+
'foot' <-> 'sky' & 'ball' <-> 'sky' & 'klubber' <-> 'sky'
482482
(1 row)
483483

484484
-- Test ispell dictionary with hunspell affix with FLAG long in configuration

src/test/regress/expected/tsearch.out

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -772,9 +772,9 @@ SELECT to_tsquery('english', 'foo <-> a <-> the <-> bar');
772772
(1 row)
773773

774774
SELECT phraseto_tsquery('english', 'PostgreSQL can be extended by the user in many ways');
775-
phraseto_tsquery
776-
-----------------------------------------------------------------------
777-
( ( ( 'postgresql' <3> 'extend' ) <3> 'user' ) <2> 'mani' ) <-> 'way'
775+
phraseto_tsquery
776+
-----------------------------------------------------------
777+
'postgresql' <3> 'extend' <3> 'user' <2> 'mani' <-> 'way'
778778
(1 row)
779779

780780
SELECT ts_rank_cd(to_tsvector('english', '
@@ -1209,15 +1209,15 @@ SELECT ts_rewrite('1 & (2 <-> 3)', 'SELECT keyword, sample FROM test_tsquery'::t
12091209
(1 row)
12101210

12111211
SELECT ts_rewrite('1 & (2 <2> 3)', 'SELECT keyword, sample FROM test_tsquery'::text );
1212-
ts_rewrite
1213-
-----------------------
1214-
'1' & ( '2' <2> '3' )
1212+
ts_rewrite
1213+
-------------------
1214+
'1' & '2' <2> '3'
12151215
(1 row)
12161216

12171217
SELECT ts_rewrite('5 <-> (1 & (2 <-> 3))', 'SELECT keyword, sample FROM test_tsquery'::text );
1218-
ts_rewrite
1219-
-----------------------------------------------
1220-
( '5' <-> '1' ) & ( '5' <-> ( '2' <-> '3' ) )
1218+
ts_rewrite
1219+
---------------------------------------
1220+
'5' <-> '1' & '5' <-> ( '2' <-> '3' )
12211221
(1 row)
12221222

12231223
SELECT ts_rewrite('5 <-> (6 | 8)', 'SELECT keyword, sample FROM test_tsquery'::text );

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy