Skip to content

Commit 21b748e

Browse files
committed
1. Fix loss of precision in rank with OR-ed lexemes
2. Allow tsquery_in to accept an empty (void) tsquery; this resolves a dump/restore problem with tsquery
1 parent fbff2e9 commit 21b748e

File tree

3 files changed

+49
-23
lines changed

3 files changed

+49
-23
lines changed

contrib/tsearch2/expected/tsearch2.out

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -746,21 +746,21 @@ select count(*) FROM test_tsvector WHERE a @@ to_tsquery('copyright');
746746
(1 row)
747747

748748
select rank(' a:1 s:2C d g'::tsvector, 'a | s');
749-
rank
750-
------
751-
0.28
749+
rank
750+
-----------
751+
0.0911891
752752
(1 row)
753753

754754
select rank(' a:1 s:2B d g'::tsvector, 'a | s');
755-
rank
756-
------
757-
0.46
755+
rank
756+
----------
757+
0.151982
758758
(1 row)
759759

760760
select rank(' a:1 s:2 d g'::tsvector, 'a | s');
761-
rank
762-
------
763-
0.19
761+
rank
762+
-----------
763+
0.0607927
764764
(1 row)
765765

766766
select rank(' a:1 s:2C d g'::tsvector, 'a & s');

contrib/tsearch2/query.c

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,7 @@ Datum to_tsquery_current(PG_FUNCTION_ARGS);
5555
/* parser's states */
5656
#define WAITOPERAND 1
5757
#define WAITOPERATOR 2
58+
#define WAITFIRSTOPERAND 3
5859

5960
/*
6061
* node of query tree, also used
@@ -137,6 +138,7 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2
137138
{
138139
switch (state->state)
139140
{
141+
case WAITFIRSTOPERAND:
140142
case WAITOPERAND:
141143
if (*(state->buf) == '!')
142144
{
@@ -159,14 +161,16 @@ gettoken_query(QPRS_STATE * state, int4 *val, int4 *lenval, char **strval, int2
159161
else if (*(state->buf) != ' ')
160162
{
161163
state->valstate.prsbuf = state->buf;
162-
state->state = WAITOPERATOR;
163164
if (gettoken_tsvector(&(state->valstate)))
164165
{
165166
*strval = state->valstate.word;
166167
*lenval = state->valstate.curpos - state->valstate.word;
167168
state->buf = get_weight(state->valstate.prsbuf, weight);
169+
state->state = WAITOPERATOR;
168170
return VAL;
169171
}
172+
else if ( state->state == WAITFIRSTOPERAND )
173+
return END;
170174
else
171175
ereport(ERROR,
172176
(errcode(ERRCODE_SYNTAX_ERROR),
@@ -596,7 +600,7 @@ static QUERYTYPE *
596600

597601
/* init state */
598602
state.buf = buf;
599-
state.state = WAITOPERAND;
603+
state.state = WAITFIRSTOPERAND;
600604
state.count = 0;
601605
state.num = 0;
602606
state.str = NULL;
@@ -616,10 +620,13 @@ static QUERYTYPE *
616620
/* parse query & make polish notation (postfix, but in reverse order) */
617621
makepol(&state, pushval);
618622
pfree(state.valstate.word);
619-
if (!state.num)
620-
ereport(ERROR,
621-
(errcode(ERRCODE_SYNTAX_ERROR),
622-
errmsg("empty query")));
623+
if (!state.num) {
624+
elog(NOTICE, "Query doesn't contain lexem(s)");
625+
query = (QUERYTYPE*)palloc( HDRSIZEQT );
626+
query->len = HDRSIZEQT;
627+
query->size = 0;
628+
return query;
629+
}
623630

624631
/* make finish struct */
625632
commonlen = COMPUTESIZE(state.num, state.sumlen);
@@ -905,6 +912,10 @@ to_tsquery(PG_FUNCTION_ARGS)
905912
PG_FREE_IF_COPY(in, 1);
906913

907914
query = queryin(str, pushval_morph, PG_GETARG_INT32(0));
915+
916+
if ( query->size == 0 )
917+
PG_RETURN_POINTER(query);
918+
908919
res = clean_fakeval_v2(GETQUERY(query), &len);
909920
if (!res)
910921
{

contrib/tsearch2/rank.c

Lines changed: 23 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,7 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
257257
int4 dimt,
258258
j,
259259
i;
260-
float res = -1.0;
260+
float res = 0.0;
261261
ITEM **item;
262262
int size = q->size;
263263

@@ -266,6 +266,8 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
266266

267267
for (i = 0; i < size; i++)
268268
{
269+
float resj,wjm;
270+
int4 jm;
269271
entry = find_wordentry(t, q, item[i]);
270272
if (!entry)
271273
continue;
@@ -281,14 +283,27 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
281283
post = POSNULL + 1;
282284
}
283285

284-
for (j = 0; j < dimt; j++)
285-
{
286-
if (res < 0)
287-
res = wpos(post[j]);
288-
else
289-
res = 1.0 - (1.0 - res) * (1.0 - wpos(post[j]));
290-
}
286+
resj = 0.0;
287+
wjm = -1.0;
288+
jm = 0;
289+
for (j = 0; j < dimt; j++)
290+
{
291+
resj = resj + wpos(post[j])/((j+1)*(j+1));
292+
if ( wpos(post[j]) > wjm ) {
293+
wjm = wpos(post[j]);
294+
jm = j;
295+
}
296+
}
297+
/*
298+
limit (sum(1/i^2),i->inf) = pi^2/6
299+
resj = sum(wi/i^2),i=1,noccurrence,
300+
wi - should be sorted desc,
301+
don't sort for now, just choose maximum weight. This should be corrected
302+
Oleg Bartunov
303+
*/
304+
res = res + ( wjm + resj - wjm/((jm+1)*(jm+1)))/1.64493406685;
291305
}
306+
res = res /size;
292307
pfree(item);
293308
return res;
294309
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy