Skip to content

Commit 31b6d84

Browse files
committed
Prevent rank change in case of duplicate search terms
1 parent 5d50873 commit 31b6d84

File tree

1 file changed

+74
-18
lines changed

1 file changed

+74
-18
lines changed

contrib/tsearch2/rank.c

Lines changed: 74 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ static float weights[] = {0.1, 0.2, 0.4, 1.0};
4343

4444
#define DEF_NORM_METHOD 0
4545

46+
static float calc_rank_or(float *w, tsvector * t, QUERYTYPE * q);
47+
static float calc_rank_and(float *w, tsvector * t, QUERYTYPE * q);
4648
/*
4749
* Returns a weight of a word collocation
4850
*/
@@ -112,6 +114,55 @@ find_wordentry(tsvector * t, QUERYTYPE * q, ITEM * item)
112114
return NULL;
113115
}
114116

117+
118+
static char * SortAndUniqOperand=NULL;
119+
120+
static int
121+
compareITEM( const void * a, const void * b ) {
122+
if ( (*(ITEM**)a)->length == (*(ITEM**)b)->length )
123+
return strncmp( SortAndUniqOperand + (*(ITEM**)a)->distance,
124+
SortAndUniqOperand + (*(ITEM**)b)->distance,
125+
(*(ITEM**)b)->length );
126+
127+
return ((*(ITEM**)a)->length > (*(ITEM**)b)->length) ? 1 : -1;
128+
}
129+
130+
/*
 * Collect pointers to the VAL items of a query, sorted with compareITEM()
 * and with duplicates (items comparing equal) squeezed out.
 *
 * operand: base of the operand text the items refer to; stored in
 *          SortAndUniqOperand for compareITEM() when sorting is needed.
 * item:    first of *size consecutive ITEMs to scan.
 * size:    on entry, the number of ITEMs at item; on return, the number
 *          of pointers stored in the result array.
 *
 * Returns an array obtained from palloc() with room for the entry value
 * of *size pointers; the pointers refer into the item array itself.
 * When fewer than two VAL items are found the array is returned unsorted
 * and SortAndUniqOperand is left untouched.
 */
static ITEM **
SortAndUniqItems(char *operand, ITEM *item, int *size)
{
	int			nitems = *size;
	int			nvals = 0;
	int			i;
	ITEM	  **res;
	ITEM	  **last;

	res = (ITEM **) palloc(sizeof(*res) * nitems);

	/* Keep only value items; other item types are not ranked. */
	for (i = 0; i < nitems; i++)
	{
		if (item[i].type == VAL)
			res[nvals++] = &item[i];
	}

	*size = nvals;
	if (nvals < 2)
		return res;

	/* compareITEM() reads the operand text through this file-level pointer. */
	SortAndUniqOperand = operand;
	qsort(res, nvals, sizeof(*res), compareITEM);

	/* Compact in place: "last" is the slot of the most recent unique item. */
	last = res;
	for (i = 1; i < nvals; i++)
	{
		if (compareITEM(&res[i], last) != 0)
			*++last = res[i];
	}

	*size = (int) (last + 1 - res);
	return res;
}
165+
115166
static WordEntryPos POSNULL[] = {
116167
0,
117168
0
@@ -120,7 +171,7 @@ static WordEntryPos POSNULL[] = {
120171
static float
121172
calc_rank_and(float *w, tsvector * t, QUERYTYPE * q)
122173
{
123-
uint16 **pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
174+
uint16 **pos;
124175
int i,
125176
k,
126177
l,
@@ -132,19 +183,22 @@ calc_rank_and(float *w, tsvector * t, QUERYTYPE * q)
132183
lenct,
133184
dist;
134185
float res = -1.0;
135-
ITEM *item = GETQUERY(q);
136-
137-
memset(pos, 0, sizeof(uint16 **) * q->size);
186+
ITEM **item;
187+
int size = q->size;
188+
189+
item = SortAndUniqItems( GETOPERAND(q), GETQUERY(q), &size);
190+
if ( size < 2 ) {
191+
pfree(item);
192+
return calc_rank_or(w, t, q);
193+
}
194+
pos = (uint16 **) palloc(sizeof(uint16 *) * q->size);
195+
memset(pos, 0, sizeof(uint16 *) * q->size);
138196
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
139-
WEP_SETPOS(POSNULL[1], MAXENTRYPOS-1);
197+
WEP_SETPOS(POSNULL[1], MAXENTRYPOS-1);
140198

141-
for (i = 0; i < q->size; i++)
199+
for (i = 0; i < size; i++)
142200
{
143-
144-
if (item[i].type != VAL)
145-
continue;
146-
147-
entry = find_wordentry(t, q, &(item[i]));
201+
entry = find_wordentry(t, q, item[i]);
148202
if (!entry)
149203
continue;
150204

@@ -181,6 +235,7 @@ calc_rank_and(float *w, tsvector * t, QUERYTYPE * q)
181235
}
182236
}
183237
pfree(pos);
238+
pfree(item);
184239
return res;
185240
}
186241

@@ -193,16 +248,15 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
193248
j,
194249
i;
195250
float res = -1.0;
196-
ITEM *item = GETQUERY(q);
251+
ITEM **item;
252+
int size = q->size;
197253

198254
*(uint16 *) POSNULL = lengthof(POSNULL) - 1;
255+
item = SortAndUniqItems( GETOPERAND(q), GETQUERY(q), &size);
199256

200-
for (i = 0; i < q->size; i++)
257+
for (i = 0; i < size; i++)
201258
{
202-
if (item[i].type != VAL)
203-
continue;
204-
205-
entry = find_wordentry(t, q, &(item[i]));
259+
entry = find_wordentry(t, q, item[i]);
206260
if (!entry)
207261
continue;
208262

@@ -225,6 +279,7 @@ calc_rank_or(float *w, tsvector * t, QUERYTYPE * q)
225279
res = 1.0 - (1.0 - res) * (1.0 - wpos(post[j]));
226280
}
227281
}
282+
pfree( item );
228283
return res;
229284
}
230285

@@ -349,7 +404,7 @@ checkcondition_DR(void *checkval, ITEM * val)
349404

350405
while (ptr - ((ChkDocR *) checkval)->doc < ((ChkDocR *) checkval)->len)
351406
{
352-
if (val == ptr->item)
407+
if ( val == ptr->item || compareITEM( &val, &(ptr->item) ) == 0 )
353408
return true;
354409
ptr++;
355410
}
@@ -439,6 +494,7 @@ Cover(DocRepresentation * doc, int len, QUERYTYPE * query, int *pos, int *p, int
439494
ch.doc = f;
440495
ch.len = (doc + lastpos) - f + 1;
441496
*pos = f - doc + 1;
497+
SortAndUniqOperand = GETOPERAND(query);
442498
if (TS_execute(GETQUERY(query), &ch, false, checkcondition_DR))
443499
{
444500
/*

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy