Skip to content

Commit b2a01b9

Browse files
committed
Fix bugs in contrib/pg_trgm's LIKE pattern analysis code.
Extraction of trigrams did not process LIKE escape sequences properly, which could misidentify trigrams near escapes and produce incorrect index search results. Patch by Fujii Masao.
1 parent 51fed14 commit b2a01b9

File tree

3 files changed

+39
-19
lines changed

3 files changed

+39
-19
lines changed

contrib/pg_trgm/expected/pg_trgm.out

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3497,6 +3497,12 @@ select * from test2 where t like '%bcd%';
34973497
abcdef
34983498
(1 row)
34993499

3500+
select * from test2 where t like E'%\\bcd%';
3501+
t
3502+
--------
3503+
abcdef
3504+
(1 row)
3505+
35003506
select * from test2 where t ilike '%BCD%';
35013507
t
35023508
--------
@@ -3539,6 +3545,12 @@ select * from test2 where t like '%bcd%';
35393545
abcdef
35403546
(1 row)
35413547

3548+
select * from test2 where t like E'%\\bcd%';
3549+
t
3550+
--------
3551+
abcdef
3552+
(1 row)
3553+
35423554
select * from test2 where t ilike '%BCD%';
35433555
t
35443556
--------

contrib/pg_trgm/sql/pg_trgm.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ explain (costs off)
4949
select * from test2 where t ilike '%BCD%';
5050
select * from test2 where t like '%BCD%';
5151
select * from test2 where t like '%bcd%';
52+
select * from test2 where t like E'%\\bcd%';
5253
select * from test2 where t ilike '%BCD%';
5354
select * from test2 where t ilike 'qua%';
5455
drop index test2_idx_gin;
@@ -60,5 +61,6 @@ explain (costs off)
6061
select * from test2 where t ilike '%BCD%';
6162
select * from test2 where t like '%BCD%';
6263
select * from test2 where t like '%bcd%';
64+
select * from test2 where t like E'%\\bcd%';
6365
select * from test2 where t ilike '%BCD%';
6466
select * from test2 where t ilike 'qua%';

contrib/pg_trgm/trgm_op.c

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -272,33 +272,36 @@ get_wildcard_part(const char *str, int lenstr,
272272
const char *beginword = str;
273273
const char *endword;
274274
char *s = buf;
275-
bool in_wildcard_meta = false;
275+
bool in_leading_wildcard_meta = false;
276+
bool in_trailing_wildcard_meta = false;
276277
bool in_escape = false;
277278
int clen;
278279

279280
/*
280-
* Find the first word character remembering whether last character was
281-
* wildcard meta-character.
281+
* Find the first word character, remembering whether preceding character
282+
* was wildcard meta-character. Note that the in_escape state persists
283+
* from this loop to the next one, since we may exit at a word character
284+
* that is in_escape.
282285
*/
283286
while (beginword - str < lenstr)
284287
{
285288
if (in_escape)
286289
{
287-
in_escape = false;
288-
in_wildcard_meta = false;
289290
if (iswordchr(beginword))
290291
break;
292+
in_escape = false;
293+
in_leading_wildcard_meta = false;
291294
}
292295
else
293296
{
294297
if (ISESCAPECHAR(beginword))
295298
in_escape = true;
296299
else if (ISWILDCARDCHAR(beginword))
297-
in_wildcard_meta = true;
300+
in_leading_wildcard_meta = true;
298301
else if (iswordchr(beginword))
299302
break;
300303
else
301-
in_wildcard_meta = false;
304+
in_leading_wildcard_meta = false;
302305
}
303306
beginword += pg_mblen(beginword);
304307
}
@@ -310,11 +313,11 @@ get_wildcard_part(const char *str, int lenstr,
310313
return NULL;
311314

312315
/*
313-
* Add left padding spaces if last character wasn't wildcard
316+
* Add left padding spaces if preceding character wasn't wildcard
314317
* meta-character.
315318
*/
316319
*charlen = 0;
317-
if (!in_wildcard_meta)
320+
if (!in_leading_wildcard_meta)
318321
{
319322
if (LPADDING > 0)
320323
{
@@ -333,31 +336,37 @@ get_wildcard_part(const char *str, int lenstr,
333336
* string boundary. Strip escapes during copy.
334337
*/
335338
endword = beginword;
336-
in_wildcard_meta = false;
337-
in_escape = false;
338339
while (endword - str < lenstr)
339340
{
340341
clen = pg_mblen(endword);
341342
if (in_escape)
342343
{
343-
in_escape = false;
344-
in_wildcard_meta = false;
345344
if (iswordchr(endword))
346345
{
347346
memcpy(s, endword, clen);
348347
(*charlen)++;
349348
s += clen;
350349
}
351350
else
351+
{
352+
/*
353+
* Back up endword to the escape character when stopping at
354+
* an escaped char, so that subsequent get_wildcard_part will
355+
* restart from the escape character. We assume here that
356+
* escape chars are single-byte.
357+
*/
358+
endword--;
352359
break;
360+
}
361+
in_escape = false;
353362
}
354363
else
355364
{
356365
if (ISESCAPECHAR(endword))
357366
in_escape = true;
358367
else if (ISWILDCARDCHAR(endword))
359368
{
360-
in_wildcard_meta = true;
369+
in_trailing_wildcard_meta = true;
361370
break;
362371
}
363372
else if (iswordchr(endword))
@@ -367,19 +376,16 @@ get_wildcard_part(const char *str, int lenstr,
367376
s += clen;
368377
}
369378
else
370-
{
371-
in_wildcard_meta = false;
372379
break;
373-
}
374380
}
375381
endword += clen;
376382
}
377383

378384
/*
379-
* Add right padding spaces if last character wasn't wildcard
385+
* Add right padding spaces if next character isn't wildcard
380386
* meta-character.
381387
*/
382-
if (!in_wildcard_meta)
388+
if (!in_trailing_wildcard_meta)
383389
{
384390
if (RPADDING > 0)
385391
{

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy