Skip to content

Commit ad004ce

Browse files
committed
Change ILIKE to invoke lower() and then do a plain LIKE comparison when working in a multibyte encoding. This fixes the problems exhibited in bug #1931 and other reports of ILIKE misbehavior in UTF8 encoding. It's a pretty grotty solution, though — we should rethink how to do it after we install better locale support, someday.
1 parent 6dc920d commit ad004ce

File tree

1 file changed

+100
-63
lines changed

1 file changed

+100
-63
lines changed

src/backend/utils/adt/like.c

Lines changed: 100 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
* Portions Copyright (c) 1994, Regents of the University of California
1212
*
1313
* IDENTIFICATION
14-
* $PostgreSQL: pgsql/src/backend/utils/adt/like.c,v 1.64 2006/03/05 15:58:42 momjian Exp $
14+
* $PostgreSQL: pgsql/src/backend/utils/adt/like.c,v 1.65 2006/09/04 18:32:55 tgl Exp $
1515
*
1616
*-------------------------------------------------------------------------
1717
*/
@@ -64,50 +64,23 @@ wchareq(char *p1, char *p2)
6464
return 1;
6565
}
6666

67-
/*--------------------
68-
* Support routine for MatchTextIC. Compares given multibyte streams
69-
* as wide characters ignoring case.
70-
* If they match, returns 1 otherwise returns 0.
71-
*--------------------
67+
/*
68+
* Formerly we had a routine iwchareq() here that tried to do case-insensitive
69+
* comparison of multibyte characters. It did not work at all, however,
70+
* because it relied on tolower() which has a single-byte API ... and
71+
* towlower() wouldn't be much better since we have no suitably cheap way
72+
* of getting a single character transformed to the system's wchar_t format.
73+
* So now, we just downcase the strings using lower() and apply regular LIKE
74+
* comparison. This should be revisited when we install better locale support.
75+
*
76+
* Note that MBMatchText and MBMatchTextIC do exactly the same thing now.
77+
* Is it worth refactoring to avoid duplicated code? They might become
78+
* different again in the future.
7279
*/
73-
#define CHARMAX 0x80
74-
75-
static int
76-
iwchareq(char *p1, char *p2)
77-
{
78-
pg_wchar c1[2],
79-
c2[2];
80-
int l;
81-
82-
/*
83-
* short cut. if *p1 and *p2 is lower than CHARMAX, then we could assume
84-
* they are ASCII
85-
*/
86-
if ((unsigned char) *p1 < CHARMAX && (unsigned char) *p2 < CHARMAX)
87-
return (tolower((unsigned char) *p1) == tolower((unsigned char) *p2));
88-
89-
/*
90-
* if one of them is an ASCII while the other is not, then they must be
91-
* different characters
92-
*/
93-
else if ((unsigned char) *p1 < CHARMAX || (unsigned char) *p2 < CHARMAX)
94-
return 0;
95-
96-
/*
97-
* ok, p1 and p2 are both > CHARMAX, then they must be multibyte
98-
* characters
99-
*/
100-
l = pg_mblen(p1);
101-
(void) pg_mb2wchar_with_len(p1, c1, l);
102-
c1[0] = tolower(c1[0]);
103-
l = pg_mblen(p2);
104-
(void) pg_mb2wchar_with_len(p2, c2, l);
105-
c2[0] = tolower(c2[0]);
106-
return (c1[0] == c2[0]);
107-
}
10880

81+
/* Set up to compile like_match.c for multibyte characters */
10982
#define CHAREQ(p1, p2) wchareq(p1, p2)
110-
#define ICHAREQ(p1, p2) iwchareq(p1, p2)
83+
#define ICHAREQ(p1, p2) wchareq(p1, p2)
11184
#define NextChar(p, plen) \
11285
do { int __l = pg_mblen(p); (p) +=__l; (plen) -=__l; } while (0)
11386
#define CopyAdvChar(dst, src, srclen) \
@@ -120,7 +93,9 @@ iwchareq(char *p1, char *p2)
12093
#define MatchText MBMatchText
12194
#define MatchTextIC MBMatchTextIC
12295
#define do_like_escape MB_do_like_escape
96+
12397
#include "like_match.c"
98+
12499
#undef CHAREQ
125100
#undef ICHAREQ
126101
#undef NextChar
@@ -129,15 +104,19 @@ iwchareq(char *p1, char *p2)
129104
#undef MatchTextIC
130105
#undef do_like_escape
131106

107+
/* Set up to compile like_match.c for single-byte characters */
132108
#define CHAREQ(p1, p2) (*(p1) == *(p2))
133109
#define ICHAREQ(p1, p2) (tolower((unsigned char) *(p1)) == tolower((unsigned char) *(p2)))
134110
#define NextChar(p, plen) ((p)++, (plen)--)
135111
#define CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
136112

113+
#include "like_match.c"
114+
115+
/* And some support for BYTEA */
137116
#define BYTEA_CHAREQ(p1, p2) (*(p1) == *(p2))
138117
#define BYTEA_NextChar(p, plen) ((p)++, (plen)--)
139118
#define BYTEA_CopyAdvChar(dst, src, srclen) (*(dst)++ = *(src)++, (srclen)--)
140-
#include "like_match.c"
119+
141120

142121
/*
143122
* interface routines called by the function manager
@@ -296,15 +275,32 @@ nameiclike(PG_FUNCTION_ARGS)
296275
int slen,
297276
plen;
298277

299-
s = NameStr(*str);
300-
slen = strlen(s);
301-
p = VARDATA(pat);
302-
plen = (VARSIZE(pat) - VARHDRSZ);
303-
304278
if (pg_database_encoding_max_length() == 1)
279+
{
280+
s = NameStr(*str);
281+
slen = strlen(s);
282+
p = VARDATA(pat);
283+
plen = (VARSIZE(pat) - VARHDRSZ);
305284
result = (MatchTextIC(s, slen, p, plen) == LIKE_TRUE);
285+
}
306286
else
287+
{
288+
/* Force inputs to lower case to achieve case insensitivity */
289+
text *strtext;
290+
291+
strtext = DatumGetTextP(DirectFunctionCall1(name_text,
292+
NameGetDatum(str)));
293+
strtext = DatumGetTextP(DirectFunctionCall1(lower,
294+
PointerGetDatum(strtext)));
295+
pat = DatumGetTextP(DirectFunctionCall1(lower,
296+
PointerGetDatum(pat)));
297+
298+
s = VARDATA(strtext);
299+
slen = (VARSIZE(strtext) - VARHDRSZ);
300+
p = VARDATA(pat);
301+
plen = (VARSIZE(pat) - VARHDRSZ);
307302
result = (MBMatchTextIC(s, slen, p, plen) == LIKE_TRUE);
303+
}
308304

309305
PG_RETURN_BOOL(result);
310306
}
@@ -320,15 +316,32 @@ nameicnlike(PG_FUNCTION_ARGS)
320316
int slen,
321317
plen;
322318

323-
s = NameStr(*str);
324-
slen = strlen(s);
325-
p = VARDATA(pat);
326-
plen = (VARSIZE(pat) - VARHDRSZ);
327-
328319
if (pg_database_encoding_max_length() == 1)
320+
{
321+
s = NameStr(*str);
322+
slen = strlen(s);
323+
p = VARDATA(pat);
324+
plen = (VARSIZE(pat) - VARHDRSZ);
329325
result = (MatchTextIC(s, slen, p, plen) != LIKE_TRUE);
326+
}
330327
else
328+
{
329+
/* Force inputs to lower case to achieve case insensitivity */
330+
text *strtext;
331+
332+
strtext = DatumGetTextP(DirectFunctionCall1(name_text,
333+
NameGetDatum(str)));
334+
strtext = DatumGetTextP(DirectFunctionCall1(lower,
335+
PointerGetDatum(strtext)));
336+
pat = DatumGetTextP(DirectFunctionCall1(lower,
337+
PointerGetDatum(pat)));
338+
339+
s = VARDATA(strtext);
340+
slen = (VARSIZE(strtext) - VARHDRSZ);
341+
p = VARDATA(pat);
342+
plen = (VARSIZE(pat) - VARHDRSZ);
331343
result = (MBMatchTextIC(s, slen, p, plen) != LIKE_TRUE);
344+
}
332345

333346
PG_RETURN_BOOL(result);
334347
}
@@ -344,15 +357,27 @@ texticlike(PG_FUNCTION_ARGS)
344357
int slen,
345358
plen;
346359

347-
s = VARDATA(str);
348-
slen = (VARSIZE(str) - VARHDRSZ);
349-
p = VARDATA(pat);
350-
plen = (VARSIZE(pat) - VARHDRSZ);
351-
352360
if (pg_database_encoding_max_length() == 1)
361+
{
362+
s = VARDATA(str);
363+
slen = (VARSIZE(str) - VARHDRSZ);
364+
p = VARDATA(pat);
365+
plen = (VARSIZE(pat) - VARHDRSZ);
353366
result = (MatchTextIC(s, slen, p, plen) == LIKE_TRUE);
367+
}
354368
else
369+
{
370+
/* Force inputs to lower case to achieve case insensitivity */
371+
str = DatumGetTextP(DirectFunctionCall1(lower,
372+
PointerGetDatum(str)));
373+
pat = DatumGetTextP(DirectFunctionCall1(lower,
374+
PointerGetDatum(pat)));
375+
s = VARDATA(str);
376+
slen = (VARSIZE(str) - VARHDRSZ);
377+
p = VARDATA(pat);
378+
plen = (VARSIZE(pat) - VARHDRSZ);
355379
result = (MBMatchTextIC(s, slen, p, plen) == LIKE_TRUE);
380+
}
356381

357382
PG_RETURN_BOOL(result);
358383
}
@@ -368,15 +393,27 @@ texticnlike(PG_FUNCTION_ARGS)
368393
int slen,
369394
plen;
370395

371-
s = VARDATA(str);
372-
slen = (VARSIZE(str) - VARHDRSZ);
373-
p = VARDATA(pat);
374-
plen = (VARSIZE(pat) - VARHDRSZ);
375-
376396
if (pg_database_encoding_max_length() == 1)
397+
{
398+
s = VARDATA(str);
399+
slen = (VARSIZE(str) - VARHDRSZ);
400+
p = VARDATA(pat);
401+
plen = (VARSIZE(pat) - VARHDRSZ);
377402
result = (MatchTextIC(s, slen, p, plen) != LIKE_TRUE);
403+
}
378404
else
405+
{
406+
/* Force inputs to lower case to achieve case insensitivity */
407+
str = DatumGetTextP(DirectFunctionCall1(lower,
408+
PointerGetDatum(str)));
409+
pat = DatumGetTextP(DirectFunctionCall1(lower,
410+
PointerGetDatum(pat)));
411+
s = VARDATA(str);
412+
slen = (VARSIZE(str) - VARHDRSZ);
413+
p = VARDATA(pat);
414+
plen = (VARSIZE(pat) - VARHDRSZ);
379415
result = (MBMatchTextIC(s, slen, p, plen) != LIKE_TRUE);
416+
}
380417

381418
PG_RETURN_BOOL(result);
382419
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy