Skip to content

Commit 00f11f4

Browse files
committed
Fix ILIKE to honor collation when working in single-byte encodings.
The original collation patch only fixed the multi-byte code path. This change also ensures that ILIKE's idea of the case-folding rules is exactly the same as str_tolower's.
1 parent f89e4df commit 00f11f4

File tree

2 files changed

+72
-20
lines changed

2 files changed

+72
-20
lines changed

src/backend/utils/adt/like.c

Lines changed: 65 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -19,24 +19,30 @@
1919

2020
#include <ctype.h>
2121

22+
#include "catalog/pg_collation.h"
2223
#include "mb/pg_wchar.h"
2324
#include "utils/builtins.h"
25+
#include "utils/pg_locale.h"
2426

2527

2628
#define LIKE_TRUE 1
2729
#define LIKE_FALSE 0
2830
#define LIKE_ABORT (-1)
2931

3032

31-
static int SB_MatchText(char *t, int tlen, char *p, int plen);
33+
static int SB_MatchText(char *t, int tlen, char *p, int plen,
34+
pg_locale_t locale, bool locale_is_c);
3235
static text *SB_do_like_escape(text *, text *);
3336

34-
static int MB_MatchText(char *t, int tlen, char *p, int plen);
37+
static int MB_MatchText(char *t, int tlen, char *p, int plen,
38+
pg_locale_t locale, bool locale_is_c);
3539
static text *MB_do_like_escape(text *, text *);
3640

37-
static int UTF8_MatchText(char *t, int tlen, char *p, int plen);
41+
static int UTF8_MatchText(char *t, int tlen, char *p, int plen,
42+
pg_locale_t locale, bool locale_is_c);
3843

39-
static int SB_IMatchText(char *t, int tlen, char *p, int plen);
44+
static int SB_IMatchText(char *t, int tlen, char *p, int plen,
45+
pg_locale_t locale, bool locale_is_c);
4046

4147
static int GenericMatchText(char *s, int slen, char *p, int plen);
4248
static int Generic_Text_IC_like(text *str, text *pat, Oid collation);
@@ -78,6 +84,24 @@ wchareq(char *p1, char *p2)
7884
* comparison. This should be revisited when we install better locale support.
7985
*/
8086

87+
/*
88+
* We do handle case-insensitive matching for single-byte encodings using
89+
* fold-on-the-fly processing, however.
90+
*/
91+
static char
92+
SB_lower_char(unsigned char c, pg_locale_t locale, bool locale_is_c)
93+
{
94+
if (locale_is_c)
95+
return pg_ascii_tolower(c);
96+
#ifdef HAVE_LOCALE_T
97+
else if (locale)
98+
return tolower_l(c, locale);
99+
#endif
100+
else
101+
return pg_tolower(c);
102+
}
103+
104+
81105
#define NextByte(p, plen) ((p)++, (plen)--)
82106

83107
/* Set up to compile like_match.c for multibyte characters */
@@ -107,7 +131,7 @@ wchareq(char *p1, char *p2)
107131
#include "like_match.c"
108132

109133
/* setup to compile like_match.c for single byte case insensitive matches */
110-
#define MATCH_LOWER
134+
#define MATCH_LOWER(t) SB_lower_char((unsigned char) (t), locale, locale_is_c)
111135
#define NextChar(p, plen) NextByte((p), (plen))
112136
#define MatchText SB_IMatchText
113137

@@ -121,15 +145,16 @@ wchareq(char *p1, char *p2)
121145

122146
#include "like_match.c"
123147

148+
/* Generic for all cases not requiring inline case-folding */
124149
static inline int
125150
GenericMatchText(char *s, int slen, char *p, int plen)
126151
{
127152
if (pg_database_encoding_max_length() == 1)
128-
return SB_MatchText(s, slen, p, plen);
153+
return SB_MatchText(s, slen, p, plen, 0, true);
129154
else if (GetDatabaseEncoding() == PG_UTF8)
130-
return UTF8_MatchText(s, slen, p, plen);
155+
return UTF8_MatchText(s, slen, p, plen, 0, true);
131156
else
132-
return MB_MatchText(s, slen, p, plen);
157+
return MB_MatchText(s, slen, p, plen, 0, true);
133158
}
134159

135160
static inline int
@@ -142,8 +167,8 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
142167

143168
/*
144169
* For efficiency reasons, in the single byte case we don't call lower()
145-
* on the pattern and text, but instead call to_lower on each character.
146-
* In the multi-byte case we don't have much choice :-(
170+
* on the pattern and text, but instead call SB_lower_char on each
171+
* character. In the multi-byte case we don't have much choice :-(
147172
*/
148173

149174
if (pg_database_encoding_max_length() > 1)
@@ -156,17 +181,42 @@ Generic_Text_IC_like(text *str, text *pat, Oid collation)
156181
s = VARDATA(str);
157182
slen = (VARSIZE(str) - VARHDRSZ);
158183
if (GetDatabaseEncoding() == PG_UTF8)
159-
return UTF8_MatchText(s, slen, p, plen);
184+
return UTF8_MatchText(s, slen, p, plen, 0, true);
160185
else
161-
return MB_MatchText(s, slen, p, plen);
186+
return MB_MatchText(s, slen, p, plen, 0, true);
162187
}
163188
else
164189
{
190+
/*
191+
* Here we need to prepare locale information for SB_lower_char.
192+
* This should match the methods used in str_tolower().
193+
*/
194+
pg_locale_t locale = 0;
195+
bool locale_is_c = false;
196+
197+
if (lc_ctype_is_c(collation))
198+
locale_is_c = true;
199+
else if (collation != DEFAULT_COLLATION_OID)
200+
{
201+
if (!OidIsValid(collation))
202+
{
203+
/*
204+
* This typically means that the parser could not resolve a
205+
* conflict of implicit collations, so report it that way.
206+
*/
207+
ereport(ERROR,
208+
(errcode(ERRCODE_INDETERMINATE_COLLATION),
209+
errmsg("could not determine which collation to use for ILIKE"),
210+
errhint("Use the COLLATE clause to set the collation explicitly.")));
211+
}
212+
locale = pg_newlocale_from_collation(collation);
213+
}
214+
165215
p = VARDATA_ANY(pat);
166216
plen = VARSIZE_ANY_EXHDR(pat);
167217
s = VARDATA_ANY(str);
168218
slen = VARSIZE_ANY_EXHDR(str);
169-
return SB_IMatchText(s, slen, p, plen);
219+
return SB_IMatchText(s, slen, p, plen, locale, locale_is_c);
170220
}
171221
}
172222

@@ -274,7 +324,7 @@ bytealike(PG_FUNCTION_ARGS)
274324
p = VARDATA_ANY(pat);
275325
plen = VARSIZE_ANY_EXHDR(pat);
276326

277-
result = (SB_MatchText(s, slen, p, plen) == LIKE_TRUE);
327+
result = (SB_MatchText(s, slen, p, plen, 0, true) == LIKE_TRUE);
278328

279329
PG_RETURN_BOOL(result);
280330
}
@@ -295,7 +345,7 @@ byteanlike(PG_FUNCTION_ARGS)
295345
p = VARDATA_ANY(pat);
296346
plen = VARSIZE_ANY_EXHDR(pat);
297347

298-
result = (SB_MatchText(s, slen, p, plen) != LIKE_TRUE);
348+
result = (SB_MatchText(s, slen, p, plen, 0, true) != LIKE_TRUE);
299349

300350
PG_RETURN_BOOL(result);
301351
}

src/backend/utils/adt/like_match.c

Lines changed: 7 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
*
66
* This file is included by like.c four times, to provide matching code for
77
* (1) single-byte encodings, (2) UTF8, (3) other multi-byte encodings,
8-
* and (4) case insensitive matches in single byte encodings.
8+
* and (4) case insensitive matches in single-byte encodings.
99
* (UTF8 is a special case because we can use a much more efficient version
1010
* of NextChar than can be used for general multi-byte encodings.)
1111
*
@@ -14,7 +14,7 @@
1414
* NextChar
1515
* MatchText - to name of function wanted
1616
* do_like_escape - name of function if wanted - needs CHAREQ and CopyAdvChar
17-
* MATCH_LOWER - define for case (4), using to_lower on single-byte chars
17+
* MATCH_LOWER - define for case (4) to specify case folding for 1-byte chars
1818
*
1919
* Copyright (c) 1996-2011, PostgreSQL Global Development Group
2020
*
@@ -70,13 +70,14 @@
7070
*/
7171

7272
#ifdef MATCH_LOWER
73-
#define GETCHAR(t) ((char) tolower((unsigned char) (t)))
73+
#define GETCHAR(t) MATCH_LOWER(t)
7474
#else
7575
#define GETCHAR(t) (t)
7676
#endif
7777

7878
static int
79-
MatchText(char *t, int tlen, char *p, int plen)
79+
MatchText(char *t, int tlen, char *p, int plen,
80+
pg_locale_t locale, bool locale_is_c)
8081
{
8182
/* Fast path for match-everything pattern */
8283
if (plen == 1 && *p == '%')
@@ -170,7 +171,8 @@ MatchText(char *t, int tlen, char *p, int plen)
170171
{
171172
if (GETCHAR(*t) == firstpat)
172173
{
173-
int matched = MatchText(t, tlen, p, plen);
174+
int matched = MatchText(t, tlen, p, plen,
175+
locale, locale_is_c);
174176

175177
if (matched != LIKE_FALSE)
176178
return matched; /* TRUE or ABORT */

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy