Skip to content

Commit dbde97c

Browse files
committed
Rewrite LIKE's %-followed-by-_ optimization so it really works (this time
for sure ;-)). It now also optimizes more cases, such as %_%_. Improve comments too. Per bug #5478. In passing, also rename the TCHAR macro to GETCHAR, because pgindent is messing with the formatting of the former (apparently it now thinks TCHAR is a typedef name). Back-patch to 8.3, where the bug was introduced.
1 parent e54b0cb commit dbde97c

File tree

3 files changed

+74
-70
lines changed

3 files changed

+74
-70
lines changed

src/backend/utils/adt/like_match.c

Lines changed: 64 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -14,12 +14,12 @@
1414
* NextChar
1515
* MatchText - to name of function wanted
1616
* do_like_escape - name of function if wanted - needs CHAREQ and CopyAdvChar
17-
* MATCH_LOWER - define iff using to_lower on text chars
17+
* MATCH_LOWER - define for case (4), using to_lower on single-byte chars
1818
*
1919
* Copyright (c) 1996-2010, PostgreSQL Global Development Group
2020
*
2121
* IDENTIFICATION
22-
* $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.27 2010/01/02 16:57:54 momjian Exp $
22+
* $PostgreSQL: pgsql/src/backend/utils/adt/like_match.c,v 1.28 2010/05/28 17:35:23 tgl Exp $
2323
*
2424
*-------------------------------------------------------------------------
2525
*/
@@ -70,9 +70,9 @@
7070
*/
7171

7272
#ifdef MATCH_LOWER
73-
#define TCHAR(t) ((char) tolower((unsigned char) (t)))
73+
#define GETCHAR(t) ((char) tolower((unsigned char) (t)))
7474
#else
75-
#define TCHAR(t) (t)
75+
#define GETCHAR(t) (t)
7676
#endif
7777

7878
static int
@@ -101,85 +101,80 @@ MatchText(char *t, int tlen, char *p, int plen)
101101
ereport(ERROR,
102102
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
103103
errmsg("LIKE pattern must not end with escape character")));
104-
if (TCHAR (*p) != TCHAR (*t))
104+
if (GETCHAR(*p) != GETCHAR(*t))
105105
return LIKE_FALSE;
106106
}
107107
else if (*p == '%')
108108
{
109+
char firstpat;
110+
109111
/*
110-
* % processing is essentially a search for a match for what
111-
* follows the %, plus a recursive match of the remainder. We
112-
* succeed if and only if both conditions are met.
112+
* % processing is essentially a search for a text position at
113+
* which the remainder of the text matches the remainder of the
114+
* pattern, using a recursive call to check each potential match.
115+
*
116+
* If there are wildcards immediately following the %, we can skip
117+
* over them first, using the idea that any sequence of N _'s and
118+
* one or more %'s is equivalent to N _'s and one % (ie, it will
119+
* match any sequence of at least N text characters). In this
120+
* way we will always run the recursive search loop using a
121+
* pattern fragment that begins with a literal character-to-match,
122+
* thereby not recursing more than we have to.
113123
*/
124+
NextByte(p, plen);
114125

115-
/* %% is the same as % according to the SQL standard */
116-
/* Advance past all %'s */
117-
while (plen > 0 && *p == '%')
118-
NextByte(p, plen);
119-
/* Trailing percent matches everything. */
126+
while (plen > 0)
127+
{
128+
if (*p == '%')
129+
NextByte(p, plen);
130+
else if (*p == '_')
131+
{
132+
/* If not enough text left to match the pattern, ABORT */
133+
if (tlen <= 0)
134+
return LIKE_ABORT;
135+
NextChar(t, tlen);
136+
NextByte(p, plen);
137+
}
138+
else
139+
break; /* Reached a non-wildcard pattern char */
140+
}
141+
142+
/*
143+
* If we're at end of pattern, match: we have a trailing % which
144+
* matches any remaining text string.
145+
*/
120146
if (plen <= 0)
121147
return LIKE_TRUE;
122148

123149
/*
124150
* Otherwise, scan for a text position at which we can match the
125-
* rest of the pattern.
151+
* rest of the pattern. The first remaining pattern char is known
152+
* to be a regular or escaped literal character, so we can compare
153+
* the first pattern byte to each text byte to avoid recursing
154+
* more than we have to. This fact also guarantees that we don't
155+
* have to consider a match to the zero-length substring at the
156+
* end of the text.
126157
*/
127-
if (*p == '_')
158+
if (*p == '\\')
128159
{
129-
/* %_ is the same as _% - avoid matching _ repeatedly */
160+
if (plen < 2)
161+
return LIKE_FALSE; /* XXX should throw error */
162+
firstpat = GETCHAR(p[1]);
163+
}
164+
else
165+
firstpat = GETCHAR(*p);
130166

131-
do
132-
{
133-
NextChar(t, tlen);
134-
NextByte(p, plen);
135-
} while (tlen > 0 && plen > 0 && *p == '_');
136-
137-
/*
138-
* If we are at the end of the pattern, succeed: % followed by
139-
* n _'s matches any string of at least n characters, and we
140-
* have now found there are at least n characters.
141-
*/
142-
if (plen <= 0)
143-
return LIKE_TRUE;
144-
145-
/* Look for a place that matches the rest of the pattern */
146-
while (tlen > 0)
167+
while (tlen > 0)
168+
{
169+
if (GETCHAR(*t) == firstpat)
147170
{
148171
int matched = MatchText(t, tlen, p, plen);
149172

150173
if (matched != LIKE_FALSE)
151-
return matched; /* TRUE or ABORT */
152-
153-
NextChar(t, tlen);
174+
return matched; /* TRUE or ABORT */
154175
}
155-
}
156-
else
157-
{
158-
char firstpat = TCHAR (*p);
159176

160-
if (*p == '\\')
161-
{
162-
if (plen < 2)
163-
return LIKE_FALSE;
164-
firstpat = TCHAR (p[1]);
165-
}
166-
167-
while (tlen > 0)
168-
{
169-
/*
170-
* Optimization to prevent most recursion: don't recurse
171-
* unless first pattern byte matches first text byte.
172-
*/
173-
if (TCHAR (*t) == firstpat)
174-
{
175-
int matched = MatchText(t, tlen, p, plen);
176-
177-
if (matched != LIKE_FALSE)
178-
return matched; /* TRUE or ABORT */
179-
}
180-
181-
NextChar(t, tlen);
182-
}
177+
NextChar(t, tlen);
183178
}
184179

185180
/*
@@ -195,7 +190,7 @@ MatchText(char *t, int tlen, char *p, int plen)
195190
NextByte(p, plen);
196191
continue;
197192
}
198-
else if (TCHAR (*p) != TCHAR (*t))
193+
else if (GETCHAR(*p) != GETCHAR(*t))
199194
{
200195
/* non-wildcard pattern char fails to match text char */
201196
return LIKE_FALSE;
@@ -220,11 +215,12 @@ MatchText(char *t, int tlen, char *p, int plen)
220215
if (tlen > 0)
221216
return LIKE_FALSE; /* end of pattern, but not of text */
222217

223-
/* End of text string. Do we have matching pattern remaining? */
224-
while (plen > 0 && *p == '%') /* allow multiple %'s at end of
225-
* pattern */
218+
/*
219+
* End of text, but perhaps not of pattern. Match iff the remaining
220+
* pattern can match a zero-length string, ie, it's zero or more %'s.
221+
*/
222+
while (plen > 0 && *p == '%')
226223
NextByte(p, plen);
227-
228224
if (plen <= 0)
229225
return LIKE_TRUE;
230226

@@ -348,7 +344,7 @@ do_like_escape(text *pat, text *esc)
348344
#undef do_like_escape
349345
#endif
350346

351-
#undef TCHAR
347+
#undef GETCHAR
352348

353349
#ifdef MATCH_LOWER
354350
#undef MATCH_LOWER

src/test/regress/expected/strings.out

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -996,7 +996,7 @@ SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false";
996996
(1 row)
997997

998998
--
999-
-- test %/_ combination cases, cf bug #4821
999+
-- test %/_ combination cases, cf bugs #4821 and #5478
10001000
--
10011001
SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f;
10021002
t | t | f
@@ -1022,6 +1022,12 @@ SELECT 'foo' LIKE '%__' as t, 'foo' LIKE '%___' as t, 'foo' LIKE '%____' as f;
10221022
t | t | f
10231023
(1 row)
10241024

1025+
SELECT 'jack' LIKE '%____%' AS t;
1026+
t
1027+
---
1028+
t
1029+
(1 row)
1030+
10251031
--
10261032
-- test implicit type conversion
10271033
--

src/test/regress/sql/strings.sql

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -301,7 +301,7 @@ SELECT 'Hawkeye' ILIKE 'h%' AS "true";
301301
SELECT 'Hawkeye' NOT ILIKE 'h%' AS "false";
302302

303303
--
304-
-- test %/_ combination cases, cf bug #4821
304+
-- test %/_ combination cases, cf bugs #4821 and #5478
305305
--
306306

307307
SELECT 'foo' LIKE '_%' as t, 'f' LIKE '_%' as t, '' LIKE '_%' as f;
@@ -310,6 +310,8 @@ SELECT 'foo' LIKE '%_' as t, 'f' LIKE '%_' as t, '' LIKE '%_' as f;
310310
SELECT 'foo' LIKE '__%' as t, 'foo' LIKE '___%' as t, 'foo' LIKE '____%' as f;
311311
SELECT 'foo' LIKE '%__' as t, 'foo' LIKE '%___' as t, 'foo' LIKE '%____' as f;
312312

313+
SELECT 'jack' LIKE '%____%' AS t;
314+
313315

314316
--
315317
-- test implicit type conversion

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy