Skip to content

Commit 76e8a87

Browse files
committed
Teach regex_fixed_prefix() the correct handling of advanced regex escapes --- they aren't simply quoted characters.
Problem noted by Antti Salmela. Also fix a problem with incorrect handling of multibyte characters when followed by a quantifier.
1 parent 4e91824 commit 76e8a87

File tree

1 file changed

+26
-8
lines changed

1 file changed

+26
-8
lines changed

src/backend/utils/adt/selfuncs.c

Lines changed: 26 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
*
1616
*
1717
* IDENTIFICATION
18-
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.167 2004/11/09 00:34:42 tgl Exp $
18+
* $PostgreSQL: pgsql/src/backend/utils/adt/selfuncs.c,v 1.168 2004/12/02 02:45:07 tgl Exp $
1919
*
2020
*-------------------------------------------------------------------------
2121
*/
@@ -3459,6 +3459,8 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
34593459
char *match;
34603460
int pos,
34613461
match_pos,
3462+
prev_pos,
3463+
prev_match_pos,
34623464
paren_depth;
34633465
char *patt;
34643466
char *rest;
@@ -3519,11 +3521,13 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
35193521

35203522
/* OK, allocate space for pattern */
35213523
match = palloc(strlen(patt) + 1);
3522-
match_pos = 0;
3524+
prev_match_pos = match_pos = 0;
35233525

35243526
/* note start at pos 1 to skip leading ^ */
3525-
for (pos = 1; patt[pos]; pos++)
3527+
for (prev_pos = pos = 1; patt[pos]; )
35263528
{
3529+
int len;
3530+
35273531
/*
35283532
* Check for characters that indicate multiple possible matches
35293533
* here. XXX I suspect isalpha() is not an adequately
@@ -3537,6 +3541,14 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
35373541
(case_insensitive && isalpha((unsigned char) patt[pos])))
35383542
break;
35393543

3544+
/*
3545+
* In AREs, backslash followed by alphanumeric is an escape, not
3546+
* a quoted character. Must treat it as having multiple possible
3547+
* matches.
3548+
*/
3549+
if (patt[pos] == '\\' && isalnum((unsigned char) patt[pos + 1]))
3550+
break;
3551+
35403552
/*
35413553
* Check for quantifiers. Except for +, this means the preceding
35423554
* character is optional, so we must remove it from the prefix
@@ -3546,14 +3558,13 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
35463558
patt[pos] == '?' ||
35473559
patt[pos] == '{')
35483560
{
3549-
if (match_pos > 0)
3550-
match_pos--;
3551-
pos--;
3561+
match_pos = prev_match_pos;
3562+
pos = prev_pos;
35523563
break;
35533564
}
35543565
if (patt[pos] == '+')
35553566
{
3556-
pos--;
3567+
pos = prev_pos;
35573568
break;
35583569
}
35593570
if (patt[pos] == '\\')
@@ -3563,7 +3574,14 @@ regex_fixed_prefix(Const *patt_const, bool case_insensitive,
35633574
if (patt[pos] == '\0')
35643575
break;
35653576
}
3566-
match[match_pos++] = patt[pos];
3577+
/* save position in case we need to back up on next loop cycle */
3578+
prev_match_pos = match_pos;
3579+
prev_pos = pos;
3580+
/* must use encoding-aware processing here */
3581+
len = pg_mblen(&patt[pos]);
3582+
memcpy(&match[match_pos], &patt[pos], len);
3583+
match_pos += len;
3584+
pos += len;
35673585
}
35683586

35693587
match[match_pos] = '\0';

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy