Skip to content

Commit 8167a38

Browse files
committed
Allow multibyte characters as escape in SIMILAR TO and SUBSTRING.
Previously, only a single-byte character was allowed as an escape. This patch allows it to be a multi-byte character, though it still must be a single character. Reviewed by Heikki Linnakangas and Tom Lane.
1 parent 1c9701c commit 8167a38

File tree

1 file changed

+58
-5
lines changed

1 file changed

+58
-5
lines changed

src/backend/utils/adt/regexp.c

Lines changed: 58 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -688,11 +688,16 @@ similar_escape(PG_FUNCTION_ARGS)
688688
elen = VARSIZE_ANY_EXHDR(esc_text);
689689
if (elen == 0)
690690
e = NULL; /* no escape character */
691-
else if (elen != 1)
692-
ereport(ERROR,
693-
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
694-
errmsg("invalid escape string"),
695-
errhint("Escape string must be empty or one character.")));
691+
else
692+
{
693+
int escape_mblen = pg_mbstrlen_with_len(e, elen);
694+
695+
if (escape_mblen > 1)
696+
ereport(ERROR,
697+
(errcode(ERRCODE_INVALID_ESCAPE_SEQUENCE),
698+
errmsg("invalid escape string"),
699+
errhint("Escape string must be empty or one character.")));
700+
}
696701
}
697702

698703
/*----------
@@ -724,6 +729,54 @@ similar_escape(PG_FUNCTION_ARGS)
724729
{
725730
char pchar = *p;
726731

732+
/*
733+
* If both the escape character and the current character from the
734+
* pattern are multi-byte, we need to take the slow path.
735+
*
736+
* But if one of them is single-byte, we can process the pattern one
737+
* byte at a time, ignoring multi-byte characters. (This works
738+
* because all server-encodings have the property that a valid
739+
* multi-byte character representation cannot contain the
740+
* representation of a valid single-byte character.)
741+
*/
742+
743+
if (elen > 1)
744+
{
745+
int mblen = pg_mblen(p);
746+
if (mblen > 1)
747+
{
748+
/* slow, multi-byte path */
749+
if (afterescape)
750+
{
751+
*r++ = '\\';
752+
memcpy(r, p, mblen);
753+
r += mblen;
754+
afterescape = false;
755+
}
756+
else if (e && elen == mblen && memcmp(e, p, mblen) == 0)
757+
{
758+
/* SQL99 escape character; do not send to output */
759+
afterescape = true;
760+
}
761+
else
762+
{
763+
/*
764+
* We know it's a multi-byte character, so we don't need
765+
* to do all the comparisons to single-byte characters
766+
* that we do below.
767+
*/
768+
memcpy(r, p, mblen);
769+
r += mblen;
770+
}
771+
772+
p += mblen;
773+
plen -= mblen;
774+
775+
continue;
776+
}
777+
}
778+
779+
/* fast path */
727780
if (afterescape)
728781
{
729782
if (pchar == '"' && !incharclass) /* for SUBSTRING patterns */

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy