Skip to content

Commit f71c924

Browse files
committed
[ Patch comments in three pieces.] Attached is a patch against 7.2 which adds locale awareness to the character classes of the regular expression engine. ... > > I still think the xdigit class could be handled the same way the digit > > class is (by enumeration rather than using the isxdigit function). That > > saves you a cycle, and I don't think there's any loss. > > In fact, I will email you when I apply the original patch. I missed that case :-(. Here is the patched patch. ... Here is a patch which addresses Tatsuo's concerns (it does return a static struct instead of constructing it).
1 parent 450e728 commit f71c924

File tree

1 file changed

+92
-2
lines changed

1 file changed

+92
-2
lines changed

src/backend/regex/regcomp.c

Lines changed: 92 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,17 @@
4747
#include "regex/regex.h"
4848
#include "regex/utils.h"
4949
#include "regex/regex2.h"
50-
#include "regex/cclass.h"
5150
#include "regex/cname.h"
51+
#include <locale.h>
52+
53+
/*
 * One entry in the table of named character classes used for bracket
 * expressions such as [[:alpha:]].  A table is an array of these entries
 * terminated by an entry whose name is NULL.
 */
struct cclass
54+
{
55+
char *name;		/* class name, e.g. "alnum"; NULL marks the end of a table */
56+
char *chars;	/* NUL-terminated string listing the single-byte members */
57+
char *multis;	/* always "" in the tables built by cclass_init() */
58+
};
59+
/* Active class table; stays NULL until pg95_regcomp() calls cclass_init(). */
static struct cclass* cclasses = NULL;
60+
/* Selects or builds the class table according to the current LC_CTYPE locale. */
static struct cclass* cclass_init(void);
5261

5362
/*
5463
* parse structure, passed up and down to avoid global variables and
@@ -174,6 +183,9 @@ pg95_regcomp(regex_t *preg, const char *pattern, int cflags)
174183
pg_wchar *wcp;
175184
#endif
176185

186+
if ( cclasses == NULL )
187+
cclasses = cclass_init();
188+
177189
#ifdef REDEBUG
178190
#define GOODFLAGS(f) (f)
179191
#else
@@ -884,7 +896,7 @@ p_b_cclass(struct parse * p, cset *cs)
884896
struct cclass *cp;
885897
size_t len;
886898
char *u;
887-
char c;
899+
unsigned char c;
888900

889901
while (MORE() && pg_isalpha(PEEK()))
890902
NEXT();
@@ -1716,3 +1728,81 @@ pg_islower(int c)
17161728
return (islower((unsigned char) c));
17171729
#endif
17181730
}
1731+
1732+
static struct cclass *
1733+
cclass_init(void)
1734+
{
1735+
static struct cclass cclasses_C[] = {
1736+
{ "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789", "" },
1737+
{ "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", "" },
1738+
{ "blank", " \t", "" },
1739+
{ "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\25\26\27\30\31\32\33\34\35\36\37\177", "" },
1740+
{ "digit", "0123456789", "" },
1741+
{ "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", "" },
1742+
{ "lower", "abcdefghijklmnopqrstuvwxyz", "" },
1743+
{ "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", "" },
1744+
{ "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", "" },
1745+
{ "space", "\t\n\v\f\r ", "" },
1746+
{ "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", "" },
1747+
{ "xdigit", "0123456789ABCDEFabcdef", "" },
1748+
{ NULL, NULL, "" }
1749+
};
1750+
struct cclass *cp = NULL;
1751+
struct cclass *classes = NULL;
1752+
struct cclass_factory
1753+
{
1754+
char *name;
1755+
int (*func)(int);
1756+
char *chars;
1757+
} cclass_factories [] =
1758+
{
1759+
{ "alnum", isalnum, NULL },
1760+
{ "alpha", isalpha, NULL },
1761+
{ "blank", NULL, " \t" },
1762+
{ "cntrl", iscntrl, NULL },
1763+
{ "digit", NULL, "0123456789" },
1764+
{ "graph", isgraph, NULL },
1765+
{ "lower", islower, NULL },
1766+
{ "print", isprint, NULL },
1767+
{ "punct", ispunct, NULL },
1768+
{ "space", NULL, "\t\n\v\f\r " },
1769+
{ "upper", isupper, NULL },
1770+
{ "xdigit", NULL, "0123456789ABCDEFabcdef" },
1771+
{ NULL, NULL, NULL }
1772+
};
1773+
struct cclass_factory *cf = NULL;
1774+
1775+
if ( strcmp( setlocale( LC_CTYPE, NULL ), "C" ) == 0 )
1776+
return cclasses_C;
1777+
1778+
classes = malloc(sizeof(struct cclass) * (sizeof(cclass_factories) / sizeof(struct cclass_factory)));
1779+
if (classes == NULL)
1780+
elog(ERROR,"cclass_init: out of memory");
1781+
1782+
cp = classes;
1783+
for(cf = cclass_factories; cf->name != NULL; cf++)
1784+
{
1785+
cp->name = strdup(cf->name);
1786+
if ( cf->chars )
1787+
cp->chars = strdup(cf->chars);
1788+
else
1789+
{
1790+
int x = 0, y = 0;
1791+
cp->chars = malloc(sizeof(char) * 256);
1792+
if (cp->chars == NULL)
1793+
elog(ERROR,"cclass_init: out of memory");
1794+
for (x = 0; x < 256; x++)
1795+
{
1796+
if((cf->func)(x))
1797+
*(cp->chars + y++) = x;
1798+
}
1799+
*(cp->chars + y) = '\0';
1800+
}
1801+
cp->multis = "";
1802+
cp++;
1803+
}
1804+
cp->name = cp->chars = NULL;
1805+
cp->multis = "";
1806+
1807+
return classes;
1808+
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy