Skip to content

Commit 59f9a0b

Browse files
committed
Implement a solution to the 'Turkish locale downcases I incorrectly' problem, per previous discussion.
Make some additional changes to centralize the knowledge of just how identifier downcasing is done, in hopes of simplifying any future tweaking in this area.
1 parent 1d567ae commit 59f9a0b

File tree

10 files changed

+158
-125
lines changed

10 files changed

+158
-125
lines changed

src/backend/commands/define.c

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
*
1010
*
1111
* IDENTIFICATION
12-
* $PostgreSQL: pgsql/src/backend/commands/define.c,v 1.85 2003/11/29 19:51:47 pgsql Exp $
12+
* $PostgreSQL: pgsql/src/backend/commands/define.c,v 1.86 2004/02/21 00:34:52 tgl Exp $
1313
*
1414
* DESCRIPTION
1515
* The "DefineFoo" routines take the parse tree and pick out the
@@ -38,24 +38,19 @@
3838
#include "catalog/namespace.h"
3939
#include "commands/defrem.h"
4040
#include "parser/parse_type.h"
41+
#include "parser/scansup.h"
4142
#include "utils/int8.h"
4243

4344

4445
/*
45-
* Translate the input language name to lower case.
46+
* Translate the input language name to lower case, and truncate if needed.
4647
*
47-
* Output buffer must be NAMEDATALEN long.
48+
* Returns a palloc'd string
4849
*/
49-
void
50-
case_translate_language_name(const char *input, char *output)
50+
char *
51+
case_translate_language_name(const char *input)
5152
{
52-
int i;
53-
54-
MemSet(output, 0, NAMEDATALEN); /* ensure result Name is
55-
* zero-filled */
56-
57-
for (i = 0; i < NAMEDATALEN - 1 && input[i]; ++i)
58-
output[i] = tolower((unsigned char) input[i]);
53+
return downcase_truncate_identifier(input, strlen(input), false);
5954
}
6055

6156

src/backend/commands/functioncmds.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
*
1111
*
1212
* IDENTIFICATION
13-
* $PostgreSQL: pgsql/src/backend/commands/functioncmds.c,v 1.43 2004/01/06 23:55:18 tgl Exp $
13+
* $PostgreSQL: pgsql/src/backend/commands/functioncmds.c,v 1.44 2004/02/21 00:34:52 tgl Exp $
1414
*
1515
* DESCRIPTION
1616
* These routines take the parse tree and pick out the
@@ -401,7 +401,7 @@ CreateFunction(CreateFunctionStmt *stmt)
401401
Oid prorettype;
402402
bool returnsSet;
403403
char *language;
404-
char languageName[NAMEDATALEN];
404+
char *languageName;
405405
Oid languageOid;
406406
Oid languageValidator;
407407
char *funcname;
@@ -437,7 +437,7 @@ CreateFunction(CreateFunctionStmt *stmt)
437437
&as_clause, &language, &volatility, &isStrict, &security);
438438

439439
/* Convert language name to canonical case */
440-
case_translate_language_name(language, languageName);
440+
languageName = case_translate_language_name(language);
441441

442442
/* Look up the language and validate permissions */
443443
languageTuple = SearchSysCache(LANGNAME,

src/backend/commands/proclang.c

Lines changed: 13 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/commands/proclang.c,v 1.52 2003/11/29 19:51:47 pgsql Exp $
10+
* $PostgreSQL: pgsql/src/backend/commands/proclang.c,v 1.53 2004/02/21 00:34:52 tgl Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -40,11 +40,12 @@
4040
void
4141
CreateProceduralLanguage(CreatePLangStmt *stmt)
4242
{
43-
char languageName[NAMEDATALEN];
43+
char *languageName;
4444
Oid procOid,
4545
valProcOid;
4646
Oid funcrettype;
4747
Oid typev[FUNC_MAX_ARGS];
48+
NameData langname;
4849
char nulls[Natts_pg_language];
4950
Datum values[Natts_pg_language];
5051
Relation rel;
@@ -66,7 +67,7 @@ CreateProceduralLanguage(CreatePLangStmt *stmt)
6667
* Translate the language name and check that this language doesn't
6768
* already exist
6869
*/
69-
case_translate_language_name(stmt->plname, languageName);
70+
languageName = case_translate_language_name(stmt->plname);
7071

7172
if (SearchSysCacheExists(LANGNAME,
7273
PointerGetDatum(languageName),
@@ -124,12 +125,13 @@ CreateProceduralLanguage(CreatePLangStmt *stmt)
124125
}
125126

126127
i = 0;
127-
values[i++] = PointerGetDatum(languageName);
128-
values[i++] = BoolGetDatum(true); /* lanispl */
129-
values[i++] = BoolGetDatum(stmt->pltrusted);
130-
values[i++] = ObjectIdGetDatum(procOid);
131-
values[i++] = ObjectIdGetDatum(valProcOid);
132-
nulls[i] = 'n'; /* lanacl */
128+
namestrcpy(&langname, languageName);
129+
values[i++] = NameGetDatum(&langname); /* lanname */
130+
values[i++] = BoolGetDatum(true); /* lanispl */
131+
values[i++] = BoolGetDatum(stmt->pltrusted); /* lanpltrusted */
132+
values[i++] = ObjectIdGetDatum(procOid); /* lanplcallfoid */
133+
values[i++] = ObjectIdGetDatum(valProcOid); /* lanvalidator */
134+
nulls[i] = 'n'; /* lanacl */
133135

134136
rel = heap_openr(LanguageRelationName, RowExclusiveLock);
135137

@@ -173,7 +175,7 @@ CreateProceduralLanguage(CreatePLangStmt *stmt)
173175
void
174176
DropProceduralLanguage(DropPLangStmt *stmt)
175177
{
176-
char languageName[NAMEDATALEN];
178+
char *languageName;
177179
HeapTuple langTup;
178180
ObjectAddress object;
179181

@@ -189,7 +191,7 @@ DropProceduralLanguage(DropPLangStmt *stmt)
189191
* Translate the language name, check that this language exist and is
190192
* a PL
191193
*/
192-
case_translate_language_name(stmt->plname, languageName);
194+
languageName = case_translate_language_name(stmt->plname);
193195

194196
langTup = SearchSysCache(LANGNAME,
195197
CStringGetDatum(languageName),

src/backend/parser/keywords.c

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.144 2003/11/29 19:51:51 pgsql Exp $
11+
* $PostgreSQL: pgsql/src/backend/parser/keywords.c,v 1.145 2004/02/21 00:34:52 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -369,17 +369,13 @@ ScanKeywordLookup(const char *text)
369369

370370
/*
371371
* Apply an ASCII-only downcasing. We must not use tolower() since it
372-
* may produce the wrong translation in some locales (eg, Turkish),
373-
* and we don't trust isupper() very much either. In an ASCII-based
374-
* encoding the tests against A and Z are sufficient, but we also
375-
* check isupper() so that we will work correctly under EBCDIC. The
376-
* actual case conversion step should work for either ASCII or EBCDIC.
372+
* may produce the wrong translation in some locales (eg, Turkish).
377373
*/
378374
for (i = 0; i < len; i++)
379375
{
380376
char ch = text[i];
381377

382-
if (ch >= 'A' && ch <= 'Z' && isupper((unsigned char) ch))
378+
if (ch >= 'A' && ch <= 'Z')
383379
ch += 'a' - 'A';
384380
word[i] = ch;
385381
}

src/backend/parser/scan.l

Lines changed: 8 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
* Portions Copyright (c) 1994, Regents of the University of California
1111
*
1212
* IDENTIFICATION
13-
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.113 2004/02/19 19:11:30 tgl Exp $
13+
* $PostgreSQL: pgsql/src/backend/parser/scan.l,v 1.114 2004/02/21 00:34:52 tgl Exp $
1414
*
1515
*-------------------------------------------------------------------------
1616
*/
@@ -27,6 +27,7 @@
2727
#include "parser/keywords.h"
2828
/* Not needed now that this file is compiled as part of gram.y */
2929
/* #include "parser/parse.h" */
30+
#include "parser/scansup.h"
3031
#include "utils/builtins.h"
3132
#include "mb/pg_wchar.h"
3233

@@ -395,23 +396,15 @@ other .
395396
startlit();
396397
}
397398
<xd>{xdstop} {
399+
char *ident;
400+
398401
BEGIN(INITIAL);
399402
if (literallen == 0)
400403
yyerror("zero-length delimited identifier");
404+
ident = litbufdup();
401405
if (literallen >= NAMEDATALEN)
402-
{
403-
int len;
404-
405-
len = pg_mbcliplen(literalbuf, literallen,
406-
NAMEDATALEN-1);
407-
ereport(NOTICE,
408-
(errcode(ERRCODE_NAME_TOO_LONG),
409-
errmsg("identifier \"%s\" will be truncated to \"%.*s\"",
410-
literalbuf, len, literalbuf)));
411-
literalbuf[len] = '\0';
412-
literallen = len;
413-
}
414-
yylval.str = litbufdup();
406+
truncate_identifier(ident, literallen, true);
407+
yylval.str = ident;
415408
return IDENT;
416409
}
417410
<xd>{xddouble} {
@@ -537,7 +530,6 @@ other .
537530
{identifier} {
538531
const ScanKeyword *keyword;
539532
char *ident;
540-
int i;
541533

542534
/* Is it a keyword? */
543535
keyword = ScanKeywordLookup(yytext);
@@ -550,28 +542,8 @@ other .
550542
/*
551543
* No. Convert the identifier to lower case, and truncate
552544
* if necessary.
553-
*
554-
* Note: here we use a locale-dependent case conversion,
555-
* which seems appropriate under standard SQL rules, whereas
556-
* the keyword comparison was NOT locale-dependent.
557545
*/
558-
ident = pstrdup(yytext);
559-
for (i = 0; ident[i]; i++)
560-
{
561-
if (isupper((unsigned char) ident[i]))
562-
ident[i] = tolower((unsigned char) ident[i]);
563-
}
564-
if (i >= NAMEDATALEN)
565-
{
566-
int len;
567-
568-
len = pg_mbcliplen(ident, i, NAMEDATALEN-1);
569-
ereport(NOTICE,
570-
(errcode(ERRCODE_NAME_TOO_LONG),
571-
errmsg("identifier \"%s\" will be truncated to \"%.*s\"",
572-
ident, len, ident)));
573-
ident[len] = '\0';
574-
}
546+
ident = downcase_truncate_identifier(yytext, yyleng, true);
575547
yylval.str = ident;
576548
return IDENT;
577549
}

src/backend/parser/scansup.c

Lines changed: 76 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
*
1010
*
1111
* IDENTIFICATION
12-
* $PostgreSQL: pgsql/src/backend/parser/scansup.c,v 1.25 2003/11/29 19:51:52 pgsql Exp $
12+
* $PostgreSQL: pgsql/src/backend/parser/scansup.c,v 1.26 2004/02/21 00:34:53 tgl Exp $
1313
*
1414
*-------------------------------------------------------------------------
1515
*/
@@ -19,6 +19,8 @@
1919

2020
#include "miscadmin.h"
2121
#include "parser/scansup.h"
22+
#include "mb/pg_wchar.h"
23+
2224

2325
/* ----------------
2426
* scanstr
@@ -32,7 +34,7 @@
3234
*/
3335

3436
char *
35-
scanstr(char *s)
37+
scanstr(const char *s)
3638
{
3739
char *newStr;
3840
int len,
@@ -109,3 +111,75 @@ scanstr(char *s)
109111
newStr[j] = '\0';
110112
return newStr;
111113
}
114+
115+
116+
/*
117+
* downcase_truncate_identifier() --- do appropriate downcasing and
118+
* truncation of an unquoted identifier. Optionally warn of truncation.
119+
*
120+
* Returns a palloc'd string containing the adjusted identifier.
121+
*
122+
* Note: in some usages the passed string is not null-terminated.
123+
*
124+
* Note: the API of this function is designed to allow for downcasing
125+
* transformations that increase the string length, but we don't yet
126+
* support that. If you want to implement it, you'll need to fix
127+
* SplitIdentifierString() in utils/adt/varlena.c.
128+
*/
129+
char *
130+
downcase_truncate_identifier(const char *ident, int len, bool warn)
131+
{
132+
char *result;
133+
int i;
134+
135+
result = palloc(len + 1);
136+
/*
137+
* SQL99 specifies Unicode-aware case normalization, which we don't yet
138+
* have the infrastructure for. Instead we use tolower() to provide a
139+
* locale-aware translation. However, there are some locales where this
140+
* is not right either (eg, Turkish may do strange things with 'i' and
141+
* 'I'). Our current compromise is to use tolower() for characters with
142+
* the high bit set, and use an ASCII-only downcasing for 7-bit
143+
* characters.
144+
*/
145+
for (i = 0; i < len; i++)
146+
{
147+
unsigned char ch = (unsigned char) ident[i];
148+
149+
if (ch >= 'A' && ch <= 'Z')
150+
ch += 'a' - 'A';
151+
else if (ch >= 0x80 && isupper(ch))
152+
ch = tolower(ch);
153+
result[i] = (char) ch;
154+
}
155+
result[i] = '\0';
156+
157+
if (i >= NAMEDATALEN)
158+
truncate_identifier(result, i, warn);
159+
160+
return result;
161+
}
162+
163+
/*
164+
* truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
165+
*
166+
* The given string is modified in-place, if necessary. A warning is
167+
* issued if requested.
168+
*
169+
* We require the caller to pass in the string length since this saves a
170+
* strlen() call in some common usages.
171+
*/
172+
void
173+
truncate_identifier(char *ident, int len, bool warn)
174+
{
175+
if (len >= NAMEDATALEN)
176+
{
177+
len = pg_mbcliplen(ident, len, NAMEDATALEN-1);
178+
if (warn)
179+
ereport(NOTICE,
180+
(errcode(ERRCODE_NAME_TOO_LONG),
181+
errmsg("identifier \"%s\" will be truncated to \"%.*s\"",
182+
ident, len, ident)));
183+
ident[len] = '\0';
184+
}
185+
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy