Skip to content

Commit f7faa99

Browse files
committed
ICU: do not convert locale 'C' to 'en-US-u-va-posix'.
The conversion was intended as a convenience, but it is more likely to be confusing than useful. The user can still specify 'en-US-u-va-posix' directly if desired.

Discussion: https://postgr.es/m/f83f089ee1e9acd5dbbbf3353294d24e1f196e95.camel@j-davis.com
Discussion: https://postgr.es/m/37520ec1ae9591f83132f82dbd625f3fc2d69c16.camel@j-davis.com
1 parent 8d525d7 commit f7faa99

File tree

4 files changed: 14 additions (+14) and 34 deletions (-34).

src/backend/utils/adt/pg_locale.c

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2782,26 +2782,10 @@ icu_language_tag(const char *loc_str, int elevel)
27822782
{
27832783
#ifdef USE_ICU
27842784
UErrorCode status;
2785-
char lang[ULOC_LANG_CAPACITY];
27862785
char *langtag;
27872786
size_t buflen = 32; /* arbitrary starting buffer size */
27882787
const bool strict = true;
27892788

2790-
status = U_ZERO_ERROR;
2791-
uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
2792-
if (U_FAILURE(status))
2793-
{
2794-
if (elevel > 0)
2795-
ereport(elevel,
2796-
(errmsg("could not get language from locale \"%s\": %s",
2797-
loc_str, u_errorName(status))));
2798-
return NULL;
2799-
}
2800-
2801-
/* C/POSIX locales aren't handled by uloc_getLanguageTag() */
2802-
if (strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
2803-
return pstrdup("en-US-u-va-posix");
2804-
28052789
/*
28062790
* A BCP47 language tag doesn't have a clearly-defined upper limit
28072791
* (cf. RFC5646 section 4.4). Additionally, in older ICU versions,
@@ -2889,8 +2873,7 @@ icu_validate_locale(const char *loc_str)
28892873

28902874
/* check for special language name */
28912875
if (strcmp(lang, "") == 0 ||
2892-
strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
2893-
strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
2876+
strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0)
28942877
found = true;
28952878

28962879
/* search for matching language within ICU */

src/bin/initdb/initdb.c

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2238,24 +2238,10 @@ icu_language_tag(const char *loc_str)
22382238
{
22392239
#ifdef USE_ICU
22402240
UErrorCode status;
2241-
char lang[ULOC_LANG_CAPACITY];
22422241
char *langtag;
22432242
size_t buflen = 32; /* arbitrary starting buffer size */
22442243
const bool strict = true;
22452244

2246-
status = U_ZERO_ERROR;
2247-
uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
2248-
if (U_FAILURE(status))
2249-
{
2250-
pg_fatal("could not get language from locale \"%s\": %s",
2251-
loc_str, u_errorName(status));
2252-
return NULL;
2253-
}
2254-
2255-
/* C/POSIX locales aren't handled by uloc_getLanguageTag() */
2256-
if (strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
2257-
return pstrdup("en-US-u-va-posix");
2258-
22592245
/*
22602246
* A BCP47 language tag doesn't have a clearly-defined upper limit
22612247
* (cf. RFC5646 section 4.4). Additionally, in older ICU versions,
@@ -2327,8 +2313,7 @@ icu_validate_locale(const char *loc_str)
23272313

23282314
/* check for special language name */
23292315
if (strcmp(lang, "") == 0 ||
2330-
strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
2331-
strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
2316+
strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0)
23322317
found = true;
23332318

23342319
/* search for matching language within ICU */

src/test/regress/expected/collate.icu.utf8.out

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,6 +1020,7 @@ CREATE ROLE regress_test_role;
10201020
CREATE SCHEMA test_schema;
10211021
-- We need to do this this way to cope with varying names for encodings:
10221022
SET client_min_messages TO WARNING;
1023+
SET icu_validation_level = disabled;
10231024
do $$
10241025
BEGIN
10251026
EXECUTE 'CREATE COLLATION test0 (provider = icu, locale = ' ||
@@ -1034,17 +1035,24 @@ BEGIN
10341035
quote_literal(current_setting('lc_collate')) || ');';
10351036
END
10361037
$$;
1038+
RESET icu_validation_level;
10371039
RESET client_min_messages;
10381040
CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
10391041
ERROR: parameter "locale" must be specified
10401042
CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); -- fails
10411043
ERROR: ICU locale "nonsense-nowhere" has unknown language "nonsense"
10421044
HINT: To disable ICU locale validation, set parameter icu_validation_level to DISABLED.
1045+
CREATE COLLATION testx (provider = icu, locale = 'C'); -- fails
1046+
ERROR: could not convert locale name "C" to language tag: U_ILLEGAL_ARGUMENT_ERROR
10431047
CREATE COLLATION testx (provider = icu, locale = '@colStrength=primary;nonsense=yes'); -- fails
10441048
ERROR: could not convert locale name "@colStrength=primary;nonsense=yes" to language tag: U_ILLEGAL_ARGUMENT_ERROR
10451049
SET icu_validation_level = WARNING;
10461050
CREATE COLLATION testx (provider = icu, locale = '@colStrength=primary;nonsense=yes'); DROP COLLATION testx;
10471051
WARNING: could not convert locale name "@colStrength=primary;nonsense=yes" to language tag: U_ILLEGAL_ARGUMENT_ERROR
1052+
CREATE COLLATION testx (provider = icu, locale = 'C'); DROP COLLATION testx;
1053+
WARNING: could not convert locale name "C" to language tag: U_ILLEGAL_ARGUMENT_ERROR
1054+
WARNING: ICU locale "C" has unknown language "c"
1055+
HINT: To disable ICU locale validation, set parameter icu_validation_level to DISABLED.
10481056
CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); DROP COLLATION testx;
10491057
WARNING: ICU locale "nonsense-nowhere" has unknown language "nonsense"
10501058
HINT: To disable ICU locale validation, set parameter icu_validation_level to DISABLED.

src/test/regress/sql/collate.icu.utf8.sql

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ CREATE SCHEMA test_schema;
358358

359359
-- We need to do this this way to cope with varying names for encodings:
360360
SET client_min_messages TO WARNING;
361+
SET icu_validation_level = disabled;
361362

362363
do $$
363364
BEGIN
@@ -373,13 +374,16 @@ BEGIN
373374
END
374375
$$;
375376

377+
RESET icu_validation_level;
376378
RESET client_min_messages;
377379

378380
CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
379381
CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); -- fails
382+
CREATE COLLATION testx (provider = icu, locale = 'C'); -- fails
380383
CREATE COLLATION testx (provider = icu, locale = '@colStrength=primary;nonsense=yes'); -- fails
381384
SET icu_validation_level = WARNING;
382385
CREATE COLLATION testx (provider = icu, locale = '@colStrength=primary;nonsense=yes'); DROP COLLATION testx;
386+
CREATE COLLATION testx (provider = icu, locale = 'C'); DROP COLLATION testx;
383387
CREATE COLLATION testx (provider = icu, locale = 'nonsense-nowhere'); DROP COLLATION testx;
384388
RESET icu_validation_level;
385389

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy