Skip to content

Commit f3a01af

Browse files
committed
ICU: do not convert locale 'C' to 'en-US-u-va-posix'.
Older versions of ICU canonicalize "C" to "en-US-u-va-posix"; but starting in ICU version 64, the "C" locale is considered obsolete. Postgres commit ea1db8a introduced code to always canonicalize "C" to "en-US-u-va-posix" for consistency and convenience, but it was deemed too confusing.

This commit removes that code, so that "C" is treated like other ICU locale names: canonicalization is attempted, and if it fails, the behavior is controlled by icu_validation_level.

A similar change was previously committed as f7faa99, then reverted due to an ICU-version-dependent test failure. This commit un-reverts it, omitting the test because we now expect the behavior to depend on the version of ICU being used.

Discussion: https://postgr.es/m/3a200aca-4672-4b37-fc91-5d198a323503%40eisentraut.org
Discussion: https://postgr.es/m/f83f089ee1e9acd5dbbbf3353294d24e1f196e95.camel@j-davis.com
Discussion: https://postgr.es/m/37520ec1ae9591f83132f82dbd625f3fc2d69c16.camel@j-davis.com
1 parent 2535c74 commit f3a01af

File tree

4 files changed

+6
-34
lines changed

4 files changed

+6
-34
lines changed

src/backend/utils/adt/pg_locale.c

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2784,26 +2784,10 @@ icu_language_tag(const char *loc_str, int elevel)
27842784
{
27852785
#ifdef USE_ICU
27862786
UErrorCode status;
2787-
char lang[ULOC_LANG_CAPACITY];
27882787
char *langtag;
27892788
size_t buflen = 32; /* arbitrary starting buffer size */
27902789
const bool strict = true;
27912790

2792-
status = U_ZERO_ERROR;
2793-
uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
2794-
if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
2795-
{
2796-
if (elevel > 0)
2797-
ereport(elevel,
2798-
(errmsg("could not get language from locale \"%s\": %s",
2799-
loc_str, u_errorName(status))));
2800-
return NULL;
2801-
}
2802-
2803-
/* C/POSIX locales aren't handled by uloc_getLanguageTag() */
2804-
if (strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
2805-
return pstrdup("en-US-u-va-posix");
2806-
28072791
/*
28082792
* A BCP47 language tag doesn't have a clearly-defined upper limit (cf.
28092793
* RFC5646 section 4.4). Additionally, in older ICU versions,
@@ -2884,8 +2868,7 @@ icu_validate_locale(const char *loc_str)
28842868

28852869
/* check for special language name */
28862870
if (strcmp(lang, "") == 0 ||
2887-
strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
2888-
strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
2871+
strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0)
28892872
found = true;
28902873

28912874
/* search for matching language within ICU */

src/bin/initdb/initdb.c

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -2244,24 +2244,10 @@ icu_language_tag(const char *loc_str)
22442244
{
22452245
#ifdef USE_ICU
22462246
UErrorCode status;
2247-
char lang[ULOC_LANG_CAPACITY];
22482247
char *langtag;
22492248
size_t buflen = 32; /* arbitrary starting buffer size */
22502249
const bool strict = true;
22512250

2252-
status = U_ZERO_ERROR;
2253-
uloc_getLanguage(loc_str, lang, ULOC_LANG_CAPACITY, &status);
2254-
if (U_FAILURE(status) || status == U_STRING_NOT_TERMINATED_WARNING)
2255-
{
2256-
pg_fatal("could not get language from locale \"%s\": %s",
2257-
loc_str, u_errorName(status));
2258-
return NULL;
2259-
}
2260-
2261-
/* C/POSIX locales aren't handled by uloc_getLanguageTag() */
2262-
if (strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
2263-
return pstrdup("en-US-u-va-posix");
2264-
22652251
/*
22662252
* A BCP47 language tag doesn't have a clearly-defined upper limit (cf.
22672253
* RFC5646 section 4.4). Additionally, in older ICU versions,
@@ -2326,8 +2312,7 @@ icu_validate_locale(const char *loc_str)
23262312

23272313
/* check for special language name */
23282314
if (strcmp(lang, "") == 0 ||
2329-
strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0 ||
2330-
strcmp(lang, "c") == 0 || strcmp(lang, "posix") == 0)
2315+
strcmp(lang, "root") == 0 || strcmp(lang, "und") == 0)
23312316
found = true;
23322317

23332318
/* search for matching language within ICU */

src/test/regress/expected/collate.icu.utf8.out

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,6 +1020,7 @@ CREATE ROLE regress_test_role;
10201020
CREATE SCHEMA test_schema;
10211021
-- We need to do this this way to cope with varying names for encodings:
10221022
SET client_min_messages TO WARNING;
1023+
SET icu_validation_level = disabled;
10231024
do $$
10241025
BEGIN
10251026
EXECUTE 'CREATE COLLATION test0 (provider = icu, locale = ' ||
@@ -1034,6 +1035,7 @@ BEGIN
10341035
quote_literal((SELECT CASE WHEN datlocprovider='i' THEN daticulocale ELSE datcollate END FROM pg_database WHERE datname = current_database())) || ');';
10351036
END
10361037
$$;
1038+
RESET icu_validation_level;
10371039
RESET client_min_messages;
10381040
CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"
10391041
ERROR: parameter "locale" must be specified

src/test/regress/sql/collate.icu.utf8.sql

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,7 @@ CREATE SCHEMA test_schema;
358358

359359
-- We need to do this this way to cope with varying names for encodings:
360360
SET client_min_messages TO WARNING;
361+
SET icu_validation_level = disabled;
361362

362363
do $$
363364
BEGIN
@@ -373,6 +374,7 @@ BEGIN
373374
END
374375
$$;
375376

377+
RESET icu_validation_level;
376378
RESET client_min_messages;
377379

378380
CREATE COLLATION test3 (provider = icu, lc_collate = 'en_US.utf8'); -- fail, needs "locale"

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy