Skip to content

Commit 869650f

Browse files
committed
Support language tags in older ICU versions (53 and earlier).
By calling uloc_canonicalize() before parsing the attributes, the existing locale attribute parsing logic works on language tags as well.

Fix a small memory leak, too.

Discussion: http://postgr.es/m/60da0cecfb512a78b8666b31631a636215d8ce73.camel@j-davis.com
Reviewed-by: Peter Eisentraut
1 parent e8e1f96 commit 869650f

File tree

4 files changed

+50
-11
lines changed

4 files changed

+50
-11
lines changed

src/backend/commands/collationcmds.c

Lines changed: 3 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -950,7 +950,6 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
950950
const char *name;
951951
char *langtag;
952952
char *icucomment;
953-
const char *iculocstr;
954953
Oid collid;
955954

956955
if (i == -1)
@@ -959,20 +958,19 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
959958
name = uloc_getAvailable(i);
960959

961960
langtag = get_icu_language_tag(name);
962-
iculocstr = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name;
963961

964962
/*
965963
* Be paranoid about not allowing any non-ASCII strings into
966964
* pg_collation
967965
*/
968-
if (!pg_is_ascii(langtag) || !pg_is_ascii(iculocstr))
966+
if (!pg_is_ascii(langtag))
969967
continue;
970968

971969
collid = CollationCreate(psprintf("%s-x-icu", langtag),
972970
nspid, GetUserId(),
973971
COLLPROVIDER_ICU, true, -1,
974-
NULL, NULL, iculocstr, NULL,
975-
get_collation_actual_version(COLLPROVIDER_ICU, iculocstr),
972+
NULL, NULL, langtag, NULL,
973+
get_collation_actual_version(COLLPROVIDER_ICU, langtag),
976974
true, true);
977975
if (OidIsValid(collid))
978976
{

src/backend/utils/adt/pg_locale.c

Lines changed: 35 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -2634,9 +2634,12 @@ icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
26342634
}
26352635

26362636
/*
2637-
* Parse collation attributes and apply them to the open collator. This takes
2638-
* a string like "und@colStrength=primary;colCaseLevel=yes" and parses and
2639-
* applies the key-value arguments.
2637+
* Parse collation attributes from the given locale string and apply them to
2638+
* the open collator.
2639+
*
2640+
* First, the locale string is canonicalized to an ICU format locale ID such
2641+
* as "und@colStrength=primary;colCaseLevel=yes". Then, it parses and applies
2642+
* the key-value arguments.
26402643
*
26412644
* Starting with ICU version 54, the attributes are processed automatically by
26422645
* ucol_open(), so this is only necessary for emulating this behavior on older
@@ -2646,9 +2649,34 @@ pg_attribute_unused()
26462649
static void
26472650
icu_set_collation_attributes(UCollator *collator, const char *loc)
26482651
{
2649-
char *str = asc_tolower(loc, strlen(loc));
2652+
UErrorCode status;
2653+
int32_t len;
2654+
char *icu_locale_id;
2655+
char *lower_str;
2656+
char *str;
2657+
2658+
/*
2659+
* The input locale may be a BCP 47 language tag, e.g.
2660+
* "und-u-kc-ks-level1", which expresses the same attributes in a
2661+
* different form. It will be converted to the equivalent ICU format
2662+
* locale ID, e.g. "und@colcaselevel=yes;colstrength=primary", by
2663+
* uloc_canonicalize().
2664+
*/
2665+
status = U_ZERO_ERROR;
2666+
len = uloc_canonicalize(loc, NULL, 0, &status);
2667+
icu_locale_id = palloc(len + 1);
2668+
status = U_ZERO_ERROR;
2669+
len = uloc_canonicalize(loc, icu_locale_id, len + 1, &status);
2670+
if (U_FAILURE(status))
2671+
ereport(ERROR,
2672+
(errmsg("canonicalization failed for locale string \"%s\": %s",
2673+
loc, u_errorName(status))));
26502674

2651-
str = strchr(str, '@');
2675+
lower_str = asc_tolower(icu_locale_id, strlen(icu_locale_id));
2676+
2677+
pfree(icu_locale_id);
2678+
2679+
str = strchr(lower_str, '@');
26522680
if (!str)
26532681
return;
26542682
str++;
@@ -2663,7 +2691,6 @@ icu_set_collation_attributes(UCollator *collator, const char *loc)
26632691
char *value;
26642692
UColAttribute uattr;
26652693
UColAttributeValue uvalue;
2666-
UErrorCode status;
26672694

26682695
status = U_ZERO_ERROR;
26692696

@@ -2730,6 +2757,8 @@ icu_set_collation_attributes(UCollator *collator, const char *loc)
27302757
loc, u_errorName(status))));
27312758
}
27322759
}
2760+
2761+
pfree(lower_str);
27332762
}
27342763

27352764
#endif /* USE_ICU */

src/test/regress/expected/collate.icu.utf8.out

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1304,6 +1304,14 @@ SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_inse
13041304
t | t
13051305
(1 row)
13061306

1307+
-- test language tags
1308+
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
1309+
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
1310+
?column?
1311+
----------
1312+
t
1313+
(1 row)
1314+
13071315
CREATE TABLE test1cs (x text COLLATE case_sensitive);
13081316
CREATE TABLE test2cs (x text COLLATE case_sensitive);
13091317
CREATE TABLE test3cs (x text COLLATE case_sensitive);

src/test/regress/sql/collate.icu.utf8.sql

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -518,6 +518,10 @@ CREATE COLLATION case_insensitive (provider = icu, locale = '@colStrength=second
518518
SELECT 'abc' <= 'ABC' COLLATE case_sensitive, 'abc' >= 'ABC' COLLATE case_sensitive;
519519
SELECT 'abc' <= 'ABC' COLLATE case_insensitive, 'abc' >= 'ABC' COLLATE case_insensitive;
520520

521+
-- test language tags
522+
CREATE COLLATION lt_insensitive (provider = icu, locale = 'en-u-ks-level1', deterministic = false);
523+
SELECT 'aBcD' COLLATE lt_insensitive = 'AbCd' COLLATE lt_insensitive;
524+
521525
CREATE TABLE test1cs (x text COLLATE case_sensitive);
522526
CREATE TABLE test2cs (x text COLLATE case_sensitive);
523527
CREATE TABLE test3cs (x text COLLATE case_sensitive);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy