Skip to content

Commit b615920

Browse files
committed
Fix memory leakage in ICU encoding conversion, and other code review.
Callers of icu_to_uchar() neglected to pfree the result string when done with it. This results in catastrophic memory leaks in varstr_cmp(), because of our prevailing assumption that btree comparison functions don't leak memory. For safety, make all the call sites clean up leaks, though I suspect that we could get away without it in formatting.c. I audited callers of icu_from_uchar() as well, but found no places that seemed to have a comparable issue.

Add function API specifications for icu_to_uchar() and icu_from_uchar(); the lack of any thought-through specification is perhaps not unrelated to the existence of this bug in the first place. Fix icu_to_uchar() to guarantee a nul-terminated result; although no existing caller appears to care, the fact that it would have been nul-terminated except in extreme corner cases seems ideally designed to bite someone on the rear someday. Fix ucnv_fromUChars() destCapacity argument --- in the worst case, that could perhaps have led to a non-nul-terminated result, too. Fix icu_from_uchar() to have a more reasonable definition of the function result --- no callers are actually paying attention, so this isn't a live bug, but it's certainly sloppily designed. Const-ify icu_from_uchar()'s input string for consistency.

That is not the end of what needs to be done to these functions, but it's as much as I have the patience for right now.

Discussion: https://postgr.es/m/1955.1498181798@sss.pgh.pa.us
1 parent 8be8510 commit b615920

File tree

5 files changed

+60
-11
lines changed

5 files changed

+60
-11
lines changed

src/backend/commands/collationcmds.c

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -381,6 +381,10 @@ normalize_libc_locale_name(char *new, const char *old)
381381

382382

383383
#ifdef USE_ICU
384+
/*
385+
* Get the ICU language tag for a locale name.
386+
* The result is a palloc'd string.
387+
*/
384388
static char *
385389
get_icu_language_tag(const char *localename)
386390
{
@@ -397,7 +401,10 @@ get_icu_language_tag(const char *localename)
397401
return pstrdup(buf);
398402
}
399403

400-
404+
/*
405+
* Get a comment (specifically, the display name) for an ICU locale.
406+
* The result is a palloc'd string.
407+
*/
401408
static char *
402409
get_icu_locale_comment(const char *localename)
403410
{
@@ -407,10 +414,12 @@ get_icu_locale_comment(const char *localename)
407414
char *result;
408415

409416
status = U_ZERO_ERROR;
410-
len_uchar = uloc_getDisplayName(localename, "en", &displayname[0], sizeof(displayname), &status);
417+
len_uchar = uloc_getDisplayName(localename, "en",
418+
&displayname[0], sizeof(displayname),
419+
&status);
411420
if (U_FAILURE(status))
412421
ereport(ERROR,
413-
(errmsg("could get display name for locale \"%s\": %s",
422+
(errmsg("could not get display name for locale \"%s\": %s",
414423
localename, u_errorName(status))));
415424

416425
icu_from_uchar(&result, displayname, len_uchar);

src/backend/utils/adt/formatting.c

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1561,6 +1561,7 @@ str_tolower(const char *buff, size_t nbytes, Oid collid)
15611561
len_conv = icu_convert_case(u_strToLower, mylocale,
15621562
&buff_conv, buff_uchar, len_uchar);
15631563
icu_from_uchar(&result, buff_conv, len_conv);
1564+
pfree(buff_uchar);
15641565
}
15651566
else
15661567
#endif
@@ -1684,6 +1685,7 @@ str_toupper(const char *buff, size_t nbytes, Oid collid)
16841685
len_conv = icu_convert_case(u_strToUpper, mylocale,
16851686
&buff_conv, buff_uchar, len_uchar);
16861687
icu_from_uchar(&result, buff_conv, len_conv);
1688+
pfree(buff_uchar);
16871689
}
16881690
else
16891691
#endif
@@ -1808,6 +1810,7 @@ str_initcap(const char *buff, size_t nbytes, Oid collid)
18081810
len_conv = icu_convert_case(u_strToTitle_default_BI, mylocale,
18091811
&buff_conv, buff_uchar, len_uchar);
18101812
icu_from_uchar(&result, buff_conv, len_conv);
1813+
pfree(buff_uchar);
18111814
}
18121815
else
18131816
#endif

src/backend/utils/adt/pg_locale.c

Lines changed: 30 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1486,6 +1486,18 @@ init_icu_converter(void)
14861486
icu_converter = conv;
14871487
}
14881488

1489+
/*
1490+
* Convert a string in the database encoding into a string of UChars.
1491+
*
1492+
* The source string at buff is of length nbytes
1493+
* (it needn't be nul-terminated)
1494+
*
1495+
* *buff_uchar receives a pointer to the palloc'd result string, and
1496+
* the function's result is the number of UChars generated.
1497+
*
1498+
* The result string is nul-terminated, though most callers rely on the
1499+
* result length instead.
1500+
*/
14891501
int32_t
14901502
icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
14911503
{
@@ -1494,18 +1506,30 @@ icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
14941506

14951507
init_icu_converter();
14961508

1497-
len_uchar = 2 * nbytes; /* max length per docs */
1509+
len_uchar = 2 * nbytes + 1; /* max length per docs */
14981510
*buff_uchar = palloc(len_uchar * sizeof(**buff_uchar));
14991511
status = U_ZERO_ERROR;
1500-
len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar, buff, nbytes, &status);
1512+
len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar,
1513+
buff, nbytes, &status);
15011514
if (U_FAILURE(status))
15021515
ereport(ERROR,
15031516
(errmsg("ucnv_toUChars failed: %s", u_errorName(status))));
15041517
return len_uchar;
15051518
}
15061519

1520+
/*
1521+
* Convert a string of UChars into the database encoding.
1522+
*
1523+
* The source string at buff_uchar is of length len_uchar
1524+
* (it needn't be nul-terminated)
1525+
*
1526+
* *result receives a pointer to the palloc'd result string, and the
1527+
* function's result is the number of bytes generated (not counting nul).
1528+
*
1529+
* The result string is nul-terminated.
1530+
*/
15071531
int32_t
1508-
icu_from_uchar(char **result, UChar *buff_uchar, int32_t len_uchar)
1532+
icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
15091533
{
15101534
UErrorCode status;
15111535
int32_t len_result;
@@ -1515,13 +1539,14 @@ icu_from_uchar(char **result, UChar *buff_uchar, int32_t len_uchar)
15151539
len_result = UCNV_GET_MAX_BYTES_FOR_STRING(len_uchar, ucnv_getMaxCharSize(icu_converter));
15161540
*result = palloc(len_result + 1);
15171541
status = U_ZERO_ERROR;
1518-
ucnv_fromUChars(icu_converter, *result, len_result, buff_uchar, len_uchar, &status);
1542+
len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
1543+
buff_uchar, len_uchar, &status);
15191544
if (U_FAILURE(status))
15201545
ereport(ERROR,
15211546
(errmsg("ucnv_fromUChars failed: %s", u_errorName(status))));
15221547
return len_result;
15231548
}
1524-
#endif
1549+
#endif /* USE_ICU */
15251550

15261551
/*
15271552
* These functions convert from/to libc's wchar_t, *not* pg_wchar_t.

src/backend/utils/adt/varlena.c

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1569,6 +1569,9 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2, Oid collid)
15691569
result = ucol_strcoll(mylocale->info.icu.ucol,
15701570
uchar1, ulen1,
15711571
uchar2, ulen2);
1572+
1573+
pfree(uchar1);
1574+
pfree(uchar2);
15721575
}
15731576
#else /* not USE_ICU */
15741577
/* shouldn't happen */
@@ -2155,6 +2158,9 @@ varstrfastcmp_locale(Datum x, Datum y, SortSupport ssup)
21552158
result = ucol_strcoll(sss->locale->info.icu.ucol,
21562159
uchar1, ulen1,
21572160
uchar2, ulen2);
2161+
2162+
pfree(uchar1);
2163+
pfree(uchar2);
21582164
}
21592165
#else /* not USE_ICU */
21602166
/* shouldn't happen */
@@ -2279,7 +2285,7 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
22792285
Size bsize;
22802286
#ifdef USE_ICU
22812287
int32_t ulen = -1;
2282-
UChar *uchar;
2288+
UChar *uchar = NULL;
22832289
#endif
22842290

22852291
/*
@@ -2354,7 +2360,8 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
23542360
&status);
23552361
if (U_FAILURE(status))
23562362
ereport(ERROR,
2357-
(errmsg("sort key generation failed: %s", u_errorName(status))));
2363+
(errmsg("sort key generation failed: %s",
2364+
u_errorName(status))));
23582365
}
23592366
else
23602367
bsize = ucol_getSortKey(sss->locale->info.icu.ucol,
@@ -2394,6 +2401,11 @@ varstr_abbrev_convert(Datum original, SortSupport ssup)
23942401
* okay. See remarks on bytea case above.)
23952402
*/
23962403
memcpy(pres, sss->buf2, Min(sizeof(Datum), bsize));
2404+
2405+
#ifdef USE_ICU
2406+
if (uchar)
2407+
pfree(uchar);
2408+
#endif
23972409
}
23982410

23992411
/*

src/include/utils/pg_locale.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -93,7 +93,7 @@ extern char *get_collation_actual_version(char collprovider, const char *collcol
9393

9494
#ifdef USE_ICU
9595
extern int32_t icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes);
96-
extern int32_t icu_from_uchar(char **result, UChar *buff_uchar, int32_t len_uchar);
96+
extern int32_t icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar);
9797
#endif
9898

9999
/* These functions convert from/to libc's wchar_t, *not* pg_wchar_t */

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy