Skip to content

Commit a890ad2

Browse files
committed
selfuncs.c: use pg_strxfrm() instead of strxfrm().
pg_strxfrm() takes a pg_locale_t, so it works properly with all providers. This improves estimates for ICU when performing linear interpolation within a histogram bin.

Previously, convert_string_datum() always used strxfrm() and relied on setlocale(). That did not produce good estimates for non-default or non-libc collations.

Discussion: https://postgr.es/m/89475ee5487d795124f4e25118ea8f1853edb8cb.camel@j-davis.com
1 parent a54d4ed commit a890ad2

File tree

2 files changed

+25
-11
lines changed

2 files changed

+25
-11
lines changed

src/backend/utils/adt/pg_locale.c

Lines changed: 16 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -2124,14 +2124,7 @@ pg_strxfrm_libc(char *dest, const char *src, size_t destsize,
21242124
pg_locale_t locale)
21252125
{
21262126
Assert(locale->provider == COLLPROVIDER_LIBC);
2127-
2128-
#ifdef TRUST_STRXFRM
21292127
return strxfrm_l(dest, src, destsize, locale->info.lt);
2130-
#else
2131-
/* shouldn't happen */
2132-
PGLOCALE_SUPPORT_ERROR(locale->provider);
2133-
return 0; /* keep compiler quiet */
2134-
#endif
21352128
}
21362129

21372130
static size_t
@@ -2340,6 +2333,10 @@ pg_strxfrm_enabled(pg_locale_t locale)
23402333
* The provided 'src' must be nul-terminated. If 'destsize' is zero, 'dest'
23412334
* may be NULL.
23422335
*
2336+
* Not all providers support pg_strxfrm() safely. The caller should check
2337+
* pg_strxfrm_enabled() first, otherwise this function may return wrong
2338+
* results or an error.
2339+
*
23432340
* Returns the number of bytes needed (or more) to store the transformed
23442341
* string, excluding the terminating nul byte. If the value returned is
23452342
* 'destsize' or greater, the resulting contents of 'dest' are undefined.
@@ -2372,6 +2369,10 @@ pg_strxfrm(char *dest, const char *src, size_t destsize, pg_locale_t locale)
23722369
* 'src' does not need to be nul-terminated. If 'destsize' is zero, 'dest' may
23732370
* be NULL.
23742371
*
2372+
* Not all providers support pg_strnxfrm() safely. The caller should check
2373+
* pg_strxfrm_enabled() first, otherwise this function may return wrong
2374+
* results or an error.
2375+
*
23752376
* Returns the number of bytes needed (or more) to store the transformed
23762377
* string, excluding the terminating nul byte. If the value returned is
23772378
* 'destsize' or greater, the resulting contents of 'dest' are undefined.
@@ -2426,6 +2427,10 @@ pg_strxfrm_prefix_enabled(pg_locale_t locale)
24262427
*
24272428
* The provided 'src' must be nul-terminated.
24282429
*
2430+
* Not all providers support pg_strxfrm_prefix() safely. The caller should
2431+
* check pg_strxfrm_prefix_enabled() first, otherwise this function may return
2432+
* wrong results or an error.
2433+
*
24292434
* If destsize is not large enough to hold the resulting byte sequence, stores
24302435
* only the first destsize bytes in 'dest'. Returns the number of bytes
24312436
* actually copied to 'dest'.
@@ -2455,6 +2460,10 @@ pg_strxfrm_prefix(char *dest, const char *src, size_t destsize,
24552460
*
24562461
* The provided 'src' must be nul-terminated.
24572462
*
2463+
* Not all providers support pg_strnxfrm_prefix() safely. The caller should
2464+
* check pg_strxfrm_prefix_enabled() first, otherwise this function may return
2465+
* wrong results or an error.
2466+
*
24582467
* If destsize is not large enough to hold the resulting byte sequence, stores
24592468
* only the first destsize bytes in 'dest'. Returns the number of bytes
24602469
* actually copied to 'dest'.

src/backend/utils/adt/selfuncs.c

Lines changed: 9 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -4639,7 +4639,7 @@ convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
46394639
* On failure (e.g., unsupported typid), set *failure to true;
46404640
* otherwise, that variable is not changed. (We'll return NULL on failure.)
46414641
*
4642-
* When using a non-C locale, we must pass the string through strxfrm()
4642+
* When using a non-C locale, we must pass the string through pg_strxfrm()
46434643
* before continuing, so as to generate correct locale-specific results.
46444644
*/
46454645
static char *
@@ -4673,20 +4673,25 @@ convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure)
46734673

46744674
if (!lc_collate_is_c(collid))
46754675
{
4676+
pg_locale_t mylocale = pg_newlocale_from_collation(collid);
46764677
char *xfrmstr;
46774678
size_t xfrmlen;
46784679
size_t xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;
46794680

46804681
/*
46814682
* XXX: We could guess at a suitable output buffer size and only call
4682-
* strxfrm twice if our guess is too small.
4683+
* pg_strxfrm() twice if our guess is too small.
46834684
*
46844685
* XXX: strxfrm doesn't support UTF-8 encoding on Win32, it can return
46854686
* bogus data or set an error. This is not really a problem unless it
46864687
* crashes since it will only give an estimation error and nothing
46874688
* fatal.
4689+
*
4690+
* XXX: we do not check pg_strxfrm_enabled(). On some platforms and in
4691+
* some cases, libc strxfrm() may return the wrong results, but that
4692+
* will only lead to an estimation error.
46884693
*/
4689-
xfrmlen = strxfrm(NULL, val, 0);
4694+
xfrmlen = pg_strxfrm(NULL, val, 0, mylocale);
46904695
#ifdef WIN32
46914696

46924697
/*
@@ -4698,7 +4703,7 @@ convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure)
46984703
return val;
46994704
#endif
47004705
xfrmstr = (char *) palloc(xfrmlen + 1);
4701-
xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1);
4706+
xfrmlen2 = pg_strxfrm(xfrmstr, val, xfrmlen + 1, mylocale);
47024707

47034708
/*
47044709
* Some systems (e.g., glibc) can return a smaller value from the

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy