Skip to content

Commit d87d548

Browse files
committed
Refactor to add pg_strcoll(), pg_strxfrm(), and variants.
Offers a generally better separation of responsibilities for collation code. Also, a step towards multi-lib ICU, which should be based on a clean separation of the routines required for collation providers.

Callers with NUL-terminated strings should call pg_strcoll() or pg_strxfrm(); callers with strings and their length should call the variants pg_strncoll() or pg_strnxfrm().

Reviewed-by: Peter Eisentraut, Peter Geoghegan
Discussion: https://postgr.es/m/a581136455c940d7bd0ff482d3a2bd51af25a94f.camel%40j-davis.com
1 parent e996073 commit d87d548

File tree

5 files changed

+871
-391
lines changed

5 files changed

+871
-391
lines changed

src/backend/access/hash/hashfunc.c

Lines changed: 34 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -292,21 +292,24 @@ hashtext(PG_FUNCTION_ARGS)
292292
#ifdef USE_ICU
293293
if (mylocale->provider == COLLPROVIDER_ICU)
294294
{
295-
int32_t ulen = -1;
296-
UChar *uchar = NULL;
297-
Size bsize;
298-
uint8_t *buf;
295+
Size bsize, rsize;
296+
char *buf;
297+
const char *keydata = VARDATA_ANY(key);
298+
size_t keylen = VARSIZE_ANY_EXHDR(key);
299299

300-
ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
300+
bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
301+
buf = palloc(bsize + 1);
301302

302-
bsize = ucol_getSortKey(mylocale->info.icu.ucol,
303-
uchar, ulen, NULL, 0);
304-
buf = palloc(bsize);
305-
ucol_getSortKey(mylocale->info.icu.ucol,
306-
uchar, ulen, buf, bsize);
307-
pfree(uchar);
303+
rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
304+
if (rsize != bsize)
305+
elog(ERROR, "pg_strnxfrm() returned unexpected result");
308306

309-
result = hash_any(buf, bsize);
307+
/*
308+
* In principle, there's no reason to include the terminating NUL
309+
* character in the hash, but it was done before and the behavior
310+
* must be preserved.
311+
*/
312+
result = hash_any((uint8_t *) buf, bsize + 1);
310313

311314
pfree(buf);
312315
}
@@ -350,21 +353,25 @@ hashtextextended(PG_FUNCTION_ARGS)
350353
#ifdef USE_ICU
351354
if (mylocale->provider == COLLPROVIDER_ICU)
352355
{
353-
int32_t ulen = -1;
354-
UChar *uchar = NULL;
355-
Size bsize;
356-
uint8_t *buf;
357-
358-
ulen = icu_to_uchar(&uchar, VARDATA_ANY(key), VARSIZE_ANY_EXHDR(key));
359-
360-
bsize = ucol_getSortKey(mylocale->info.icu.ucol,
361-
uchar, ulen, NULL, 0);
362-
buf = palloc(bsize);
363-
ucol_getSortKey(mylocale->info.icu.ucol,
364-
uchar, ulen, buf, bsize);
365-
pfree(uchar);
366-
367-
result = hash_any_extended(buf, bsize, PG_GETARG_INT64(1));
356+
Size bsize, rsize;
357+
char *buf;
358+
const char *keydata = VARDATA_ANY(key);
359+
size_t keylen = VARSIZE_ANY_EXHDR(key);
360+
361+
bsize = pg_strnxfrm(NULL, 0, keydata, keylen, mylocale);
362+
buf = palloc(bsize + 1);
363+
364+
rsize = pg_strnxfrm(buf, bsize + 1, keydata, keylen, mylocale);
365+
if (rsize != bsize)
366+
elog(ERROR, "pg_strnxfrm() returned unexpected result");
367+
368+
/*
369+
* In principle, there's no reason to include the terminating NUL
370+
* character in the hash, but it was done before and the behavior
371+
* must be preserved.
372+
*/
373+
result = hash_any_extended((uint8_t *) buf, bsize + 1,
374+
PG_GETARG_INT64(1));
368375

369376
pfree(buf);
370377
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy