Skip to content

Commit 72fe6d2

Browse files
committed
Make collation not depend on setlocale().
Now that the result of pg_newlocale_from_collation() is always non-NULL, we can move the collate_is_c and ctype_is_c flags into pg_locale_t. That simplifies the logic in lc_collate_is_c() and lc_ctype_is_c(), removing the dependence on setlocale().

This commit also eliminates the multi-stage initialization of the collation cache. As long as we have catalog access, it is now safe to call pg_newlocale_from_collation() without checking lc_collate_is_c() first.

Discussion: https://postgr.es/m/cfd9eb85-c52a-4ec9-a90e-a5e4de56e57d@eisentraut.org
Reviewed-by: Peter Eisentraut, Andreas Karlsson
1 parent 9b282a9 commit 72fe6d2

File tree

4 files changed

+81
-154
lines changed

4 files changed

+81
-154
lines changed

src/backend/utils/adt/pg_locale.c

Lines changed: 26 additions & 154 deletions
Original file line numberDiff line numberDiff line change
@@ -128,9 +128,6 @@ static bool CurrentLCTimeValid = false;
128128
typedef struct
129129
{
130130
Oid collid; /* hash key: pg_collation OID */
131-
bool collate_is_c; /* is collation's LC_COLLATE C? */
132-
bool ctype_is_c; /* is collation's LC_CTYPE C? */
133-
bool flags_valid; /* true if above flags are valid */
134131
pg_locale_t locale; /* locale_t struct, or 0 if not valid */
135132

136133
/* needed for simplehash */
@@ -1225,29 +1222,13 @@ IsoLocaleName(const char *winlocname)
12251222
/*
12261223
* Cache mechanism for collation information.
12271224
*
1228-
* We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
1229-
* (or POSIX), so we can optimize a few code paths in various places.
1230-
* For the built-in C and POSIX collations, we can know that without even
1231-
* doing a cache lookup, but we want to support aliases for C/POSIX too.
1232-
* For the "default" collation, there are separate static cache variables,
1233-
* since consulting the pg_collation catalog doesn't tell us what we need.
1234-
*
1235-
* Also, if a pg_locale_t has been requested for a collation, we cache that
1236-
* for the life of a backend.
1237-
*
1238-
* Note that some code relies on the flags not reporting false negatives
1239-
* (that is, saying it's not C when it is). For example, char2wchar()
1240-
* could fail if the locale is C, so str_tolower() shouldn't call it
1241-
* in that case.
1242-
*
12431225
* Note that we currently lack any way to flush the cache. Since we don't
12441226
* support ALTER COLLATION, this is OK. The worst case is that someone
12451227
* drops a collation, and a useless cache entry hangs around in existing
12461228
* backends.
12471229
*/
1248-
12491230
static collation_cache_entry *
1250-
lookup_collation_cache(Oid collation, bool set_flags)
1231+
lookup_collation_cache(Oid collation)
12511232
{
12521233
collation_cache_entry *cache_entry;
12531234
bool found;
@@ -1271,59 +1252,9 @@ lookup_collation_cache(Oid collation, bool set_flags)
12711252
* Make sure cache entry is marked invalid, in case we fail before
12721253
* setting things.
12731254
*/
1274-
cache_entry->flags_valid = false;
12751255
cache_entry->locale = 0;
12761256
}
12771257

1278-
if (set_flags && !cache_entry->flags_valid)
1279-
{
1280-
/* Attempt to set the flags */
1281-
HeapTuple tp;
1282-
Form_pg_collation collform;
1283-
1284-
tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
1285-
if (!HeapTupleIsValid(tp))
1286-
elog(ERROR, "cache lookup failed for collation %u", collation);
1287-
collform = (Form_pg_collation) GETSTRUCT(tp);
1288-
1289-
if (collform->collprovider == COLLPROVIDER_BUILTIN)
1290-
{
1291-
Datum datum;
1292-
const char *colllocale;
1293-
1294-
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
1295-
colllocale = TextDatumGetCString(datum);
1296-
1297-
cache_entry->collate_is_c = true;
1298-
cache_entry->ctype_is_c = (strcmp(colllocale, "C") == 0);
1299-
}
1300-
else if (collform->collprovider == COLLPROVIDER_LIBC)
1301-
{
1302-
Datum datum;
1303-
const char *collcollate;
1304-
const char *collctype;
1305-
1306-
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collcollate);
1307-
collcollate = TextDatumGetCString(datum);
1308-
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
1309-
collctype = TextDatumGetCString(datum);
1310-
1311-
cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
1312-
(strcmp(collcollate, "POSIX") == 0));
1313-
cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
1314-
(strcmp(collctype, "POSIX") == 0));
1315-
}
1316-
else
1317-
{
1318-
cache_entry->collate_is_c = false;
1319-
cache_entry->ctype_is_c = false;
1320-
}
1321-
1322-
cache_entry->flags_valid = true;
1323-
1324-
ReleaseSysCache(tp);
1325-
}
1326-
13271258
return cache_entry;
13281259
}
13291260

@@ -1341,47 +1272,6 @@ lc_collate_is_c(Oid collation)
13411272
if (!OidIsValid(collation))
13421273
return false;
13431274

1344-
/*
1345-
* If we're asked about the default collation, we have to inquire of the C
1346-
* library. Cache the result so we only have to compute it once.
1347-
*/
1348-
if (collation == DEFAULT_COLLATION_OID)
1349-
{
1350-
static int result = -1;
1351-
const char *localeptr;
1352-
1353-
if (result >= 0)
1354-
return (bool) result;
1355-
1356-
if (default_locale.provider == COLLPROVIDER_BUILTIN)
1357-
{
1358-
result = true;
1359-
return (bool) result;
1360-
}
1361-
else if (default_locale.provider == COLLPROVIDER_ICU)
1362-
{
1363-
result = false;
1364-
return (bool) result;
1365-
}
1366-
else if (default_locale.provider == COLLPROVIDER_LIBC)
1367-
{
1368-
localeptr = setlocale(LC_CTYPE, NULL);
1369-
if (!localeptr)
1370-
elog(ERROR, "invalid LC_CTYPE setting");
1371-
}
1372-
else
1373-
elog(ERROR, "unexpected collation provider '%c'",
1374-
default_locale.provider);
1375-
1376-
if (strcmp(localeptr, "C") == 0)
1377-
result = true;
1378-
else if (strcmp(localeptr, "POSIX") == 0)
1379-
result = true;
1380-
else
1381-
result = false;
1382-
return (bool) result;
1383-
}
1384-
13851275
/*
13861276
* If we're asked about the built-in C/POSIX collations, we know that.
13871277
*/
@@ -1392,7 +1282,7 @@ lc_collate_is_c(Oid collation)
13921282
/*
13931283
* Otherwise, we have to consult pg_collation, but we cache that.
13941284
*/
1395-
return (lookup_collation_cache(collation, true))->collate_is_c;
1285+
return pg_newlocale_from_collation(collation)->collate_is_c;
13961286
}
13971287

13981288
/*
@@ -1408,46 +1298,6 @@ lc_ctype_is_c(Oid collation)
14081298
if (!OidIsValid(collation))
14091299
return false;
14101300

1411-
/*
1412-
* If we're asked about the default collation, we have to inquire of the C
1413-
* library. Cache the result so we only have to compute it once.
1414-
*/
1415-
if (collation == DEFAULT_COLLATION_OID)
1416-
{
1417-
static int result = -1;
1418-
const char *localeptr;
1419-
1420-
if (result >= 0)
1421-
return (bool) result;
1422-
1423-
if (default_locale.provider == COLLPROVIDER_BUILTIN)
1424-
{
1425-
localeptr = default_locale.info.builtin.locale;
1426-
}
1427-
else if (default_locale.provider == COLLPROVIDER_ICU)
1428-
{
1429-
result = false;
1430-
return (bool) result;
1431-
}
1432-
else if (default_locale.provider == COLLPROVIDER_LIBC)
1433-
{
1434-
localeptr = setlocale(LC_CTYPE, NULL);
1435-
if (!localeptr)
1436-
elog(ERROR, "invalid LC_CTYPE setting");
1437-
}
1438-
else
1439-
elog(ERROR, "unexpected collation provider '%c'",
1440-
default_locale.provider);
1441-
1442-
if (strcmp(localeptr, "C") == 0)
1443-
result = true;
1444-
else if (strcmp(localeptr, "POSIX") == 0)
1445-
result = true;
1446-
else
1447-
result = false;
1448-
return (bool) result;
1449-
}
1450-
14511301
/*
14521302
* If we're asked about the built-in C/POSIX collations, we know that.
14531303
*/
@@ -1458,7 +1308,7 @@ lc_ctype_is_c(Oid collation)
14581308
/*
14591309
* Otherwise, we have to consult pg_collation, but we cache that.
14601310
*/
1461-
return (lookup_collation_cache(collation, true))->ctype_is_c;
1311+
return pg_newlocale_from_collation(collation)->ctype_is_c;
14621312
}
14631313

14641314
/* simple subroutine for reporting errors from newlocale() */
@@ -1647,6 +1497,9 @@ init_database_collation(void)
16471497

16481498
builtin_validate_locale(dbform->encoding, datlocale);
16491499

1500+
default_locale.collate_is_c = true;
1501+
default_locale.ctype_is_c = (strcmp(datlocale, "C") == 0);
1502+
16501503
default_locale.info.builtin.locale = MemoryContextStrdup(
16511504
TopMemoryContext, datlocale);
16521505
}
@@ -1658,6 +1511,9 @@ init_database_collation(void)
16581511
datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datlocale);
16591512
datlocale = TextDatumGetCString(datum);
16601513

1514+
default_locale.collate_is_c = false;
1515+
default_locale.ctype_is_c = false;
1516+
16611517
datum = SysCacheGetAttr(DATABASEOID, tup, Anum_pg_database_daticurules, &isnull);
16621518
if (!isnull)
16631519
icurules = TextDatumGetCString(datum);
@@ -1678,6 +1534,11 @@ init_database_collation(void)
16781534
datum = SysCacheGetAttrNotNull(DATABASEOID, tup, Anum_pg_database_datctype);
16791535
datctype = TextDatumGetCString(datum);
16801536

1537+
default_locale.collate_is_c = (strcmp(datcollate, "C") == 0) ||
1538+
(strcmp(datcollate, "POSIX") == 0);
1539+
default_locale.ctype_is_c = (strcmp(datctype, "C") == 0) ||
1540+
(strcmp(datctype, "POSIX") == 0);
1541+
16811542
make_libc_collator(datcollate, datctype, &default_locale);
16821543
}
16831544

@@ -1712,7 +1573,7 @@ pg_newlocale_from_collation(Oid collid)
17121573
if (collid == DEFAULT_COLLATION_OID)
17131574
return &default_locale;
17141575

1715-
cache_entry = lookup_collation_cache(collid, false);
1576+
cache_entry = lookup_collation_cache(collid);
17161577

17171578
if (cache_entry->locale == 0)
17181579
{
@@ -1741,6 +1602,9 @@ pg_newlocale_from_collation(Oid collid)
17411602
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
17421603
locstr = TextDatumGetCString(datum);
17431604

1605+
result.collate_is_c = true;
1606+
result.ctype_is_c = (strcmp(locstr, "C") == 0);
1607+
17441608
builtin_validate_locale(GetDatabaseEncoding(), locstr);
17451609

17461610
result.info.builtin.locale = MemoryContextStrdup(TopMemoryContext,
@@ -1756,6 +1620,11 @@ pg_newlocale_from_collation(Oid collid)
17561620
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_collctype);
17571621
collctype = TextDatumGetCString(datum);
17581622

1623+
result.collate_is_c = (strcmp(collcollate, "C") == 0) ||
1624+
(strcmp(collcollate, "POSIX") == 0);
1625+
result.ctype_is_c = (strcmp(collctype, "C") == 0) ||
1626+
(strcmp(collctype, "POSIX") == 0);
1627+
17591628
make_libc_collator(collcollate, collctype, &result);
17601629
}
17611630
else if (collform->collprovider == COLLPROVIDER_ICU)
@@ -1766,6 +1635,9 @@ pg_newlocale_from_collation(Oid collid)
17661635
datum = SysCacheGetAttrNotNull(COLLOID, tp, Anum_pg_collation_colllocale);
17671636
iculocstr = TextDatumGetCString(datum);
17681637

1638+
result.collate_is_c = false;
1639+
result.ctype_is_c = false;
1640+
17691641
datum = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collicurules, &isnull);
17701642
if (!isnull)
17711643
icurules = TextDatumGetCString(datum);

src/include/utils/pg_locale.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,25 @@ extern void cache_locale_time(void);
6969
/*
7070
* We use a discriminated union to hold either a locale_t or an ICU collator.
7171
* pg_locale_t is occasionally checked for truth, so make it a pointer.
72+
*
73+
* Also, hold two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
74+
* (or POSIX), so we can optimize a few code paths in various places. For the
75+
* built-in C and POSIX collations, we can know that without even doing a
76+
* cache lookup, but we want to support aliases for C/POSIX too. For the
77+
* "default" collation, the flags are kept in default_locale instead, since
78+
* consulting the pg_collation catalog doesn't tell us what we need.
79+
*
80+
* Note that some code relies on the flags not reporting false negatives
81+
* (that is, saying it's not C when it is). For example, char2wchar()
82+
* could fail if the locale is C, so str_tolower() shouldn't call it
83+
* in that case.
7284
*/
7385
struct pg_locale_struct
7486
{
7587
char provider;
7688
bool deterministic;
89+
bool collate_is_c;
90+
bool ctype_is_c;
7791
union
7892
{
7993
struct

src/test/regress/expected/collate.utf8.out

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,32 @@ SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
99
\endif
1010
SET client_encoding TO UTF8;
1111
--
12+
-- Test builtin "C"
13+
--
14+
CREATE COLLATION regress_builtin_c (
15+
provider = builtin, locale = 'C');
16+
-- non-ASCII characters are unchanged
17+
SELECT LOWER(U&'\00C1' COLLATE regress_builtin_c) = U&'\00C1';
18+
?column?
19+
----------
20+
t
21+
(1 row)
22+
23+
SELECT UPPER(U&'\00E1' COLLATE regress_builtin_c) = U&'\00E1';
24+
?column?
25+
----------
26+
t
27+
(1 row)
28+
29+
-- non-ASCII characters are not alphabetic
30+
SELECT U&'\00C1\00E1' !~ '[[:alpha:]]' COLLATE regress_builtin_c;
31+
?column?
32+
----------
33+
t
34+
(1 row)
35+
36+
DROP COLLATION regress_builtin_c;
37+
--
1238
-- Test PG_C_UTF8
1339
--
1440
CREATE COLLATION regress_pg_c_utf8 (

src/test/regress/sql/collate.utf8.sql

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,21 @@ SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
1111

1212
SET client_encoding TO UTF8;
1313

14+
--
15+
-- Test builtin "C"
16+
--
17+
CREATE COLLATION regress_builtin_c (
18+
provider = builtin, locale = 'C');
19+
20+
-- non-ASCII characters are unchanged
21+
SELECT LOWER(U&'\00C1' COLLATE regress_builtin_c) = U&'\00C1';
22+
SELECT UPPER(U&'\00E1' COLLATE regress_builtin_c) = U&'\00E1';
23+
24+
-- non-ASCII characters are not alphabetic
25+
SELECT U&'\00C1\00E1' !~ '[[:alpha:]]' COLLATE regress_builtin_c;
26+
27+
DROP COLLATION regress_builtin_c;
28+
1429
--
1530
-- Test PG_C_UTF8
1631
--

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy