Skip to content

Commit aa17c06

Browse files
committed
Add function to import operating system collations
Move this logic out of initdb into a user-callable function. This simplifies the code and makes it possible to update the standard collations later on if additional operating system collations appear. Reviewed-by: Andres Freund <andres@anarazel.de> Reviewed-by: Euler Taveira <euler@timbira.com.br>
1 parent 193a7d7 commit aa17c06

File tree

8 files changed

+229
-172
lines changed

8 files changed

+229
-172
lines changed

doc/src/sgml/charset.sgml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -496,7 +496,7 @@ SELECT * FROM test1 ORDER BY a || b COLLATE "fr_FR";
496496
</para>
497497
</sect2>
498498

499-
<sect2>
499+
<sect2 id="collation-managing">
500500
<title>Managing Collations</title>
501501

502502
<para>

doc/src/sgml/func.sgml

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19190,6 +19190,46 @@ postgres=# SELECT * FROM pg_xlogfile_name_offset(pg_stop_backup());
1919019190
in the database's default tablespace, the tablespace can be specified as 0.
1919119191
</para>
1919219192

19193+
<para>
19194+
<xref linkend="functions-admin-collation"> lists functions used to manage
19195+
collations.
19196+
</para>
19197+
19198+
<table id="functions-admin-collation">
19199+
<title>Collation Management Functions</title>
19200+
<tgroup cols="3">
19201+
<thead>
19202+
<row><entry>Name</entry> <entry>Return Type</entry> <entry>Description</entry></row>
19203+
</thead>
19204+
19205+
<tbody>
19206+
<row>
19207+
<entry>
19208+
<indexterm><primary>pg_import_system_collations</primary></indexterm>
19209+
<literal><function>pg_import_system_collations(<parameter>if_not_exists</> <type>boolean</>, <parameter>schema</> <type>regnamespace</>)</function></literal>
19210+
</entry>
19211+
<entry><type>void</type></entry>
19212+
<entry>Import operating system collations</entry>
19213+
</row>
19214+
</tbody>
19215+
</tgroup>
19216+
</table>
19217+
19218+
<para>
19219+
<function>pg_import_system_collations</> populates the system
19220+
catalog <literal>pg_collation</literal> with collations based on all the
19221+
locales it finds on the operating system. This is
19222+
what <command>initdb</command> uses;
19223+
see <xref linkend="collation-managing"> for more details. If additional
19224+
locales are installed into the operating system later on, this function
19225+
can be run again to add collations for the new locales. In that case, the
19226+
parameter <parameter>if_not_exists</parameter> should be set to true to
19227+
skip over existing collations. The <parameter>schema</parameter>
19228+
parameter would typically be <literal>pg_catalog</literal>, but that is
19229+
not a requirement. (Collation objects based on locales that are no longer
19230+
present on the operating system are never removed by this function.)
19231+
</para>
19232+
1919319233
</sect2>
1919419234

1919519235
<sect2 id="functions-admin-index">

src/backend/catalog/pg_collation.c

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,8 @@ Oid
4141
CollationCreate(const char *collname, Oid collnamespace,
4242
Oid collowner,
4343
int32 collencoding,
44-
const char *collcollate, const char *collctype)
44+
const char *collcollate, const char *collctype,
45+
bool if_not_exists)
4546
{
4647
Relation rel;
4748
TupleDesc tupDesc;
@@ -72,10 +73,21 @@ CollationCreate(const char *collname, Oid collnamespace,
7273
PointerGetDatum(collname),
7374
Int32GetDatum(collencoding),
7475
ObjectIdGetDatum(collnamespace)))
75-
ereport(ERROR,
76+
{
77+
if (if_not_exists)
78+
{
79+
ereport(NOTICE,
7680
(errcode(ERRCODE_DUPLICATE_OBJECT),
77-
errmsg("collation \"%s\" for encoding \"%s\" already exists",
81+
errmsg("collation \"%s\" for encoding \"%s\" already exists, skipping",
7882
collname, pg_encoding_to_char(collencoding))));
83+
return InvalidOid;
84+
}
85+
else
86+
ereport(ERROR,
87+
(errcode(ERRCODE_DUPLICATE_OBJECT),
88+
errmsg("collation \"%s\" for encoding \"%s\" already exists",
89+
collname, pg_encoding_to_char(collencoding))));
90+
}
7991

8092
/*
8193
* Also forbid matching an any-encoding entry. This test of course is not
@@ -86,10 +98,21 @@ CollationCreate(const char *collname, Oid collnamespace,
8698
PointerGetDatum(collname),
8799
Int32GetDatum(-1),
88100
ObjectIdGetDatum(collnamespace)))
89-
ereport(ERROR,
101+
{
102+
if (if_not_exists)
103+
{
104+
ereport(NOTICE,
105+
(errcode(ERRCODE_DUPLICATE_OBJECT),
106+
errmsg("collation \"%s\" already exists, skipping",
107+
collname)));
108+
return InvalidOid;
109+
}
110+
else
111+
ereport(ERROR,
90112
(errcode(ERRCODE_DUPLICATE_OBJECT),
91113
errmsg("collation \"%s\" already exists",
92114
collname)));
115+
}
93116

94117
/* open pg_collation */
95118
rel = heap_open(CollationRelationId, RowExclusiveLock);

src/backend/commands/collationcmds.c

Lines changed: 153 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,11 @@ DefineCollation(ParseState *pstate, List *names, List *parameters)
136136
GetUserId(),
137137
GetDatabaseEncoding(),
138138
collcollate,
139-
collctype);
139+
collctype,
140+
false);
141+
142+
if (!OidIsValid(newoid))
143+
return InvalidObjectAddress;
140144

141145
ObjectAddressSet(address, CollationRelationId, newoid);
142146

@@ -177,3 +181,151 @@ IsThereCollationInNamespace(const char *collname, Oid nspOid)
177181
errmsg("collation \"%s\" already exists in schema \"%s\"",
178182
collname, get_namespace_name(nspOid))));
179183
}
184+
185+
186+
/*
187+
* "Normalize" a locale name, stripping off encoding tags such as
188+
* ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
189+
* -> "br_FR@euro"). Return true if a new, different name was
190+
* generated.
*
* Concretely: each '.' in "old", together with the run of ASCII letters,
* digits and '-' characters that follows it, is dropped; every other
* character is copied to "new" unchanged, and "new" is NUL-terminated.
* The result is true exactly when at least one '.' was seen.
*
* No length is passed in, so the caller must supply a "new" buffer of at
* least strlen(old) + 1 bytes.
191+
*/
192+
pg_attribute_unused()
193+
static bool
194+
normalize_locale_name(char *new, const char *old)
195+
{
196+
char *n = new;
197+
const char *o = old;
198+
bool changed = false;
199+
200+
while (*o)
201+
{
202+
if (*o == '.')
203+
{
204+
/* skip over encoding tag such as ".utf8" or ".UTF-8" */
205+
o++;
206+
while ((*o >= 'A' && *o <= 'Z')
207+
|| (*o >= 'a' && *o <= 'z')
208+
|| (*o >= '0' && *o <= '9')
209+
|| (*o == '-'))
210+
o++;
/* the '.' itself is always removed, so the output differs from the input */
211+
changed = true;
212+
}
213+
else
214+
*n++ = *o++;
215+
}
216+
*n = '\0';
217+
218+
return changed;
219+
}
220+
221+
222+
/*
 * pg_import_system_collations: SQL-callable function that creates a
 * collation for each locale name printed by the "locale -a" command.
 *
 * Arguments: (0) if_not_exists, a boolean passed through to
 * CollationCreate(); (1) the OID of the namespace to create the
 * collations in.  Returns void.
 *
 * Raises an error if the caller is not a superuser, if "locale -a" cannot
 * be started, or if no locale passed the filters below.
 *
 * The import itself is compiled only when HAVE_LOCALE_T is defined and
 * WIN32 is not; in other builds the function only performs the superuser
 * check and returns.
 */
Datum
223+
pg_import_system_collations(PG_FUNCTION_ARGS)
224+
{
225+
#if defined(HAVE_LOCALE_T) && !defined(WIN32)
226+
bool if_not_exists = PG_GETARG_BOOL(0);
227+
Oid nspid = PG_GETARG_OID(1);
228+
229+
FILE *locale_a_handle;
230+
char localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */
231+
int count = 0;
232+
#endif
233+
234+
if (!superuser())
235+
ereport(ERROR,
236+
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
237+
(errmsg("must be superuser to import system collations"))));
238+
239+
#if defined(HAVE_LOCALE_T) && !defined(WIN32)
240+
locale_a_handle = OpenPipeStream("locale -a", "r");
241+
if (locale_a_handle == NULL)
242+
ereport(ERROR,
243+
(errcode_for_file_access(),
244+
errmsg("could not execute command \"%s\": %m",
245+
"locale -a")));
246+
/* one locale name per line of "locale -a" output */
247+
while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
248+
{
249+
int i;
250+
size_t len;
251+
int enc;
252+
bool skip;
253+
char alias[NAMEDATALEN];
254+
255+
len = strlen(localebuf);
256+
/*
 * No trailing newline means the line did not fit in localebuf (or the
 * output ended without a newline).
 * NOTE(review): the unread remainder of an over-long line is returned by
 * the next fgets() call and is then handled as if it were a locale name
 * of its own -- confirm whether the rest of the line should be drained.
 */
257+
if (len == 0 || localebuf[len - 1] != '\n')
258+
{
259+
elog(DEBUG1, "locale name too long, skipped: \"%s\"", localebuf);
260+
continue;
261+
}
/* strip the newline */
262+
localebuf[len - 1] = '\0';
263+
264+
/*
265+
* Some systems have locale names that don't consist entirely of ASCII
266+
* letters (such as "bokm&aring;l" or "fran&ccedil;ais"). This is
267+
* pretty silly, since we need the locale itself to interpret the
268+
* non-ASCII characters. We can't do much with those, so we filter
269+
* them out.
270+
*/
271+
skip = false;
272+
for (i = 0; i < len; i++)
273+
{
274+
if (IS_HIGHBIT_SET(localebuf[i]))
275+
{
276+
skip = true;
277+
break;
278+
}
279+
}
280+
if (skip)
281+
{
282+
elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf);
283+
continue;
284+
}
285+
286+
enc = pg_get_encoding_from_locale(localebuf, false);
287+
if (enc < 0)
288+
{
289+
/* error message printed by pg_get_encoding_from_locale() */
290+
continue;
291+
}
292+
if (!PG_VALID_BE_ENCODING(enc))
293+
continue; /* ignore locales for client-only encodings */
294+
if (enc == PG_SQL_ASCII)
295+
continue; /* C/POSIX are already in the catalog */
296+
/*
 * Counts locales that passed the filters above; the result of
 * CollationCreate() is not examined, so this is not the number of
 * collations actually created.
 */
297+
count++;
298+
299+
CollationCreate(localebuf, nspid, GetUserId(), enc,
300+
localebuf, localebuf, if_not_exists);
301+
/* NOTE(review): presumably makes the new row visible to the alias check below -- confirm */
302+
CommandCounterIncrement();
303+
304+
/*
305+
* Generate aliases such as "en_US" in addition to "en_US.utf8" for
306+
* ease of use. Note that collation names are unique per encoding
307+
* only, so this doesn't clash with "en_US" for LATIN1, say.
308+
*
309+
* This always runs in "if not exists" mode, to skip aliases that
310+
* conflict with an existing locale name for the same encoding. For
311+
* example, "br_FR.iso88591" is normalized to "br_FR", both for
312+
* encoding LATIN1. But the unnormalized locale "br_FR" already
313+
* exists for LATIN1.
314+
*/
315+
if (normalize_locale_name(alias, localebuf))
316+
{
317+
CollationCreate(alias, nspid, GetUserId(), enc,
318+
localebuf, localebuf, true);
319+
CommandCounterIncrement();
320+
}
321+
}
322+
323+
ClosePipeStream(locale_a_handle);
324+
325+
if (count == 0)
326+
ereport(ERROR,
327+
(errmsg("no usable system locales were found")));
328+
#endif /* HAVE_LOCALE_T && !WIN32 */
329+
330+
PG_RETURN_VOID();
331+
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy