Skip to content

Commit bf03cfd

Browse files
committed
Windows support in pg_import_system_collations
Windows can enumerate the locales that are either installed or supported by calling EnumSystemLocalesEx(), similar to what is already done in the READ_LOCALE_A_OUTPUT switch. We can refactor some of the logic already used in that switch into a new function create_collation_from_locale(). The enumerated locales have BCP 47 shape, that is, with a hyphen between language and territory, instead of POSIX's underscore. The created collations will retain the BCP 47 shape, but we will also create a POSIX alias, so xx-YY will have an xx_YY alias. A new test collate.windows.win1252 is added that is like collate.linux.utf8. Author: Juan Jose Santamaria Flecha <juanjo.santamaria@gmail.com> Reviewed-by: Dmitry Koval <d.koval@postgrespro.ru> Reviewed-by: Peter Eisentraut <peter.eisentraut@enterprisedb.com> Discussion: https://www.postgresql.org/message-id/flat/0050ec23-34d9-2765-9015-98c04f0e18ac@postgrespro.ru
1 parent 33ab0a2 commit bf03cfd

File tree

6 files changed

+1650
-52
lines changed

6 files changed

+1650
-52
lines changed

src/backend/commands/collationcmds.c

Lines changed: 187 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -499,6 +499,12 @@ pg_collation_actual_version(PG_FUNCTION_ARGS)
499499
#define READ_LOCALE_A_OUTPUT
500500
#endif
501501

502+
/*
 * Decide whether pg_import_system_collations() will enumerate locales with
 * EnumSystemLocalesEx().  That path is compiled in only for WIN32 builds.
 */
#if defined(WIN32)
#define ENUM_SYSTEM_LOCALE
#endif
506+
507+
502508
#ifdef READ_LOCALE_A_OUTPUT
503509
/*
504510
* "Normalize" a libc locale name, stripping off encoding tags such as
@@ -610,6 +616,161 @@ get_icu_locale_comment(const char *localename)
610616
#endif /* USE_ICU */
611617

612618

619+
/*
620+
* Create a new collation using the input locale 'locale'. (subroutine for
621+
* pg_import_system_collations())
622+
*
623+
* 'nspid' is the namespace id where the collation will be created.
624+
*
625+
* 'nvalidp' is incremented if the locale has a valid encoding.
626+
*
627+
* 'ncreatedp' is incremented if the collation is actually created. If the
628+
* collation already exists it will quietly do nothing.
629+
*
630+
* The returned value is the encoding of the locale, -1 if the locale is not
631+
* valid for creating a collation.
632+
*
633+
*/
634+
pg_attribute_unused()
635+
static int
636+
create_collation_from_locale(const char *locale, int nspid,
637+
int *nvalidp, int *ncreatedp)
638+
{
639+
int enc;
640+
Oid collid;
641+
642+
/*
643+
* Some systems have locale names that don't consist entirely of
644+
* ASCII letters (such as "bokm&aring;l" or "fran&ccedil;ais").
645+
* This is pretty silly, since we need the locale itself to
646+
* interpret the non-ASCII characters. We can't do much with
647+
* those, so we filter them out.
648+
*/
649+
if (!pg_is_ascii(locale))
650+
{
651+
elog(DEBUG1, "skipping locale with non-ASCII name: \"%s\"", locale);
652+
return -1;
653+
}
654+
655+
enc = pg_get_encoding_from_locale(locale, false);
656+
if (enc < 0)
657+
{
658+
elog(DEBUG1, "skipping locale with unrecognized encoding: \"%s\"", locale);
659+
return -1;
660+
}
661+
if (!PG_VALID_BE_ENCODING(enc))
662+
{
663+
elog(DEBUG1, "skipping locale with client-only encoding: \"%s\"", locale);
664+
return -1;
665+
}
666+
if (enc == PG_SQL_ASCII)
667+
return -1; /* C/POSIX are already in the catalog */
668+
669+
/* count valid locales found in operating system */
670+
(*nvalidp)++;
671+
672+
/*
673+
* Create a collation named the same as the locale, but quietly
674+
* doing nothing if it already exists. This is the behavior we
675+
* need even at initdb time, because some versions of "locale -a"
676+
* can report the same locale name more than once. And it's
677+
* convenient for later import runs, too, since you just about
678+
* always want to add on new locales without a lot of chatter
679+
* about existing ones.
680+
*/
681+
collid = CollationCreate(locale, nspid, GetUserId(),
682+
COLLPROVIDER_LIBC, true, enc,
683+
locale, locale, NULL,
684+
get_collation_actual_version(COLLPROVIDER_LIBC, locale),
685+
true, true);
686+
if (OidIsValid(collid))
687+
{
688+
(*ncreatedp)++;
689+
690+
/* Must do CCI between inserts to handle duplicates correctly */
691+
CommandCounterIncrement();
692+
}
693+
694+
return enc;
695+
}
696+
697+
698+
#ifdef ENUM_SYSTEM_LOCALE
699+
/* parameter to be passed to the callback function win32_read_locale() */
700+
typedef struct
701+
{
702+
Oid nspid;
703+
int *ncreatedp;
704+
int *nvalidp;
705+
} CollParam;
706+
707+
/*
708+
* Callback function for EnumSystemLocalesEx() in
709+
* pg_import_system_collations(). Creates a collation for every valid locale
710+
* and a POSIX alias collation.
711+
*
712+
* The callback contract is to return TRUE to continue enumerating and FALSE
713+
* to stop enumerating. We always want to continue.
714+
*/
715+
static BOOL CALLBACK
716+
win32_read_locale(LPWSTR pStr, DWORD dwFlags, LPARAM lparam)
717+
{
718+
CollParam *param = (CollParam *) lparam;
719+
char localebuf[NAMEDATALEN];
720+
int result;
721+
int enc;
722+
723+
(void) dwFlags;
724+
725+
result = WideCharToMultiByte(CP_ACP, 0, pStr, -1, localebuf, NAMEDATALEN,
726+
NULL, NULL);
727+
728+
if (result == 0)
729+
{
730+
if (GetLastError() == ERROR_INSUFFICIENT_BUFFER)
731+
elog(DEBUG1, "skipping locale with too-long name: \"%s\"", localebuf);
732+
return TRUE;
733+
}
734+
if (localebuf[0] == '\0')
735+
return TRUE;
736+
737+
enc = create_collation_from_locale(localebuf, param->nspid,
738+
param->nvalidp, param->ncreatedp);
739+
if (enc < 0)
740+
return TRUE;
741+
742+
/*
743+
* Windows will use hyphens between language and territory, where POSIX
744+
* uses an underscore. Simply create a POSIX alias.
745+
*/
746+
if (strchr(localebuf, '-'))
747+
{
748+
char alias[NAMEDATALEN];
749+
Oid collid;
750+
751+
strcpy(alias, localebuf);
752+
for (char *p = alias; *p; p++)
753+
if (*p == '-')
754+
*p = '_';
755+
756+
collid = CollationCreate(alias, param->nspid, GetUserId(),
757+
COLLPROVIDER_LIBC, true, enc,
758+
localebuf, localebuf, NULL,
759+
get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
760+
true, true);
761+
if (OidIsValid(collid))
762+
{
763+
(*param->ncreatedp)++;
764+
765+
CommandCounterIncrement();
766+
}
767+
}
768+
769+
return TRUE;
770+
}
771+
#endif /* ENUM_SYSTEM_LOCALE */
772+
773+
613774
/*
614775
* pg_import_system_collations: add known system collations to pg_collation
615776
*/
@@ -668,58 +829,9 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
668829
}
669830
localebuf[len - 1] = '\0';
670831

671-
/*
672-
* Some systems have locale names that don't consist entirely of
673-
* ASCII letters (such as "bokm&aring;l" or "fran&ccedil;ais").
674-
* This is pretty silly, since we need the locale itself to
675-
* interpret the non-ASCII characters. We can't do much with
676-
* those, so we filter them out.
677-
*/
678-
if (!pg_is_ascii(localebuf))
679-
{
680-
elog(DEBUG1, "skipping locale with non-ASCII name: \"%s\"", localebuf);
681-
continue;
682-
}
683-
684-
enc = pg_get_encoding_from_locale(localebuf, false);
832+
enc = create_collation_from_locale(localebuf, nspid, &nvalid, &ncreated);
685833
if (enc < 0)
686-
{
687-
elog(DEBUG1, "skipping locale with unrecognized encoding: \"%s\"",
688-
localebuf);
689-
continue;
690-
}
691-
if (!PG_VALID_BE_ENCODING(enc))
692-
{
693-
elog(DEBUG1, "skipping locale with client-only encoding: \"%s\"", localebuf);
694834
continue;
695-
}
696-
if (enc == PG_SQL_ASCII)
697-
continue; /* C/POSIX are already in the catalog */
698-
699-
/* count valid locales found in operating system */
700-
nvalid++;
701-
702-
/*
703-
* Create a collation named the same as the locale, but quietly
704-
* doing nothing if it already exists. This is the behavior we
705-
* need even at initdb time, because some versions of "locale -a"
706-
* can report the same locale name more than once. And it's
707-
* convenient for later import runs, too, since you just about
708-
* always want to add on new locales without a lot of chatter
709-
* about existing ones.
710-
*/
711-
collid = CollationCreate(localebuf, nspid, GetUserId(),
712-
COLLPROVIDER_LIBC, true, enc,
713-
localebuf, localebuf, NULL,
714-
get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
715-
true, true);
716-
if (OidIsValid(collid))
717-
{
718-
ncreated++;
719-
720-
/* Must do CCI between inserts to handle duplicates correctly */
721-
CommandCounterIncrement();
722-
}
723835

724836
/*
725837
* Generate aliases such as "en_US" in addition to "en_US.utf8"
@@ -857,5 +969,30 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
857969
}
858970
#endif /* USE_ICU */
859971

972+
/* Load collations known to WIN32 */
973+
#ifdef ENUM_SYSTEM_LOCALE
974+
{
975+
int nvalid = 0;
976+
CollParam param;
977+
978+
param.nspid = nspid;
979+
param.ncreatedp = &ncreated;
980+
param.nvalidp = &nvalid;
981+
982+
/*
983+
* Enumerate the locales that are either installed on or supported
984+
* by the OS.
985+
*/
986+
if (!EnumSystemLocalesEx(win32_read_locale, LOCALE_ALL,
987+
(LPARAM) &param, NULL))
988+
_dosmaperr(GetLastError());
989+
990+
/* Give a warning if EnumSystemLocalesEx seems to be malfunctioning */
991+
if (nvalid == 0)
992+
ereport(WARNING,
993+
(errmsg("no usable system locales were found")));
994+
}
995+
#endif /* ENUM_SYSTEM_LOCALE */
996+
860997
PG_RETURN_INT32(ncreated);
861998
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy