Skip to content

Commit a8dfe11

Browse files
committed
Teach chklocale.c how to extract encoding info from Windows locale
names. ITAGAKI Takahiro
1 parent bda5750 commit a8dfe11

File tree

1 file changed

+54
-7
lines changed

1 file changed

+54
-7
lines changed

src/port/chklocale.c

Lines changed: 54 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/port/chklocale.c,v 1.3 2007/09/29 00:01:43 tgl Exp $
11+
* $PostgreSQL: pgsql/src/port/chklocale.c,v 1.4 2007/10/03 17:16:39 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -27,13 +27,12 @@
2727
#include "mb/pg_wchar.h"
2828

2929

30-
#if defined(HAVE_LANGINFO_H) && defined(CODESET)
31-
3230
/*
3331
* This table needs to recognize all the CODESET spellings for supported
3432
* backend encodings, as well as frontend-only encodings where possible
3533
* (the latter case is currently only needed for initdb to recognize
36-
* error situations).
34+
* error situations). On Windows, we rely on entries for codepage
35+
* numbers (CPnnn).
3736
*
3837
* Note that we search the table with pg_strcasecmp(), so variant
3938
* capitalizations don't need their own entries.
@@ -49,23 +48,27 @@ static const struct encoding_match encoding_match_list[] = {
4948
{PG_EUC_JP, "eucJP"},
5049
{PG_EUC_JP, "IBM-eucJP"},
5150
{PG_EUC_JP, "sdeckanji"},
51+
{PG_EUC_JP, "CP20932"},
5252

5353
{PG_EUC_CN, "EUC-CN"},
5454
{PG_EUC_CN, "eucCN"},
5555
{PG_EUC_CN, "IBM-eucCN"},
5656
{PG_EUC_CN, "GB2312"},
5757
{PG_EUC_CN, "dechanzi"},
58+
{PG_EUC_CN, "CP20936"},
5859

5960
{PG_EUC_KR, "EUC-KR"},
6061
{PG_EUC_KR, "eucKR"},
6162
{PG_EUC_KR, "IBM-eucKR"},
6263
{PG_EUC_KR, "deckorean"},
6364
{PG_EUC_KR, "5601"},
65+
{PG_EUC_KR, "CP51949"}, /* or 20949 ? */
6466

6567
{PG_EUC_TW, "EUC-TW"},
6668
{PG_EUC_TW, "eucTW"},
6769
{PG_EUC_TW, "IBM-eucTW"},
6870
{PG_EUC_TW, "cns11643"},
71+
/* No codepage for EUC-TW ? */
6972

7073
{PG_UTF8, "UTF-8"},
7174
{PG_UTF8, "utf8"},
@@ -111,6 +114,7 @@ static const struct encoding_match encoding_match_list[] = {
111114
{PG_LATIN10, "iso885916"},
112115

113116
{PG_KOI8R, "KOI8-R"},
117+
{PG_KOI8R, "CP20866"},
114118

115119
{PG_WIN1252, "CP1252"},
116120
{PG_WIN1253, "CP1253"},
@@ -143,23 +147,56 @@ static const struct encoding_match encoding_match_list[] = {
143147

144148
{PG_SJIS, "SJIS"},
145149
{PG_SJIS, "PCK"},
150+
{PG_SJIS, "CP932"},
146151

147152
{PG_BIG5, "BIG5"},
148153
{PG_BIG5, "BIG5HKSCS"},
154+
{PG_BIG5, "CP950"},
149155

150156
{PG_GBK, "GBK"},
157+
{PG_GBK, "CP936"},
151158

152159
{PG_UHC, "UHC"},
153160

154161
{PG_JOHAB, "JOHAB"},
162+
{PG_JOHAB, "CP1361"},
155163

156164
{PG_GB18030, "GB18030"},
165+
{PG_GB18030, "CP54936"},
157166

158167
{PG_SHIFT_JIS_2004, "SJIS_2004"},
159168

160169
{PG_SQL_ASCII, NULL} /* end marker */
161170
};
162171

172+
#ifdef WIN32
173+
/*
 * On Windows, use CP<codepage number> instead of the nl_langinfo() result.
 *
 * ctype is a locale name; everything after its last '.' is taken to be the
 * codepage.  Returns a malloc'd string of the form "CP<codepage>" (release
 * it with free()), or NULL if the name contains no '.' or if memory could
 * not be allocated.
 */
static char *
win32_langinfo(const char *ctype)
{
	const char *codepage;
	char	   *r;
	size_t		ln;

	/*
	 * Locale format on Win32 is <Language>_<Country>.<CodePage>.
	 * For example, English_USA.1252.
	 */
	codepage = strrchr(ctype, '.');
	if (!codepage)
		return NULL;
	codepage++;					/* step past the '.' */

	ln = strlen(codepage);
	r = malloc(ln + 3);			/* "CP" + codepage + terminating NUL */
	if (!r)
		return NULL;			/* out of memory; don't write through NULL */

	r[0] = 'C';
	r[1] = 'P';
	memcpy(r + 2, codepage, ln + 1);	/* ln + 1 copies the NUL as well */

	return r;
}
197+
#endif /* WIN32 */
198+
199+
#if (defined(HAVE_LANGINFO_H) && defined(CODESET)) || defined(WIN32)
163200

164201
/*
165202
* Given a setting for LC_CTYPE, return the Postgres ID of the associated
@@ -181,6 +218,7 @@ pg_get_encoding_from_locale(const char *ctype)
181218
if (ctype)
182219
{
183220
char *save;
221+
char *name;
184222

185223
save = setlocale(LC_CTYPE, NULL);
186224
if (!save)
@@ -190,15 +228,20 @@ pg_get_encoding_from_locale(const char *ctype)
190228
if (!save)
191229
return PG_SQL_ASCII; /* out of memory; unlikely */
192230

193-
if (!setlocale(LC_CTYPE, ctype))
231+
name = setlocale(LC_CTYPE, ctype);
232+
if (!name)
194233
{
195234
free(save);
196235
return PG_SQL_ASCII; /* bogus ctype passed in? */
197236
}
198237

238+
#ifndef WIN32
199239
sys = nl_langinfo(CODESET);
200240
if (sys)
201241
sys = strdup(sys);
242+
#else
243+
sys = win32_langinfo(name);
244+
#endif
202245

203246
setlocale(LC_CTYPE, save);
204247
free(save);
@@ -209,9 +252,13 @@ pg_get_encoding_from_locale(const char *ctype)
209252
ctype = setlocale(LC_CTYPE, NULL);
210253
if (!ctype)
211254
return PG_SQL_ASCII; /* setlocale() broken? */
255+
#ifndef WIN32
212256
sys = nl_langinfo(CODESET);
213257
if (sys)
214258
sys = strdup(sys);
259+
#else
260+
sys = win32_langinfo(ctype);
261+
#endif
215262
}
216263

217264
if (!sys)
@@ -268,7 +315,7 @@ pg_get_encoding_from_locale(const char *ctype)
268315
return PG_SQL_ASCII;
269316
}
270317

271-
#else /* !(HAVE_LANGINFO_H && CODESET) */
318+
#else /* !((HAVE_LANGINFO_H && CODESET) || WIN32) */
272319

273320
/*
274321
* stub if no platform support
@@ -279,4 +326,4 @@ pg_get_encoding_from_locale(const char *ctype)
279326
return PG_SQL_ASCII;
280327
}
281328

282-
#endif /* HAVE_LANGINFO_H && CODESET */
329+
#endif /* (HAVE_LANGINFO_H && CODESET) || WIN32 */

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy