Skip to content

Commit 767a902

Browse files
committed
Add small hack to support use of Unicode-based locales on WIN32. This
is not adequately tested yet, but let's get it into beta1 so it can be tested. Magnus Hagander and Tom Lane.
1 parent a0a0bc0 commit 767a902

File tree

2 files changed: +197 -7 lines changed

src/backend/utils/adt/oracle_compat.c

Lines changed: 112 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,7 @@
99
*
1010
*
1111
* IDENTIFICATION
12-
* $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.60 2005/05/07 15:18:17 momjian Exp $
12+
* $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.61 2005/08/24 17:50:00 tgl Exp $
1313
*
1414
*-------------------------------------------------------------------------
1515
*/
@@ -149,6 +149,117 @@ wcstotext(const wchar_t *str, int ncodes)
149149
#endif /* USE_WIDE_UPPER_LOWER */
150150

151151

152+
/*
153+
* On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding.
154+
* To make use of the upper/lower functionality, we need to map UTF8 to
155+
* UTF16, which for some reason mbstowcs and wcstombs won't do for us.
156+
* This conversion layer takes care of it.
157+
*/
158+
159+
#ifdef WIN32
160+
161+
/* texttowcs for the case of UTF8 to UTF16 */
162+
static wchar_t *
163+
win32_utf8_texttowcs(const text *txt)
164+
{
165+
int nbytes = VARSIZE(txt) - VARHDRSZ;
166+
wchar_t *result;
167+
int r;
168+
169+
/* Overflow paranoia */
170+
if (nbytes < 0 ||
171+
nbytes > (int) (INT_MAX / sizeof(wchar_t)) -1)
172+
ereport(ERROR,
173+
(errcode(ERRCODE_OUT_OF_MEMORY),
174+
errmsg("out of memory")));
175+
176+
/* Output workspace cannot have more codes than input bytes */
177+
result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
178+
179+
/* stupid Microsloth API does not work for zero-length input */
180+
if (nbytes == 0)
181+
r = 0;
182+
else
183+
{
184+
/* Do the conversion */
185+
r = MultiByteToWideChar(CP_UTF8, 0, VARDATA(txt), nbytes,
186+
result, nbytes);
187+
188+
if (!r) /* assume it's NO_UNICODE_TRANSLATION */
189+
{
190+
/* see notes above about error reporting */
191+
pg_verifymbstr(VARDATA(txt), nbytes, false);
192+
ereport(ERROR,
193+
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
194+
errmsg("invalid multibyte character for locale"),
195+
errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
196+
}
197+
}
198+
199+
Assert(r <= nbytes);
200+
result[r] = 0;
201+
202+
return result;
203+
}
204+
205+
/* wcstotext for the case of UTF16 to UTF8 */
206+
static text *
207+
win32_utf8_wcstotext(const wchar_t *str)
208+
{
209+
text *result;
210+
int nbytes;
211+
int r;
212+
213+
nbytes = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);
214+
if (nbytes == 0) /* shouldn't happen */
215+
ereport(ERROR,
216+
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
217+
errmsg("UTF16 to UTF8 translation failed: %lu",
218+
GetLastError())));
219+
220+
result = palloc(nbytes+VARHDRSZ);
221+
222+
r = WideCharToMultiByte(CP_UTF8, 0, str, -1, VARDATA(result), nbytes,
223+
NULL, NULL);
224+
if (r == 0) /* shouldn't happen */
225+
ereport(ERROR,
226+
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
227+
errmsg("UTF16 to UTF8 translation failed: %lu",
228+
GetLastError())));
229+
230+
VARATT_SIZEP(result) = nbytes + VARHDRSZ - 1; /* -1 to ignore null */
231+
232+
return result;
233+
}
234+
235+
/* interface layer to check which encoding is in use */
236+
237+
static wchar_t *
238+
win32_texttowcs(const text *txt)
239+
{
240+
if (GetDatabaseEncoding() == PG_UTF8)
241+
return win32_utf8_texttowcs(txt);
242+
else
243+
return texttowcs(txt);
244+
}
245+
246+
static text *
247+
win32_wcstotext(const wchar_t *str, int ncodes)
248+
{
249+
if (GetDatabaseEncoding() == PG_UTF8)
250+
return win32_utf8_wcstotext(str);
251+
else
252+
return wcstotext(str, ncodes);
253+
}
254+
255+
/* use macros to cause routines below to call interface layer */
256+
257+
#define texttowcs win32_texttowcs
258+
#define wcstotext win32_wcstotext
259+
260+
#endif /* WIN32 */
261+
262+
152263
/********************************************************************
153264
*
154265
* lower

src/backend/utils/adt/varlena.c

Lines changed: 85 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.131 2005/08/02 16:11:57 tgl Exp $
11+
* $PostgreSQL: pgsql/src/backend/utils/adt/varlena.c,v 1.132 2005/08/24 17:50:00 tgl Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -849,6 +849,8 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2)
849849
char *a1p,
850850
*a2p;
851851

852+
#ifndef WIN32
853+
852854
if (len1 >= STACKBUFLEN)
853855
a1p = (char *) palloc(len1 + 1);
854856
else
@@ -865,10 +867,87 @@ varstr_cmp(char *arg1, int len1, char *arg2, int len2)
865867

866868
result = strcoll(a1p, a2p);
867869

868-
if (len1 >= STACKBUFLEN)
870+
if (a1p != a1buf)
869871
pfree(a1p);
870-
if (len2 >= STACKBUFLEN)
872+
if (a2p != a2buf)
871873
pfree(a2p);
874+
875+
#else /* WIN32 */
876+
877+
/* Win32 does not have UTF-8, so we need to map to UTF-16 */
878+
if (GetDatabaseEncoding() == PG_UTF8)
879+
{
880+
int a1len;
881+
int a2len;
882+
int r;
883+
884+
if (len1 >= STACKBUFLEN/2)
885+
{
886+
a1len = len1 * 2 + 2;
887+
a1p = palloc(a1len);
888+
}
889+
else
890+
{
891+
a1len = STACKBUFLEN;
892+
a1p = a1buf;
893+
}
894+
if (len2 >= STACKBUFLEN/2)
895+
{
896+
a2len = len2 * 2 + 2;
897+
a2p = palloc(a2len);
898+
}
899+
else
900+
{
901+
a2len = STACKBUFLEN;
902+
a2p = a2buf;
903+
}
904+
905+
/* stupid Microsloth API does not work for zero-length input */
906+
if (len1 == 0)
907+
r = 0;
908+
else
909+
{
910+
r = MultiByteToWideChar(CP_UTF8, 0, arg1, len1,
911+
(LPWSTR) a1p, a1len/2);
912+
if (!r)
913+
ereport(ERROR,
914+
(errmsg("could not convert string to UTF16: %lu",
915+
GetLastError())));
916+
}
917+
((LPWSTR) a1p)[r] = 0;
918+
919+
if (len2 == 0)
920+
r = 0;
921+
else
922+
{
923+
r = MultiByteToWideChar(CP_UTF8, 0, arg2, len2,
924+
(LPWSTR) a2p, a2len/2);
925+
if (!r)
926+
ereport(ERROR,
927+
(errmsg("could not convert string to UTF16: %lu",
928+
GetLastError())));
929+
}
930+
((LPWSTR) a2p)[r] = 0;
931+
932+
errno = 0;
933+
result = wcscoll((LPWSTR) a1p, (LPWSTR) a2p);
934+
if (result == 2147483647) /* _NLSCMPERROR; missing from mingw headers */
935+
ereport(ERROR,
936+
(errmsg("could not compare unicode strings: %d",
937+
errno)));
938+
939+
if (a1p != a1buf)
940+
pfree(a1p);
941+
if (a2p != a2buf)
942+
pfree(a2p);
943+
944+
return result;
945+
}
946+
947+
/* Win32 has strncoll(), so use it to avoid copying */
948+
return _strncoll(arg1, arg2, Min(len1, len2));
949+
950+
#endif /* WIN32 */
872951
}
873952
else
874953
{
@@ -2000,7 +2079,7 @@ replace_text(PG_FUNCTION_ARGS)
20002079

20012080
/*
20022081
* check_replace_text_has_escape_char
2003-
* check whether replace_text has escape char.
2082+
* check whether replace_text has escape char.
20042083
*/
20052084
static bool
20062085
check_replace_text_has_escape_char(const text *replace_text)
@@ -2175,7 +2254,7 @@ replace_text_regexp(PG_FUNCTION_ARGS)
21752254

21762255
/*
21772256
* Copy the replace_text. Process back references when the
2178-
* replace_text has escape characters.
2257+
* replace_text has escape characters.
21792258
*/
21802259
if (have_escape)
21812260
appendStringInfoRegexpSubstr(str, replace_text, pmatch, src_text);
@@ -2573,7 +2652,7 @@ md5_bytea(PG_FUNCTION_ARGS)
25732652
PG_RETURN_TEXT_P(result_text);
25742653
}
25752654

2576-
/*
2655+
/*
25772656
* Return the size of a datum, possibly compressed
25782657
*
25792658
* Works on any data type

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy