Skip to content

Commit 9de09c0

Browse files
committed
Move wchar2char() and char2wchar() from tsearch into /mb to be easier to
use for other modules; also move pnstrdup(). Clean up code slightly.
1 parent 3eb9da5 commit 9de09c0

File tree

8 files changed

+155
-140
lines changed

8 files changed

+155
-140
lines changed

src/backend/tsearch/ts_locale.c

Lines changed: 1 addition & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.8 2008/06/17 16:09:06 momjian Exp $
10+
* $PostgreSQL: pgsql/src/backend/tsearch/ts_locale.c,v 1.9 2008/06/18 18:42:54 momjian Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -16,125 +16,8 @@
1616
#include "tsearch/ts_locale.h"
1717
#include "tsearch/ts_public.h"
1818

19-
2019
#ifdef USE_WIDE_UPPER_LOWER
2120

22-
/*
23-
* wchar2char --- convert wide characters to multibyte format
24-
*
25-
* This has the same API as the standard wcstombs() function; in particular,
26-
* tolen is the maximum number of bytes to store at *to, and *from must be
27-
* zero-terminated. The output will be zero-terminated iff there is room.
28-
*/
29-
size_t
30-
wchar2char(char *to, const wchar_t *from, size_t tolen)
31-
{
32-
if (tolen == 0)
33-
return 0;
34-
35-
#ifdef WIN32
36-
if (GetDatabaseEncoding() == PG_UTF8)
37-
{
38-
int r;
39-
40-
r = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
41-
NULL, NULL);
42-
43-
if (r <= 0)
44-
return (size_t) -1;
45-
46-
Assert(r <= tolen);
47-
48-
/* Microsoft counts the zero terminator in the result */
49-
return r - 1;
50-
}
51-
#endif /* WIN32 */
52-
53-
return wcstombs(to, from, tolen);
54-
}
55-
56-
/*
57-
* char2wchar --- convert multibyte characters to wide characters
58-
*
59-
* This has almost the API of mbstowcs(), except that *from need not be
60-
* null-terminated; instead, the number of input bytes is specified as
61-
* fromlen. Also, we ereport() rather than returning -1 for invalid
62-
* input encoding. tolen is the maximum number of wchar_t's to store at *to.
63-
* The output will be zero-terminated iff there is room.
64-
*/
65-
size_t
66-
char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
67-
{
68-
if (tolen == 0)
69-
return 0;
70-
71-
#ifdef WIN32
72-
if (GetDatabaseEncoding() == PG_UTF8)
73-
{
74-
int r;
75-
76-
/* stupid Microsloth API does not work for zero-length input */
77-
if (fromlen == 0)
78-
r = 0;
79-
else
80-
{
81-
r = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
82-
83-
if (r <= 0)
84-
{
85-
/* see notes in oracle_compat.c about error reporting */
86-
pg_verifymbstr(from, fromlen, false);
87-
ereport(ERROR,
88-
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
89-
errmsg("invalid multibyte character for locale"),
90-
errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
91-
}
92-
}
93-
94-
Assert(r < tolen);
95-
to[r] = 0;
96-
97-
return r;
98-
}
99-
#endif /* WIN32 */
100-
101-
if (lc_ctype_is_c())
102-
{
103-
/*
104-
* pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be
105-
* allocated with sufficient space
106-
*/
107-
return pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen);
108-
}
109-
else
110-
{
111-
/*
112-
* mbstowcs requires ending '\0'
113-
*/
114-
char *str = pnstrdup(from, fromlen);
115-
size_t result;
116-
117-
result = mbstowcs(to, str, tolen);
118-
119-
pfree(str);
120-
121-
if (result == (size_t) -1)
122-
{
123-
pg_verifymbstr(from, fromlen, false);
124-
ereport(ERROR,
125-
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
126-
errmsg("invalid multibyte character for locale"),
127-
errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
128-
}
129-
130-
if (result < tolen)
131-
to[result] = 0;
132-
133-
return result;
134-
}
135-
}
136-
137-
13821
int
13922
t_isdigit(const char *ptr)
14023
{

src/backend/tsearch/ts_utils.c

Lines changed: 1 addition & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
*
88
*
99
* IDENTIFICATION
10-
* $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.9 2008/01/01 19:45:52 momjian Exp $
10+
* $PostgreSQL: pgsql/src/backend/tsearch/ts_utils.c,v 1.10 2008/06/18 18:42:54 momjian Exp $
1111
*
1212
*-------------------------------------------------------------------------
1313
*/
@@ -153,13 +153,3 @@ searchstoplist(StopList *s, char *key)
153153
bsearch(&key, s->stop, s->len,
154154
sizeof(char *), comparestr)) ? true : false;
155155
}
156-
157-
char *
158-
pnstrdup(const char *in, int len)
159-
{
160-
char *out = palloc(len + 1);
161-
162-
memcpy(out, in, len);
163-
out[len] = '\0';
164-
return out;
165-
}

src/backend/utils/mb/mbutils.c

Lines changed: 129 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
* (currently mule internal code (mic) is used)
55
* Tatsuo Ishii
66
*
7-
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.71 2008/05/27 12:24:42 mha Exp $
7+
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.72 2008/06/18 18:42:54 momjian Exp $
88
*/
99
#include "postgres.h"
1010

@@ -555,6 +555,134 @@ perform_default_encoding_conversion(const char *src, int len, bool is_client_to_
555555
return result;
556556
}
557557

558+
559+
560+
#ifdef USE_WIDE_UPPER_LOWER
561+
562+
/*
563+
* wchar2char --- convert wide characters to multibyte format
564+
*
565+
* This has the same API as the standard wcstombs() function; in particular,
566+
* tolen is the maximum number of bytes to store at *to, and *from must be
567+
* zero-terminated. The output will be zero-terminated iff there is room.
568+
*/
569+
size_t
570+
wchar2char(char *to, const wchar_t *from, size_t tolen)
571+
{
572+
size_t result;
573+
574+
if (tolen == 0)
575+
return 0;
576+
577+
#ifdef WIN32
578+
/*
579+
* On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding,
580+
* and for some reason mbstowcs and wcstombs won't do this for us,
581+
* so we use WideCharToMultiByte() and MultiByteToWideChar() instead.
582+
*/
583+
if (GetDatabaseEncoding() == PG_UTF8)
584+
{
585+
result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
586+
NULL, NULL);
587+
/* A zero return is failure */
588+
if (result <= 0)
589+
result = -1;
590+
else
591+
{
592+
Assert(result <= tolen);
593+
/* Microsoft counts the zero terminator in the result */
594+
result--;
595+
}
596+
}
597+
else
598+
#endif /* WIN32 */
599+
result = wcstombs(to, from, tolen);
600+
return result;
601+
}
602+
603+
/*
604+
* char2wchar --- convert multibyte characters to wide characters
605+
*
606+
* This has almost the API of mbstowcs(), except that *from need not be
607+
* null-terminated; instead, the number of input bytes is specified as
608+
* fromlen. Also, we ereport() rather than returning -1 for invalid
609+
* input encoding. tolen is the maximum number of wchar_t's to store at *to.
610+
* The output will be zero-terminated iff there is room.
611+
*/
612+
size_t
613+
char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen)
614+
{
615+
size_t result;
616+
617+
if (tolen == 0)
618+
return 0;
619+
620+
#ifdef WIN32
621+
/* See WIN32 "Unicode" comment above */
622+
if (GetDatabaseEncoding() == PG_UTF8)
623+
{
624+
/* Win32 API does not work for zero-length input */
625+
if (fromlen == 0)
626+
result = 0;
627+
else
628+
{
629+
result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
630+
/* A zero return is failure */
631+
if (result == 0)
632+
result = -1;
633+
}
634+
635+
if (result != -1)
636+
{
637+
Assert(result < tolen);
638+
/* Append trailing null wchar (MultiByteToWideChar() does not) */
639+
to[result] = 0;
640+
}
641+
}
642+
else
643+
#endif /* WIN32 */
644+
{
645+
if (lc_ctype_is_c())
646+
{
647+
/*
648+
* pg_mb2wchar_with_len always adds trailing '\0', so 'to' should be
649+
* allocated with sufficient space
650+
*/
651+
result = pg_mb2wchar_with_len(from, (pg_wchar *) to, fromlen);
652+
}
653+
else
654+
{
655+
/* mbstowcs requires ending '\0' */
656+
char *str = pnstrdup(from, fromlen);
657+
658+
result = mbstowcs(to, str, tolen);
659+
pfree(str);
660+
}
661+
}
662+
663+
if (result == -1)
664+
{
665+
/*
666+
* Invalid multibyte character encountered. We try to give a useful
667+
* error message by letting pg_verifymbstr check the string. But it's
668+
* possible that the string is OK to us, and not OK to mbstowcs ---
669+
* this suggests that the LC_CTYPE locale is different from the
670+
* database encoding. Give a generic error message if verifymbstr
671+
* can't find anything wrong.
672+
*/
673+
pg_verifymbstr(from, fromlen, false); /* might not return */
674+
/* but if it does ... */
675+
ereport(ERROR,
676+
(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
677+
errmsg("invalid multibyte character for locale"),
678+
errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
679+
}
680+
681+
return result;
682+
}
683+
684+
#endif
685+
558686
/* convert a multibyte string to a wchar */
559687
int
560688
pg_mb2wchar(const char *from, pg_wchar *to)

src/backend/utils/mmgr/mcxt.c

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
*
1515
*
1616
* IDENTIFICATION
17-
* $PostgreSQL: pgsql/src/backend/utils/mmgr/mcxt.c,v 1.63 2008/01/01 19:45:55 momjian Exp $
17+
* $PostgreSQL: pgsql/src/backend/utils/mmgr/mcxt.c,v 1.64 2008/06/18 18:42:54 momjian Exp $
1818
*
1919
*-------------------------------------------------------------------------
2020
*/
@@ -624,6 +624,18 @@ repalloc(void *pointer, Size size)
624624
pointer, size);
625625
}
626626

627+
/* Like pstrdup(), but copy exactly len bytes from a not-necessarily-null-terminated input and append a null byte */
628+
char *
629+
pnstrdup(const char *in, int len)
630+
{
631+
char *out = palloc(len + 1);
632+
633+
memcpy(out, in, len);
634+
out[len] = '\0';
635+
return out;
636+
}
637+
638+
627639
/*
628640
* MemoryContextSwitchTo
629641
* Returns the current context; installs the given context.

src/include/mb/pg_wchar.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Portions Copyright (c) 1996-2008, PostgreSQL Global Development Group
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
9-
* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.78 2008/01/01 19:45:58 momjian Exp $
9+
* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.79 2008/06/18 18:42:54 momjian Exp $
1010
*
1111
* NOTES
1212
* This is used both by the backend and by libpq, but should not be
@@ -362,6 +362,11 @@ extern int pg_mbcharcliplen(const char *mbstr, int len, int imit);
362362
extern int pg_encoding_max_length(int encoding);
363363
extern int pg_database_encoding_max_length(void);
364364

365+
#ifdef USE_WIDE_UPPER_LOWER
366+
extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen);
367+
extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen);
368+
#endif
369+
365370
extern void SetDefaultClientEncoding(void);
366371
extern int SetClientEncoding(int encoding, bool doit);
367372
extern void InitializeClientEncoding(void);

src/include/tsearch/ts_locale.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
*
66
* Copyright (c) 1998-2008, PostgreSQL Global Development Group
77
*
8-
* $PostgreSQL: pgsql/src/include/tsearch/ts_locale.h,v 1.6 2008/06/17 16:09:06 momjian Exp $
8+
* $PostgreSQL: pgsql/src/include/tsearch/ts_locale.h,v 1.7 2008/06/18 18:42:54 momjian Exp $
99
*
1010
*-------------------------------------------------------------------------
1111
*/
@@ -33,9 +33,6 @@
3333

3434
#ifdef USE_WIDE_UPPER_LOWER
3535

36-
extern size_t wchar2char(char *to, const wchar_t *from, size_t tolen);
37-
extern size_t char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen);
38-
3936
extern int t_isdigit(const char *ptr);
4037
extern int t_isspace(const char *ptr);
4138
extern int t_isalpha(const char *ptr);

src/include/tsearch/ts_public.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
*
77
* Copyright (c) 1998-2008, PostgreSQL Global Development Group
88
*
9-
* $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.9 2008/05/16 16:31:02 tgl Exp $
9+
* $PostgreSQL: pgsql/src/include/tsearch/ts_public.h,v 1.10 2008/06/18 18:42:54 momjian Exp $
1010
*
1111
*-------------------------------------------------------------------------
1212
*/
@@ -62,8 +62,6 @@ typedef struct
6262
extern char *get_tsearch_config_filename(const char *basename,
6363
const char *extension);
6464

65-
extern char *pnstrdup(const char *in, int len);
66-
6765
/*
6866
* Often useful stopword list management
6967
*/

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy