Skip to content

Commit ab9b6c4

Browse files
committed
Add convert/convert2 functions. They are similar to SQL99's convert.
1 parent 872cd63 commit ab9b6c4

File tree

5 files changed

+221
-80
lines changed

5 files changed

+221
-80
lines changed

src/backend/utils/init/miscinit.c

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/utils/init/miscinit.c,v 1.75 2001/08/06 18:17:42 tgl Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/utils/init/miscinit.c,v 1.76 2001/08/15 07:07:40 ishii Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -209,6 +209,19 @@ PG_char_to_encoding(PG_FUNCTION_ARGS)
209209
PG_RETURN_INT32(0);
210210
}
211211

212+
Datum
213+
pg_convert(PG_FUNCTION_ARGS)
214+
{
215+
elog(ERROR, "convert is not supported. To use convert, you need to enable multibyte capability");
216+
return DirectFunctionCall1(textin, CStringGetDatum(""));
217+
}
218+
219+
Datum
220+
pg_convert2(PG_FUNCTION_ARGS)
221+
{
222+
elog(ERROR, "convert is not supported. To use convert, you need to enable multibyte capability");
223+
return DirectFunctionCall1(textin, CStringGetDatum(""));
224+
}
212225
#endif
213226

214227
/* ----------------------------------------------------------------

src/backend/utils/mb/mbutils.c

Lines changed: 193 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* client encoding and server internal encoding.
44
* (currently mule internal code (mic) is used)
55
* Tatsuo Ishii
6-
* $Id: mbutils.c,v 1.18 2001/07/15 11:07:36 ishii Exp $
6+
* $Id: mbutils.c,v 1.19 2001/08/15 07:07:40 ishii Exp $
77
*/
88
#include "postgres.h"
99

@@ -34,67 +34,84 @@ pg_get_enc_ent(int encoding)
3434
}
3535

3636
/*
37-
* set the client encoding. if encoding conversion between
38-
* client/server encoding is not supported, returns -1
37+
* Find appropriate encoding conversion functions. If no such
38+
* functions found, returns -1.
39+
*
40+
* Arguments:
41+
*
42+
* src, dest (in): source and destination encoding ids
43+
*
44+
* src_to_mic (out): pointer to a function which converts src to
45+
* mic/unicode according to dest. If src == mic/unicode or no
46+
* appropriate function found, set to 0.
47+
*
48+
* dest_from_mic (out): pointer to a function which converts
49+
* mic/unicode to dest according to src. If dest == mic/unicode or no
50+
* appropriate function found, set to 0.
3951
*/
4052
int
41-
pg_set_client_encoding(int encoding)
53+
pg_find_encoding_converters(int src, int dest, void (**src_to_mic)(), void (**dest_from_mic)())
4254
{
43-
int current_server_encoding = GetDatabaseEncoding();
44-
45-
client_encoding = encoding;
46-
47-
if (client_encoding == current_server_encoding)
48-
{ /* server == client? */
49-
client_to_mic = client_from_mic = 0;
50-
server_to_mic = server_from_mic = 0;
55+
if (src == dest)
56+
{ /* src == dest? */
57+
*src_to_mic = *dest_from_mic = 0;
5158
}
52-
else if (current_server_encoding == MULE_INTERNAL)
53-
{ /* server == MULE_INETRNAL? */
54-
client_to_mic = pg_get_enc_ent(encoding)->to_mic;
55-
client_from_mic = pg_get_enc_ent(encoding)->from_mic;
56-
server_to_mic = server_from_mic = 0;
57-
if (client_to_mic == 0 || client_from_mic == 0)
59+
else if (src == MULE_INTERNAL)
60+
{ /* src == MULE_INETRNAL? */
61+
*dest_from_mic = pg_get_enc_ent(dest)->from_mic;
62+
if (*dest_from_mic == 0)
5863
return (-1);
64+
*src_to_mic = 0;
5965
}
60-
else if (encoding == MULE_INTERNAL)
61-
{ /* client == MULE_INETRNAL? */
62-
client_to_mic = client_from_mic = 0;
63-
server_to_mic = pg_get_enc_ent(current_server_encoding)->to_mic;
64-
server_from_mic = pg_get_enc_ent(current_server_encoding)->from_mic;
65-
if (server_to_mic == 0 || server_from_mic == 0)
66+
else if (dest == MULE_INTERNAL)
67+
{ /* dest == MULE_INETRNAL? */
68+
*src_to_mic = pg_get_enc_ent(src)->to_mic;
69+
if (*src_to_mic == 0)
6670
return (-1);
71+
*dest_from_mic = 0;
6772
}
68-
else if (current_server_encoding == UNICODE)
69-
{ /* server == UNICODE? */
70-
client_to_mic = pg_get_enc_ent(encoding)->to_unicode;
71-
client_from_mic = pg_get_enc_ent(encoding)->from_unicode;
72-
server_to_mic = server_from_mic = 0;
73-
if (client_to_mic == 0 || client_from_mic == 0)
73+
else if (src == UNICODE)
74+
{ /* src == UNICODE? */
75+
*dest_from_mic = pg_get_enc_ent(dest)->from_unicode;
76+
if (*dest_from_mic == 0)
7477
return (-1);
78+
*src_to_mic = 0;
7579
}
76-
else if (encoding == UNICODE)
77-
{ /* client == UNICODE? */
78-
client_to_mic = client_from_mic = 0;
79-
server_to_mic = pg_get_enc_ent(current_server_encoding)->to_unicode;
80-
server_from_mic = pg_get_enc_ent(current_server_encoding)->from_unicode;
81-
if (server_to_mic == 0 || server_from_mic == 0)
80+
else if (dest == UNICODE)
81+
{ /* dest == UNICODE? */
82+
*src_to_mic = pg_get_enc_ent(src)->to_unicode;
83+
if (*src_to_mic == 0)
8284
return (-1);
85+
*dest_from_mic = 0;
8386
}
8487
else
8588
{
86-
client_to_mic = pg_get_enc_ent(encoding)->to_mic;
87-
client_from_mic = pg_get_enc_ent(encoding)->from_mic;
88-
server_to_mic = pg_get_enc_ent(current_server_encoding)->to_mic;
89-
server_from_mic = pg_get_enc_ent(current_server_encoding)->from_mic;
90-
if (client_to_mic == 0 || client_from_mic == 0)
91-
return (-1);
92-
if (server_to_mic == 0 || server_from_mic == 0)
89+
*src_to_mic = pg_get_enc_ent(src)->to_mic;
90+
*dest_from_mic = pg_get_enc_ent(dest)->from_mic;
91+
if (*src_to_mic == 0 || *dest_from_mic == 0)
9392
return (-1);
9493
}
9594
return (0);
9695
}
9796

97+
/*
98+
* set the client encoding. if encoding conversion between
99+
* client/server encoding is not supported, returns -1
100+
*/
101+
int
102+
pg_set_client_encoding(int encoding)
103+
{
104+
int current_server_encoding = GetDatabaseEncoding();
105+
106+
if (pg_find_encoding_converters(encoding, current_server_encoding, &client_to_mic, &server_from_mic) < 0)
107+
return (-1);
108+
client_encoding = encoding;
109+
110+
if (pg_find_encoding_converters(current_server_encoding, encoding, &server_to_mic, &client_from_mic) < 0)
111+
return (-1);
112+
return 0;
113+
}
114+
98115
/*
99116
* returns the current client encoding
100117
*/
@@ -110,7 +127,21 @@ pg_get_client_encoding()
110127
}
111128

112129
/*
113-
* convert client encoding to server encoding.
130+
* Convert src to the destination encoding and return it. Actual conversion is done by
131+
* src_to_mic and dest_from_mic, which can be obtained by
132+
* pg_find_encoding_converters(). The reason we require two conversion
133+
* functions is that we have an intermediate encoding: MULE_INTERNAL
134+
* Using intermediate encodings will reduce the number of functions
135+
* doing encoding conversions. Special case is either src or dest is
136+
* the intermediate encoding itself. In this case, you don't need src
137+
* or dest (setting 0 will indicate there's no conversion
138+
* function). Another case is you have direct-conversion function from
139+
* src to dest. In this case either src_to_mic or dest_from_mic could
140+
* be set to 0 also.
141+
*
142+
* Note that if src or dest is UNICODE, we have to do
143+
* direct-conversion, since we don't support conversion between UNICODE
144+
* and MULE_INTERNAL, we cannot go through MULE_INTERNAL.
114145
*
115146
* CASE 1: if no conversion is required, then the given pointer s is returned.
116147
*
@@ -120,34 +151,138 @@ pg_get_client_encoding()
120151
* to determine whether to pfree the result or not!
121152
*
122153
* Note: we assume that conversion cannot cause more than a 4-to-1 growth
123-
* in the length of the string --- is this enough?
124-
*/
154+
* in the length of the string --- is this enough? */
155+
125156
unsigned char *
126-
pg_client_to_server(unsigned char *s, int len)
157+
pg_do_encoding_conversion(unsigned char *src, int len, void (*src_to_mic)(), void (*dest_from_mic)())
127158
{
128-
unsigned char *result = s;
159+
unsigned char *result = src;
129160
unsigned char *buf;
130161

131-
if (client_encoding == GetDatabaseEncoding())
132-
return result;
133-
if (client_to_mic)
162+
if (src_to_mic)
134163
{
135164
buf = (unsigned char *) palloc(len * 4 + 1);
136-
(*client_to_mic) (result, buf, len);
165+
(*src_to_mic) (result, buf, len);
137166
result = buf;
138167
len = strlen(result);
139168
}
140-
if (server_from_mic)
169+
if (dest_from_mic)
141170
{
142171
buf = (unsigned char *) palloc(len * 4 + 1);
143-
(*server_from_mic) (result, buf, len);
144-
if (result != s)
172+
(*dest_from_mic) (result, buf, len);
173+
if (result != src)
145174
pfree(result); /* release first buffer */
146175
result = buf;
147176
}
148177
return result;
149178
}
150179

180+
/*
181+
* Convert string using encoding_name. We assume that string's
182+
* encoding is same as DB encoding.
183+
*
184+
* TEXT convert(TEXT string, NAME encoding_name)
185+
*/
186+
Datum
187+
pg_convert(PG_FUNCTION_ARGS)
188+
{
189+
text *string = PG_GETARG_TEXT_P(0);
190+
Name s = PG_GETARG_NAME(1);
191+
int encoding = pg_char_to_encoding(NameStr(*s));
192+
int db_encoding = GetDatabaseEncoding();
193+
void (*src)(), (*dest)();
194+
unsigned char *result;
195+
text *retval;
196+
197+
if (encoding < 0)
198+
elog(ERROR, "Invalid encoding name %s", NameStr(*s));
199+
200+
if (pg_find_encoding_converters(db_encoding, encoding, &src, &dest) < 0)
201+
{
202+
char *encoding_name = (char *)pg_encoding_to_char(db_encoding);
203+
elog(ERROR, "Conversion from %s to %s is not possible", NameStr(*s), encoding_name);
204+
}
205+
206+
result = pg_do_encoding_conversion(VARDATA(string), VARSIZE(string)-VARHDRSZ,
207+
src, dest);
208+
if (result == NULL)
209+
elog(ERROR, "Encoding conversion failed");
210+
211+
retval = DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(result)));
212+
if (result != (unsigned char *)VARDATA(string))
213+
pfree(result);
214+
215+
/* free memory if allocated by the toaster */
216+
PG_FREE_IF_COPY(string, 0);
217+
218+
PG_RETURN_TEXT_P(retval);
219+
}
220+
221+
/*
222+
* Convert string using encoding_name.
223+
*
224+
* TEXT convert(TEXT string, NAME src_encoding_name, NAME dest_encoding_name)
225+
*/
226+
Datum
227+
pg_convert2(PG_FUNCTION_ARGS)
228+
{
229+
text *string = PG_GETARG_TEXT_P(0);
230+
char *src_encoding_name = NameStr(*PG_GETARG_NAME(1));
231+
int src_encoding = pg_char_to_encoding(src_encoding_name);
232+
char *dest_encoding_name = NameStr(*PG_GETARG_NAME(2));
233+
int dest_encoding = pg_char_to_encoding(dest_encoding_name);
234+
void (*src)(), (*dest)();
235+
unsigned char *result;
236+
text *retval;
237+
238+
if (src_encoding < 0)
239+
elog(ERROR, "Invalid source encoding name %s", src_encoding_name);
240+
if (dest_encoding < 0)
241+
elog(ERROR, "Invalid destination encoding name %s", dest_encoding_name);
242+
243+
if (pg_find_encoding_converters(src_encoding, dest_encoding, &src, &dest) < 0)
244+
{
245+
elog(ERROR, "Conversion from %s to %s is not possible",
246+
src_encoding_name, dest_encoding_name);
247+
}
248+
249+
result = pg_do_encoding_conversion(VARDATA(string), VARSIZE(string)-VARHDRSZ,
250+
src, dest);
251+
if (result == NULL)
252+
elog(ERROR, "Encoding conversion failed");
253+
254+
retval = DatumGetTextP(DirectFunctionCall1(textin, CStringGetDatum(result)));
255+
if (result != (unsigned char *)VARDATA(string))
256+
pfree(result);
257+
258+
/* free memory if allocated by the toaster */
259+
PG_FREE_IF_COPY(string, 0);
260+
261+
PG_RETURN_TEXT_P(retval);
262+
}
263+
264+
/*
265+
* convert client encoding to server encoding.
266+
*
267+
* CASE 1: if no conversion is required, then the given pointer s is returned.
268+
*
269+
* CASE 2: if conversion is required, a palloc'd string is returned.
270+
*
271+
* Callers must check whether return value differs from passed value
272+
* to determine whether to pfree the result or not!
273+
*
274+
* Note: we assume that conversion cannot cause more than a 4-to-1 growth
275+
* in the length of the string --- is this enough?
276+
*/
277+
unsigned char *
278+
pg_client_to_server(unsigned char *s, int len)
279+
{
280+
if (client_encoding == GetDatabaseEncoding())
281+
return s;
282+
283+
return pg_do_encoding_conversion(s, len, client_to_mic, server_from_mic);
284+
}
285+
151286
/*
152287
* convert server encoding to client encoding.
153288
*
@@ -164,27 +299,10 @@ pg_client_to_server(unsigned char *s, int len)
164299
unsigned char *
165300
pg_server_to_client(unsigned char *s, int len)
166301
{
167-
unsigned char *result = s;
168-
unsigned char *buf;
169-
170302
if (client_encoding == GetDatabaseEncoding())
171-
return result;
172-
if (server_to_mic)
173-
{
174-
buf = (unsigned char *) palloc(len * 4 + 1);
175-
(*server_to_mic) (result, buf, len);
176-
result = buf;
177-
len = strlen(result);
178-
}
179-
if (client_from_mic)
180-
{
181-
buf = (unsigned char *) palloc(len * 4 + 1);
182-
(*client_from_mic) (result, buf, len);
183-
if (result != s)
184-
pfree(result); /* release first buffer */
185-
result = buf;
186-
}
187-
return result;
303+
return s;
304+
305+
return pg_do_encoding_conversion(s, len, server_to_mic, client_from_mic);
188306
}
189307

190308
/* convert a multi-byte string to a wchar */

src/include/catalog/pg_proc.h

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Portions Copyright (c) 1996-2001, PostgreSQL Global Development Group
88
* Portions Copyright (c) 1994, Regents of the University of California
99
*
10-
* $Id: pg_proc.h,v 1.204 2001/08/14 22:21:58 tgl Exp $
10+
* $Id: pg_proc.h,v 1.205 2001/08/15 07:07:40 ishii Exp $
1111
*
1212
* NOTES
1313
* The script catalog/genbki.sh reads this file and generates .bki
@@ -2137,7 +2137,13 @@ DESCR("return portion of string");
21372137
DATA(insert OID = 1039 ( getdatabaseencoding PGUID 12 f t f t 0 f 19 "0" 100 0 0 100 getdatabaseencoding - ));
21382138
DESCR("encoding name of current database");
21392139

2140-
DATA(insert OID = 1295 ( pg_char_to_encoding PGUID 12 f t f t 1 f 23 "19" 100 0 0 100 PG_char_to_encoding - ));
2140+
DATA(insert OID = 1717 ( convert PGUID 12 f t f t 2 f 25 "25 19" 100 0 0 100 pg_convert - ));
2141+
DESCR("convert string with specified destination encoding name");
2142+
2143+
DATA(insert OID = 1813 ( convert PGUID 12 f t f t 3 f 25 "25 19 19" 100 0 0 100 pg_convert2 - ));
2144+
DESCR("convert string with specified encoding names");
2145+
2146+
DATA(insert OID = 1264 ( pg_char_to_encoding PGUID 12 f t f t 1 f 23 "19" 100 0 0 100 PG_char_to_encoding - ));
21412147
DESCR("convert encoding name to encoding id");
21422148

21432149
DATA(insert OID = 1597 ( pg_encoding_to_char PGUID 12 f t f t 1 f 19 "23" 100 0 0 100 PG_encoding_to_char - ));

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy