Skip to content

Commit 1032445

Browse files
committed
TODO item:
* Make n of CHAR(n)/VARCHAR(n) the number of letters, not bytes
1 parent b08e86d commit 1032445

File tree

3 files changed

+106
-50
lines changed

3 files changed

+106
-50
lines changed

src/backend/utils/adt/varchar.c

Lines changed: 77 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
*
99
*
1010
* IDENTIFICATION
11-
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.80 2001/06/09 23:21:55 petere Exp $
11+
* $Header: /cvsroot/pgsql/src/backend/utils/adt/varchar.c,v 1.81 2001/07/15 11:07:37 ishii Exp $
1212
*
1313
*-------------------------------------------------------------------------
1414
*/
@@ -73,33 +73,65 @@ bpcharin(PG_FUNCTION_ARGS)
7373
char *r;
7474
size_t len, maxlen;
7575
int i;
76+
#ifdef MULTIBYTE
77+
int charlen; /* number of characters in the input string */
78+
#endif
7679

7780
len = strlen(s);
81+
#ifdef MULTIBYTE
82+
charlen = pg_mbstrlen(s);
83+
#endif
7884

7985
/* If typmod is -1 (or invalid), use the actual string length */
8086
if (atttypmod < (int32) VARHDRSZ)
87+
#ifdef MULTIBYTE
88+
maxlen = charlen;
89+
#else
8190
maxlen = len;
91+
#endif
8292
else
8393
maxlen = atttypmod - VARHDRSZ;
8494

95+
#ifdef MULTIBYTE
96+
if (charlen > maxlen)
97+
#else
8598
if (len > maxlen)
99+
#endif
86100
{
87101
/* Verify that extra characters are spaces, and clip them off */
88102
#ifdef MULTIBYTE
89-
size_t mbmaxlen = pg_mbcliplen(s, len, maxlen);
90-
103+
size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
104+
/*
105+
* at this point, len is the actual BYTE length of the
106+
* input string, maxlen is the max number of
107+
* CHARACTERS allowed for this bpchar type.
108+
*/
91109
if (strspn(s + mbmaxlen, " ") == len - mbmaxlen)
92110
len = mbmaxlen;
93111
else
94112
elog(ERROR, "value too long for type character(%d)", maxlen);
95-
Assert(len <= maxlen);
113+
/*
114+
* XXX: at this point, maxlen is the necessary byte
115+
* length, not the number of CHARACTERS!
116+
*/
117+
maxlen = len;
96118
#else
97119
if (strspn(s + maxlen, " ") == len - maxlen)
98120
len = maxlen;
99121
else
100122
elog(ERROR, "value too long for type character(%d)", maxlen);
101123
#endif
102124
}
125+
#ifdef MULTIBYTE
126+
else
127+
{
128+
/*
129+
* XXX: at this point, maxlen is the necessary byte
130+
* length, not the number of CHARACTERS!
131+
*/
132+
maxlen = len + (maxlen - charlen);
133+
}
134+
#endif
103135

104136
result = palloc(maxlen + VARHDRSZ);
105137
VARATT_SIZEP(result) = maxlen + VARHDRSZ;
@@ -158,19 +190,29 @@ bpchar(PG_FUNCTION_ARGS)
158190
char *r;
159191
char *s;
160192
int i;
193+
#ifdef MULTIBYTE
194+
int charlen; /* number of characters in the input string
195+
+ VARHDRSZ*/
196+
#endif
161197

162198
len = VARSIZE(source);
199+
#ifdef MULTIBYTE
200+
charlen = pg_mbstrlen_with_len(VARDATA(source), len - VARHDRSZ) + VARHDRSZ;
201+
#endif
163202
/* No work if typmod is invalid or supplied data matches it already */
164203
if (maxlen < (int32) VARHDRSZ || len == maxlen)
165204
PG_RETURN_BPCHAR_P(source);
166-
205+
#ifdef MULTIBYTE
206+
if (charlen > maxlen)
207+
#else
167208
if (len > maxlen)
209+
#endif
168210
{
169211
/* Verify that extra characters are spaces, and clip them off */
170212
#ifdef MULTIBYTE
171213
size_t maxmblen;
172214

173-
maxmblen = pg_mbcliplen(VARDATA(source), len - VARHDRSZ,
215+
maxmblen = pg_mbcharcliplen(VARDATA(source), len - VARHDRSZ,
174216
maxlen - VARHDRSZ) + VARHDRSZ;
175217

176218
for (i = maxmblen - VARHDRSZ; i < len - VARHDRSZ; i++)
@@ -179,7 +221,11 @@ bpchar(PG_FUNCTION_ARGS)
179221
maxlen - VARHDRSZ);
180222

181223
len = maxmblen;
182-
Assert(len <= maxlen);
224+
/*
225+
* XXX: at this point, maxlen is the necessary byte
226+
* length+VARHDRSZ, not the number of CHARACTERS!
227+
*/
228+
maxlen = len;
183229
#else
184230
for (i = maxlen - VARHDRSZ; i < len - VARHDRSZ; i++)
185231
if (*(VARDATA(source) + i) != ' ')
@@ -189,6 +235,16 @@ bpchar(PG_FUNCTION_ARGS)
189235
len = maxlen;
190236
#endif
191237
}
238+
#ifdef MULTIBYTE
239+
else
240+
{
241+
/*
242+
* XXX: at this point, maxlen is the necessary byte
243+
* length+VARHDRSZ, not the number of CHARACTERS!
244+
*/
245+
maxlen = len + (maxlen - charlen);
246+
}
247+
#endif
192248

193249
s = VARDATA(source);
194250

@@ -333,9 +389,12 @@ name_bpchar(PG_FUNCTION_ARGS)
333389
* Convert a C string to VARCHAR internal representation. atttypmod
334390
* is the declared length of the type plus VARHDRSZ.
335391
*
336-
* If the C string is too long, raise an error, unless the extra
337-
* characters are spaces, in which case they're truncated. (per SQL)
338-
*/
392+
* Note that if MULTIBYTE is enabled, atttypmod is regarded as the
393+
* number of characters, rather than number of bytes.
394+
*
395+
* If the C string is too long,
396+
* raise an error, unless the extra characters are spaces, in which
397+
* case they're truncated. (per SQL) */
339398
Datum
340399
varcharin(PG_FUNCTION_ARGS)
341400
{
@@ -354,7 +413,7 @@ varcharin(PG_FUNCTION_ARGS)
354413
{
355414
/* Verify that extra characters are spaces, and clip them off */
356415
#ifdef MULTIBYTE
357-
size_t mbmaxlen = pg_mbcliplen(s, len, maxlen);
416+
size_t mbmaxlen = pg_mbcharcliplen(s, len, maxlen);
358417

359418
if (strspn(s + mbmaxlen, " ") == len - mbmaxlen)
360419
len = mbmaxlen;
@@ -428,7 +487,7 @@ varchar(PG_FUNCTION_ARGS)
428487
size_t maxmblen;
429488

430489
/* truncate multi-byte string preserving multi-byte boundary */
431-
maxmblen = pg_mbcliplen(VARDATA(source), len - VARHDRSZ,
490+
maxmblen = pg_mbcharcliplen(VARDATA(source), len - VARHDRSZ,
432491
maxlen - VARHDRSZ) + VARHDRSZ;
433492

434493
for (i = maxmblen - VARHDRSZ; i < len - VARHDRSZ; i++)
@@ -515,22 +574,9 @@ bpcharlen(PG_FUNCTION_ARGS)
515574
BpChar *arg = PG_GETARG_BPCHAR_P(0);
516575

517576
#ifdef MULTIBYTE
518-
unsigned char *s;
519-
int len,
520-
l,
521-
wl;
522-
523-
l = VARSIZE(arg) - VARHDRSZ;
524-
len = 0;
525-
s = VARDATA(arg);
526-
while (l > 0)
527-
{
528-
wl = pg_mblen(s);
529-
l -= wl;
530-
s += wl;
531-
len++;
532-
}
533-
PG_RETURN_INT32(len);
577+
PG_RETURN_INT32(
578+
pg_mbstrlen_with_len(VARDATA(arg), VARSIZE(arg) - VARHDRSZ)
579+
);
534580
#else
535581
PG_RETURN_INT32(VARSIZE(arg) - VARHDRSZ);
536582
#endif
@@ -736,22 +782,9 @@ varcharlen(PG_FUNCTION_ARGS)
736782
VarChar *arg = PG_GETARG_VARCHAR_P(0);
737783

738784
#ifdef MULTIBYTE
739-
unsigned char *s;
740-
int len,
741-
l,
742-
wl;
743-
744-
len = 0;
745-
s = VARDATA(arg);
746-
l = VARSIZE(arg) - VARHDRSZ;
747-
while (l > 0)
748-
{
749-
wl = pg_mblen(s);
750-
l -= wl;
751-
s += wl;
752-
len++;
753-
}
754-
PG_RETURN_INT32(len);
785+
PG_RETURN_INT32(
786+
pg_mbstrlen_with_len(VARDATA(arg), VARSIZE(arg) - VARHDRSZ)
787+
);
755788
#else
756789
PG_RETURN_INT32(VARSIZE(arg) - VARHDRSZ);
757790
#endif

src/backend/utils/mb/mbutils.c

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
* client encoding and server internal encoding.
44
* (currently mule internal code (mic) is used)
55
* Tatsuo Ishii
6-
* $Id: mbutils.c,v 1.17 2001/04/16 02:42:01 tgl Exp $
6+
* $Id: mbutils.c,v 1.18 2001/07/15 11:07:36 ishii Exp $
77
*/
88
#include "postgres.h"
99

@@ -241,9 +241,9 @@ pg_mbstrlen_with_len(const unsigned char *mbstr, int limit)
241241
}
242242

243243
/*
244-
* returns the length of a multi-byte string
244+
* returns the byte length of a multi-byte string
245245
* (not necessarily NULL terminated)
246-
* that is not longer than limit.
246+
* that is no longer than limit.
247247
* this function does not break multi-byte word boundary.
248248
*/
249249
int
@@ -267,8 +267,30 @@ pg_mbcliplen(const unsigned char *mbstr, int len, int limit)
267267
}
268268

269269
/*
270-
* functions for utils/init
271-
*/
270+
* Similar to pg_mbcliplen but the limit parameter specifies the
271+
* character length, not the byte length. */
272+
int
273+
pg_mbcharcliplen(const unsigned char *mbstr, int len, int limit)
274+
{
275+
int clen = 0;
276+
int nch = 0;
277+
int l;
278+
279+
while (len > 0 && *mbstr)
280+
{
281+
l = pg_mblen(mbstr);
282+
nch++;
283+
if (nch > limit)
284+
break;
285+
clen += l;
286+
len -= l;
287+
mbstr += l;
288+
}
289+
return (clen);
290+
}
291+
292+
/*
293+
* functions for utils/init */
272294
static int DatabaseEncoding = MULTIBYTE;
273295

274296
void

src/include/mb/pg_wchar.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* $Id: pg_wchar.h,v 1.26 2001/05/03 21:38:44 momjian Exp $ */
1+
/* $Id: pg_wchar.h,v 1.27 2001/07/15 11:07:37 ishii Exp $ */
22

33
#ifndef PG_WCHAR_H
44
#define PG_WCHAR_H
@@ -136,6 +136,7 @@ extern int pg_mic_mblen(const unsigned char *);
136136
extern int pg_mbstrlen(const unsigned char *);
137137
extern int pg_mbstrlen_with_len(const unsigned char *, int);
138138
extern int pg_mbcliplen(const unsigned char *, int, int);
139+
extern int pg_mbcharcliplen(const unsigned char *, int, int);
139140
extern pg_encoding_conv_tbl *pg_get_encent_by_encoding(int);
140141
extern int pg_set_client_encoding(int);
141142
extern int pg_get_client_encoding(void);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy