Skip to content

Commit 1efd5ff

Browse files
committed
Add a pg_encoding_mbcliplen() function that is just like pg_mbcliplen()
except the caller can specify the encoding to work in; this will be needed for pg_stat_statements. In passing, do some marginal efficiency hacking and clean up some comments. Also, prevent the single-byte-encoding code path from fetching one byte past the stated length of the string (this last is a bug that might need to be back-patched at some point).
1 parent 74ef810 commit 1efd5ff

File tree

2 files changed: 38 additions and 23 deletions.

src/backend/utils/mb/mbutils.c

Lines changed: 35 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@
44
* (currently mule internal code (mic) is used)
55
* Tatsuo Ishii
66
*
7-
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.75 2008/11/11 03:01:20 tgl Exp $
7+
* $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.76 2009/01/04 18:37:35 tgl Exp $
88
*/
99
#include "postgres.h"
1010

@@ -710,14 +710,14 @@ pg_encoding_mb2wchar_with_len(int encoding,
710710
return (*pg_wchar_table[encoding].mb2wchar_with_len) ((const unsigned char *) from, to, len);
711711
}
712712

713-
/* returns the byte length of a multibyte word */
713+
/* returns the byte length of a multibyte character */
714714
/*
 * pg_mblen: byte length of the multibyte character at mbstr, as reported by
 * the mblen routine that pg_wchar_table holds for the database encoding.
 */
int
715715
pg_mblen(const char *mbstr)
716716
{
717717
/* select the table entry by DatabaseEncoding->encoding and call its mblen hook */
return ((*pg_wchar_table[DatabaseEncoding->encoding].mblen) ((const unsigned char *) mbstr));
718718
}
719719

720-
/* returns the display length of a multibyte word */
720+
/* returns the display length of a multibyte character */
721721
int
722722
pg_dsplen(const char *mbstr)
723723
{
@@ -767,23 +767,37 @@ pg_mbstrlen_with_len(const char *mbstr, int limit)
767767

768768
/*
769769
* returns the byte length of a multibyte string
770-
* (not necessarily NULL terminated)
770+
* (not necessarily NULL terminated)
771771
* that is no longer than limit.
772-
* this function does not break multibyte word boundary.
772+
* this function does not break multibyte character boundary.
773773
*/
774774
/*
 * pg_mbcliplen: clip using the database encoding.
 *
 * Thin wrapper: passes mbstr, len and limit through unchanged to
 * pg_encoding_mbcliplen, supplying DatabaseEncoding->encoding as the encoding,
 * and returns that function's result.
 */
int
775775
pg_mbcliplen(const char *mbstr, int len, int limit)
776776
{
777+
return pg_encoding_mbcliplen(DatabaseEncoding->encoding, mbstr,
778+
len, limit);
779+
}
780+
781+
/*
782+
* pg_mbcliplen with specified encoding
783+
*/
784+
int
785+
pg_encoding_mbcliplen(int encoding, const char *mbstr,
786+
int len, int limit)
787+
{
788+
mblen_converter mblen_fn;
777789
int clen = 0;
778790
int l;
779791

780792
/* optimization for single byte encoding */
781-
if (pg_database_encoding_max_length() == 1)
793+
if (pg_encoding_max_length(encoding) == 1)
782794
return cliplen(mbstr, len, limit);
783795

796+
mblen_fn = pg_wchar_table[encoding].mblen;
797+
784798
while (len > 0 && *mbstr)
785799
{
786-
l = pg_mblen(mbstr);
800+
l = (*mblen_fn) ((const unsigned char *) mbstr);
787801
if ((clen + l) > limit)
788802
break;
789803
clen += l;
@@ -797,7 +811,8 @@ pg_mbcliplen(const char *mbstr, int len, int limit)
797811

798812
/*
799813
* Similar to pg_mbcliplen except the limit parameter specifies the
800-
* character length, not the byte length. */
814+
* character length, not the byte length.
815+
*/
801816
int
802817
pg_mbcharcliplen(const char *mbstr, int len, int limit)
803818
{
@@ -822,6 +837,18 @@ pg_mbcharcliplen(const char *mbstr, int len, int limit)
822837
return clen;
823838
}
824839

840+
/*
 * mbcliplen for any single-byte encoding
 *
 * Returns how many leading bytes of str to keep: bytes are counted until a
 * zero byte is found or Min(len, limit) bytes have been counted, whichever
 * comes first.  The index is compared with the bound before str[l] is read,
 * so no byte at or beyond position len is fetched.
 */
841+
static int
842+
cliplen(const char *str, int len, int limit)
843+
{
844+
int l = 0;
845+
846+
/* fold both bounds into one so the loop needs only a single comparison */
len = Min(len, limit);
847+
while (l < len && str[l])
848+
l++;
849+
return l;
850+
}
851+
825852
void
826853
SetDatabaseEncoding(int encoding)
827854
{
@@ -884,17 +911,3 @@ pg_client_encoding(PG_FUNCTION_ARGS)
884911
Assert(ClientEncoding);
885912
return DirectFunctionCall1(namein, CStringGetDatum(ClientEncoding->name));
886913
}
887-
888-
/*
 * Previous cliplen implementation (the version this commit removes).
 *
 * Counts bytes of str up to the first zero byte, returning early once the
 * count reaches len or limit.
 *
 * NOTE: the for-condition dereferences *s before the loop body compares l
 * with len and limit.  When no zero byte occurs before a bound is reached,
 * the byte at that bound position is still read; if len is the smaller
 * bound, that is str[len], one byte past the stated length.
 */
static int
889-
cliplen(const char *str, int len, int limit)
890-
{
891-
int l = 0;
892-
const char *s;
893-
894-
/* *s is tested first on every iteration, including the one that returns */
for (s = str; *s; s++, l++)
895-
{
896-
if (l >= len || l >= limit)
897-
return l;
898-
}
899-
/* reached only when a zero byte ended the scan before either bound */
return (s - str);
900-
}

src/include/mb/pg_wchar.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
77
* Portions Copyright (c) 1994, Regents of the University of California
88
*
9-
* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.81 2009/01/01 17:23:59 momjian Exp $
9+
* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.82 2009/01/04 18:37:36 tgl Exp $
1010
*
1111
* NOTES
1212
* This is used both by the backend and by libpq, but should not be
@@ -358,6 +358,8 @@ extern int pg_mic_mblen(const unsigned char *mbstr);
358358
extern int pg_mbstrlen(const char *mbstr);
359359
extern int pg_mbstrlen_with_len(const char *mbstr, int len);
360360
extern int pg_mbcliplen(const char *mbstr, int len, int limit);
361+
extern int pg_encoding_mbcliplen(int encoding, const char *mbstr,
362+
int len, int limit);
361363
/* like pg_mbcliplen, except limit is a character count rather than a byte count */
extern int pg_mbcharcliplen(const char *mbstr, int len, int limit);
362364
extern int pg_encoding_max_length(int encoding);
363365
extern int pg_database_encoding_max_length(void);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy