Skip to content

Commit b80e106

Browse files
committed
Add mbverifystr() functions specific to each encoding.
This makes the pg_verify_mbstr() function faster by allowing more efficient encoding-specific implementations. All the implementations included in this commit are pretty naive: they just call the same encoding-specific verifychar functions that were used previously. Even so, that already gives a performance boost, because the tight character-at-a-time loop is simpler.

Reviewed-by: John Naylor
Discussion: https://www.postgresql.org/message-id/e7861509-3960-538a-9025-b75a61188e01@iki.fi
1 parent a3367aa commit b80e106

File tree

9 files changed

+493
-101
lines changed

9 files changed

+493
-101
lines changed

src/backend/commands/extension.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -682,7 +682,7 @@ read_extension_script_file(const ExtensionControlFile *control,
682682
src_encoding = control->encoding;
683683

684684
/* make sure that source string is valid in the expected encoding */
685-
pg_verify_mbstr_len(src_encoding, src_str, len, false);
685+
(void) pg_verify_mbstr(src_encoding, src_str, len, false);
686686

687687
/*
688688
* Convert the encoding to the database encoding. read_whole_file

src/backend/utils/mb/conv.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -653,7 +653,7 @@ LocalToUtf(const unsigned char *iso, int len,
653653
continue;
654654
}
655655

656-
l = pg_encoding_verifymb(encoding, (const char *) iso, len);
656+
l = pg_encoding_verifymbchar(encoding, (const char *) iso, len);
657657
if (l < 0)
658658
break;
659659

src/backend/utils/mb/conversion_procs/euc2004_sjis2004/euc2004_sjis2004.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ euc_jis_20042shift_jis_2004(const unsigned char *euc, unsigned char *p, int len)
8787
continue;
8888
}
8989

90-
l = pg_encoding_verifymb(PG_EUC_JIS_2004, (const char *) euc, len);
90+
l = pg_encoding_verifymbchar(PG_EUC_JIS_2004, (const char *) euc, len);
9191

9292
if (l < 0)
9393
report_invalid_encoding(PG_EUC_JIS_2004,
@@ -238,7 +238,7 @@ shift_jis_20042euc_jis_2004(const unsigned char *sjis, unsigned char *p, int len
238238
continue;
239239
}
240240

241-
l = pg_encoding_verifymb(PG_SHIFT_JIS_2004, (const char *) sjis, len);
241+
l = pg_encoding_verifymbchar(PG_SHIFT_JIS_2004, (const char *) sjis, len);
242242

243243
if (l < 0 || l > len)
244244
report_invalid_encoding(PG_SHIFT_JIS_2004,

src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -291,7 +291,7 @@ mic2sjis(const unsigned char *mic, unsigned char *p, int len)
291291
len--;
292292
continue;
293293
}
294-
l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
294+
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
295295
if (l < 0)
296296
report_invalid_encoding(PG_MULE_INTERNAL,
297297
(const char *) mic, len);
@@ -381,7 +381,7 @@ euc_jp2mic(const unsigned char *euc, unsigned char *p, int len)
381381
len--;
382382
continue;
383383
}
384-
l = pg_encoding_verifymb(PG_EUC_JP, (const char *) euc, len);
384+
l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len);
385385
if (l < 0)
386386
report_invalid_encoding(PG_EUC_JP,
387387
(const char *) euc, len);
@@ -431,7 +431,7 @@ mic2euc_jp(const unsigned char *mic, unsigned char *p, int len)
431431
len--;
432432
continue;
433433
}
434-
l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
434+
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
435435
if (l < 0)
436436
report_invalid_encoding(PG_MULE_INTERNAL,
437437
(const char *) mic, len);
@@ -485,7 +485,7 @@ euc_jp2sjis(const unsigned char *euc, unsigned char *p, int len)
485485
len--;
486486
continue;
487487
}
488-
l = pg_encoding_verifymb(PG_EUC_JP, (const char *) euc, len);
488+
l = pg_encoding_verifymbchar(PG_EUC_JP, (const char *) euc, len);
489489
if (l < 0)
490490
report_invalid_encoding(PG_EUC_JP,
491491
(const char *) euc, len);
@@ -580,7 +580,7 @@ sjis2euc_jp(const unsigned char *sjis, unsigned char *p, int len)
580580
len--;
581581
continue;
582582
}
583-
l = pg_encoding_verifymb(PG_SJIS, (const char *) sjis, len);
583+
l = pg_encoding_verifymbchar(PG_SJIS, (const char *) sjis, len);
584584
if (l < 0)
585585
report_invalid_encoding(PG_SJIS,
586586
(const char *) sjis, len);

src/backend/utils/mb/conversion_procs/euc_kr_and_mic/euc_kr_and_mic.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ euc_kr2mic(const unsigned char *euc, unsigned char *p, int len)
7676
c1 = *euc;
7777
if (IS_HIGHBIT_SET(c1))
7878
{
79-
l = pg_encoding_verifymb(PG_EUC_KR, (const char *) euc, len);
79+
l = pg_encoding_verifymbchar(PG_EUC_KR, (const char *) euc, len);
8080
if (l != 2)
8181
report_invalid_encoding(PG_EUC_KR,
8282
(const char *) euc, len);
@@ -122,7 +122,7 @@ mic2euc_kr(const unsigned char *mic, unsigned char *p, int len)
122122
len--;
123123
continue;
124124
}
125-
l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
125+
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
126126
if (l < 0)
127127
report_invalid_encoding(PG_MULE_INTERNAL,
128128
(const char *) mic, len);

src/backend/utils/mb/conversion_procs/euc_tw_and_big5/euc_tw_and_big5.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -148,7 +148,7 @@ euc_tw2mic(const unsigned char *euc, unsigned char *p, int len)
148148
c1 = *euc;
149149
if (IS_HIGHBIT_SET(c1))
150150
{
151-
l = pg_encoding_verifymb(PG_EUC_TW, (const char *) euc, len);
151+
l = pg_encoding_verifymbchar(PG_EUC_TW, (const char *) euc, len);
152152
if (l < 0)
153153
report_invalid_encoding(PG_EUC_TW,
154154
(const char *) euc, len);
@@ -213,7 +213,7 @@ mic2euc_tw(const unsigned char *mic, unsigned char *p, int len)
213213
len--;
214214
continue;
215215
}
216-
l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
216+
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
217217
if (l < 0)
218218
report_invalid_encoding(PG_MULE_INTERNAL,
219219
(const char *) mic, len);
@@ -272,7 +272,7 @@ big52mic(const unsigned char *big5, unsigned char *p, int len)
272272
len--;
273273
continue;
274274
}
275-
l = pg_encoding_verifymb(PG_BIG5, (const char *) big5, len);
275+
l = pg_encoding_verifymbchar(PG_BIG5, (const char *) big5, len);
276276
if (l < 0)
277277
report_invalid_encoding(PG_BIG5,
278278
(const char *) big5, len);
@@ -321,7 +321,7 @@ mic2big5(const unsigned char *mic, unsigned char *p, int len)
321321
len--;
322322
continue;
323323
}
324-
l = pg_encoding_verifymb(PG_MULE_INTERNAL, (const char *) mic, len);
324+
l = pg_encoding_verifymbchar(PG_MULE_INTERNAL, (const char *) mic, len);
325325
if (l < 0)
326326
report_invalid_encoding(PG_MULE_INTERNAL,
327327
(const char *) mic, len);

src/backend/utils/mb/mbutils.c

Lines changed: 22 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -519,7 +519,7 @@ pg_convert(PG_FUNCTION_ARGS)
519519
/* make sure that source string is valid */
520520
len = VARSIZE_ANY_EXHDR(string);
521521
src_str = VARDATA_ANY(string);
522-
pg_verify_mbstr_len(src_encoding, src_str, len, false);
522+
(void) pg_verify_mbstr(src_encoding, src_str, len, false);
523523

524524
/* perform conversion */
525525
dest_str = (char *) pg_do_encoding_conversion((unsigned char *) unconstify(char *, src_str),
@@ -1215,10 +1215,10 @@ static bool
12151215
pg_generic_charinc(unsigned char *charptr, int len)
12161216
{
12171217
unsigned char *lastbyte = charptr + len - 1;
1218-
mbverifier mbverify;
1218+
mbchar_verifier mbverify;
12191219

12201220
/* We can just invoke the character verifier directly. */
1221-
mbverify = pg_wchar_table[GetDatabaseEncoding()].mbverify;
1221+
mbverify = pg_wchar_table[GetDatabaseEncoding()].mbverifychar;
12221222

12231223
while (*lastbyte < (unsigned char) 255)
12241224
{
@@ -1445,8 +1445,7 @@ pg_database_encoding_max_length(void)
14451445
bool
14461446
pg_verifymbstr(const char *mbstr, int len, bool noError)
14471447
{
1448-
return
1449-
pg_verify_mbstr_len(GetDatabaseEncoding(), mbstr, len, noError) >= 0;
1448+
return pg_verify_mbstr(GetDatabaseEncoding(), mbstr, len, noError);
14501449
}
14511450

14521451
/*
@@ -1456,7 +1455,18 @@ pg_verifymbstr(const char *mbstr, int len, bool noError)
14561455
bool
14571456
pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
14581457
{
1459-
return pg_verify_mbstr_len(encoding, mbstr, len, noError) >= 0;
1458+
int oklen;
1459+
1460+
Assert(PG_VALID_ENCODING(encoding));
1461+
1462+
oklen = pg_wchar_table[encoding].mbverifystr((const unsigned char *) mbstr, len);
1463+
if (oklen != len)
1464+
{
1465+
if (noError)
1466+
return false;
1467+
report_invalid_encoding(encoding, mbstr + oklen, len - oklen);
1468+
}
1469+
return true;
14601470
}
14611471

14621472
/*
@@ -1469,11 +1479,14 @@ pg_verify_mbstr(int encoding, const char *mbstr, int len, bool noError)
14691479
* If OK, return length of string in the encoding.
14701480
* If a problem is found, return -1 when noError is
14711481
* true; when noError is false, ereport() a descriptive message.
1482+
*
1483+
* Note: We cannot use the faster encoding-specific mbverifystr() function
1484+
* here, because we need to count the number of characters in the string.
14721485
*/
14731486
int
14741487
pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
14751488
{
1476-
mbverifier mbverify;
1489+
mbchar_verifier mbverifychar;
14771490
int mb_len;
14781491

14791492
Assert(PG_VALID_ENCODING(encoding));
@@ -1493,7 +1506,7 @@ pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
14931506
}
14941507

14951508
/* fetch function pointer just once */
1496-
mbverify = pg_wchar_table[encoding].mbverify;
1509+
mbverifychar = pg_wchar_table[encoding].mbverifychar;
14971510

14981511
mb_len = 0;
14991512

@@ -1516,7 +1529,7 @@ pg_verify_mbstr_len(int encoding, const char *mbstr, int len, bool noError)
15161529
report_invalid_encoding(encoding, mbstr, len);
15171530
}
15181531

1519-
l = (*mbverify) ((const unsigned char *) mbstr, len);
1532+
l = (*mbverifychar) ((const unsigned char *) mbstr, len);
15201533

15211534
if (l < 0)
15221535
{

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy