Skip to content

Commit e2d088d

Browse files
committed
Allow direct conversion between EUC_JP and SJIS to improve
performance. Patch submitted by Atsushi Ogawa.
1 parent 1fa87fa commit e2d088d

File tree

1 file changed

+201
-11
lines changed

1 file changed

+201
-11
lines changed

src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c

Lines changed: 201 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Portions Copyright (c) 1994, Regents of the University of California
77
*
88
* IDENTIFICATION
9-
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c,v 1.10 2005/06/10 16:43:56 ishii Exp $
9+
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c,v 1.11 2005/06/24 13:56:39 ishii Exp $
1010
*
1111
*-------------------------------------------------------------------------
1212
*/
@@ -58,23 +58,21 @@ static void sjis2mic(unsigned char *sjis, unsigned char *p, int len);
5858
static void mic2sjis(unsigned char *mic, unsigned char *p, int len);
5959
static void euc_jp2mic(unsigned char *euc, unsigned char *p, int len);
6060
static void mic2euc_jp(unsigned char *mic, unsigned char *p, int len);
61+
static void euc_jp2sjis(unsigned char *mic, unsigned char *p, int len);
62+
static void sjis2euc_jp(unsigned char *mic, unsigned char *p, int len);
6163

6264
Datum
6365
euc_jp_to_sjis(PG_FUNCTION_ARGS)
6466
{
6567
unsigned char *src = PG_GETARG_CSTRING(2);
6668
unsigned char *dest = PG_GETARG_CSTRING(3);
6769
int len = PG_GETARG_INT32(4);
68-
unsigned char *buf;
6970

7071
Assert(PG_GETARG_INT32(0) == PG_EUC_JP);
7172
Assert(PG_GETARG_INT32(1) == PG_SJIS);
7273
Assert(len >= 0);
7374

74-
buf = palloc(len * ENCODING_GROWTH_RATE);
75-
euc_jp2mic(src, buf, len);
76-
mic2sjis(buf, dest, strlen(buf));
77-
pfree(buf);
75+
euc_jp2sjis(src, dest, len);
7876

7977
PG_RETURN_VOID();
8078
}
@@ -85,16 +83,12 @@ sjis_to_euc_jp(PG_FUNCTION_ARGS)
8583
unsigned char *src = PG_GETARG_CSTRING(2);
8684
unsigned char *dest = PG_GETARG_CSTRING(3);
8785
int len = PG_GETARG_INT32(4);
88-
unsigned char *buf;
8986

9087
Assert(PG_GETARG_INT32(0) == PG_SJIS);
9188
Assert(PG_GETARG_INT32(1) == PG_EUC_JP);
9289
Assert(len >= 0);
9390

94-
buf = palloc(len * ENCODING_GROWTH_RATE);
95-
sjis2mic(src, buf, len);
96-
mic2euc_jp(buf, dest, strlen(buf));
97-
pfree(buf);
91+
sjis2euc_jp(src, dest, len);
9892

9993
PG_RETURN_VOID();
10094
}
@@ -454,3 +448,199 @@ mic2euc_jp(unsigned char *mic, unsigned char *p, int len)
454448
}
455449
*p = '\0';
456450
}
451+
452+
/*
453+
* EUC_JP -> SJIS
454+
*/
455+
static void
456+
euc_jp2sjis(unsigned char *euc, unsigned char *p, int len)
457+
{
458+
int c1,
459+
c2,
460+
k;
461+
unsigned char *euc_end = euc + len;
462+
463+
while (euc_end >= euc && (c1 = *euc++))
464+
{
465+
if(c1 < 0x80)
466+
{
467+
/* should be ASCII */
468+
*p++ = c1;
469+
}
470+
else if (c1 == SS2)
471+
{
472+
/* hankaku kana? */
473+
*p++ = *euc++;
474+
}
475+
else if (c1 == SS3)
476+
{
477+
/* JIS X0212 kanji? */
478+
c1 = *euc++;
479+
c2 = *euc++;
480+
k = c1 << 8 | c2;
481+
if (k >= 0xf5a1)
482+
{
483+
/* UDC2 */
484+
c1 -= 0x54;
485+
*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x74;
486+
*p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
487+
}
488+
else
489+
{
490+
int i, k2;
491+
492+
/* IBM kanji */
493+
for (i = 0;; i++)
494+
{
495+
k2 = ibmkanji[i].euc & 0xffff;
496+
if (k2 == 0xffff)
497+
{
498+
*p++ = PGSJISALTCODE >> 8;
499+
*p++ = PGSJISALTCODE & 0xff;
500+
break;
501+
}
502+
if (k2 == k)
503+
{
504+
k = ibmkanji[i].sjis;
505+
*p++ = k >> 8;
506+
*p++ = k & 0xff;
507+
break;
508+
}
509+
}
510+
}
511+
}
512+
else
513+
{
514+
/* JIS X0208 kanji? */
515+
c2 = *euc++;
516+
k = (c1 << 8) | (c2 & 0xff);
517+
if (k >= 0xf5a1)
518+
{
519+
/* UDC1 */
520+
c1 -= 0x54;
521+
*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x6f;
522+
}
523+
else
524+
*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1);
525+
*p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
526+
}
527+
}
528+
*p = '\0';
529+
}
530+
531+
/*
532+
* SJIS ---> EUC_JP
533+
*/
534+
static void
535+
sjis2euc_jp(unsigned char *sjis, unsigned char *p, int len)
536+
{
537+
int c1,
538+
c2,
539+
i,
540+
k,
541+
k2;
542+
unsigned char *sjis_end = sjis + len;
543+
544+
while (sjis_end >= sjis && (c1 = *sjis++))
545+
{
546+
if(c1 < 0x80)
547+
{
548+
/* should be ASCII */
549+
*p++ = c1;
550+
}
551+
else if (c1 >= 0xa1 && c1 <= 0xdf)
552+
{
553+
/* JIS X0201 (1 byte kana) */
554+
*p++ = SS2;
555+
*p++ = c1;
556+
}
557+
else
558+
{
559+
/*
560+
* JIS X0208, X0212, user defined extended characters
561+
*/
562+
c2 = *sjis++;
563+
k = (c1 << 8) + c2;
564+
if (k >= 0xed40 && k < 0xf040)
565+
{
566+
/* NEC selection IBM kanji */
567+
for (i = 0;; i++)
568+
{
569+
k2 = ibmkanji[i].nec;
570+
if (k2 == 0xffff)
571+
break;
572+
if (k2 == k)
573+
{
574+
k = ibmkanji[i].sjis;
575+
c1 = (k >> 8) & 0xff;
576+
c2 = k & 0xff;
577+
}
578+
}
579+
}
580+
581+
if (k < 0xeb3f)
582+
{
583+
/* JIS X0208 */
584+
*p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e);
585+
*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
586+
}
587+
else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc))
588+
{
589+
/* NEC selection IBM kanji - Other undecided justice */
590+
*p++ = PGEUCALTCODE >> 8;
591+
*p++ = PGEUCALTCODE & 0xff;
592+
}
593+
else if (k >= 0xf040 && k < 0xf540)
594+
{
595+
/*
596+
* UDC1 mapping to X0208 85 ku - 94 ku JIS code 0x7521 -
597+
* 0x7e7e EUC 0xf5a1 - 0xfefe
598+
*/
599+
c1 -= 0x6f;
600+
*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
601+
*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
602+
}
603+
else if (k >= 0xf540 && k < 0xfa40)
604+
{
605+
/*
606+
* UDC2 mapping to X0212 85 ku - 94 ku JIS code 0x7521 -
607+
* 0x7e7e EUC 0x8ff5a1 - 0x8ffefe
608+
*/
609+
*p++ = SS3;
610+
c1 -= 0x74;
611+
*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
612+
*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
613+
}
614+
else if (k >= 0xfa40)
615+
{
616+
/*
617+
* mapping IBM kanji to X0208 and X0212
618+
*
619+
*/
620+
for (i = 0;; i++)
621+
{
622+
k2 = ibmkanji[i].sjis;
623+
if (k2 == 0xffff)
624+
break;
625+
if (k2 == k)
626+
{
627+
k = ibmkanji[i].euc;
628+
if (k >= 0x8f0000)
629+
{
630+
*p++ = SS3;
631+
*p++ = 0x80 | ((k & 0xff00) >> 8);
632+
*p++ = 0x80 | (k & 0xff);
633+
}
634+
else
635+
{
636+
*p++ = 0x80 | (k >> 8);
637+
*p++ = 0x80 | (k & 0xff);
638+
}
639+
}
640+
}
641+
}
642+
}
643+
}
644+
*p = '\0';
645+
}
646+

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy