Skip to content

Commit 8b9dd6b

Browse files
committed
Support for KOI8U encoding
1 parent 1cb54c2 commit 8b9dd6b

File tree

10 files changed

+334
-14
lines changed

10 files changed

+334
-14
lines changed

doc/src/sgml/charset.sgml

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.90 2008/09/24 16:30:26 momjian Exp $ -->
1+
<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.91 2009/02/10 19:29:39 petere Exp $ -->
22

33
<chapter id="charset">
44
<title>Localization</>
@@ -457,12 +457,20 @@ initdb --locale=sv_SE
457457
<entry></entry>
458458
</row>
459459
<row>
460-
<entry><literal>KOI8</literal></entry>
461-
<entry><acronym>KOI</acronym>8-R(U)</entry>
462-
<entry>Cyrillic</entry>
460+
<entry><literal>KOI8R</literal></entry>
461+
<entry><acronym>KOI</acronym>8-R</entry>
462+
<entry>Cyrillic (Russian)</entry>
463463
<entry>Yes</entry>
464464
<entry>1</entry>
465-
<entry><literal>KOI8R</></entry>
465+
<entry><literal>KOI8</></entry>
466+
</row>
467+
<row>
468+
<entry><literal>KOI8U</literal></entry>
469+
<entry><acronym>KOI</acronym>8-U</entry>
470+
<entry>Cyrillic (Ukrainian)</entry>
471+
<entry>Yes</entry>
472+
<entry>1</entry>
473+
<entry></entry>
466474
</row>
467475
<row>
468476
<entry><literal>LATIN1</literal></entry>

src/backend/utils/mb/Unicode/UCS_to_most.pl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
#
33
# Copyright (c) 2001-2009, PostgreSQL Global Development Group
44
#
5-
# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_most.pl,v 1.6 2009/02/10 16:36:55 petere Exp $
5+
# $PostgreSQL: pgsql/src/backend/utils/mb/Unicode/UCS_to_most.pl,v 1.7 2009/02/10 19:29:39 petere Exp $
66
#
77
# Generate UTF-8 <--> character code conversion tables from
88
# map files provided by Unicode organization.
@@ -43,6 +43,7 @@
4343
'ISO8859_15' => '8859-15.TXT',
4444
'ISO8859_16' => '8859-16.TXT',
4545
'KOI8R' => 'KOI8-R.TXT',
46+
'KOI8U' => 'KOI8-U.TXT',
4647
'GBK' => 'CP936.TXT',
4748
'UHC' => 'CP949.TXT',
4849
'JOHAB' => 'JOHAB.TXT',

src/backend/utils/mb/Unicode/koi8u_to_utf8.map

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
static pg_local_to_utf LUmapKOI8U[ 128 ] = {
2+
{0x0080, 0xe29480},
3+
{0x0081, 0xe29482},
4+
{0x0082, 0xe2948c},
5+
{0x0083, 0xe29490},
6+
{0x0084, 0xe29494},
7+
{0x0085, 0xe29498},
8+
{0x0086, 0xe2949c},
9+
{0x0087, 0xe294a4},
10+
{0x0088, 0xe294ac},
11+
{0x0089, 0xe294b4},
12+
{0x008a, 0xe294bc},
13+
{0x008b, 0xe29680},
14+
{0x008c, 0xe29684},
15+
{0x008d, 0xe29688},
16+
{0x008e, 0xe2968c},
17+
{0x008f, 0xe29690},
18+
{0x0090, 0xe29691},
19+
{0x0091, 0xe29692},
20+
{0x0092, 0xe29693},
21+
{0x0093, 0xe28ca0},
22+
{0x0094, 0xe296a0},
23+
{0x0095, 0xe28899},
24+
{0x0096, 0xe2889a},
25+
{0x0097, 0xe28988},
26+
{0x0098, 0xe289a4},
27+
{0x0099, 0xe289a5},
28+
{0x009a, 0xc2a0},
29+
{0x009b, 0xe28ca1},
30+
{0x009c, 0xc2b0},
31+
{0x009d, 0xc2b2},
32+
{0x009e, 0xc2b7},
33+
{0x009f, 0xc3b7},
34+
{0x00a0, 0xe29590},
35+
{0x00a1, 0xe29591},
36+
{0x00a2, 0xe29592},
37+
{0x00a3, 0xd191},
38+
{0x00a4, 0xd194},
39+
{0x00a5, 0xe29594},
40+
{0x00a6, 0xd196},
41+
{0x00a7, 0xd197},
42+
{0x00a8, 0xe29597},
43+
{0x00a9, 0xe29598},
44+
{0x00aa, 0xe29599},
45+
{0x00ab, 0xe2959a},
46+
{0x00ac, 0xe2959b},
47+
{0x00ad, 0xd291},
48+
{0x00ae, 0xe2959d},
49+
{0x00af, 0xe2959e},
50+
{0x00b0, 0xe2959f},
51+
{0x00b1, 0xe295a0},
52+
{0x00b2, 0xe295a1},
53+
{0x00b3, 0xd081},
54+
{0x00b4, 0xd084},
55+
{0x00b5, 0xe295a3},
56+
{0x00b6, 0xd086},
57+
{0x00b7, 0xd087},
58+
{0x00b8, 0xe295a6},
59+
{0x00b9, 0xe295a7},
60+
{0x00ba, 0xe295a8},
61+
{0x00bb, 0xe295a9},
62+
{0x00bc, 0xe295aa},
63+
{0x00bd, 0xd290},
64+
{0x00be, 0xe295ac},
65+
{0x00bf, 0xc2a9},
66+
{0x00c0, 0xd18e},
67+
{0x00c1, 0xd0b0},
68+
{0x00c2, 0xd0b1},
69+
{0x00c3, 0xd186},
70+
{0x00c4, 0xd0b4},
71+
{0x00c5, 0xd0b5},
72+
{0x00c6, 0xd184},
73+
{0x00c7, 0xd0b3},
74+
{0x00c8, 0xd185},
75+
{0x00c9, 0xd0b8},
76+
{0x00ca, 0xd0b9},
77+
{0x00cb, 0xd0ba},
78+
{0x00cc, 0xd0bb},
79+
{0x00cd, 0xd0bc},
80+
{0x00ce, 0xd0bd},
81+
{0x00cf, 0xd0be},
82+
{0x00d0, 0xd0bf},
83+
{0x00d1, 0xd18f},
84+
{0x00d2, 0xd180},
85+
{0x00d3, 0xd181},
86+
{0x00d4, 0xd182},
87+
{0x00d5, 0xd183},
88+
{0x00d6, 0xd0b6},
89+
{0x00d7, 0xd0b2},
90+
{0x00d8, 0xd18c},
91+
{0x00d9, 0xd18b},
92+
{0x00da, 0xd0b7},
93+
{0x00db, 0xd188},
94+
{0x00dc, 0xd18d},
95+
{0x00dd, 0xd189},
96+
{0x00de, 0xd187},
97+
{0x00df, 0xd18a},
98+
{0x00e0, 0xd0ae},
99+
{0x00e1, 0xd090},
100+
{0x00e2, 0xd091},
101+
{0x00e3, 0xd0a6},
102+
{0x00e4, 0xd094},
103+
{0x00e5, 0xd095},
104+
{0x00e6, 0xd0a4},
105+
{0x00e7, 0xd093},
106+
{0x00e8, 0xd0a5},
107+
{0x00e9, 0xd098},
108+
{0x00ea, 0xd099},
109+
{0x00eb, 0xd09a},
110+
{0x00ec, 0xd09b},
111+
{0x00ed, 0xd09c},
112+
{0x00ee, 0xd09d},
113+
{0x00ef, 0xd09e},
114+
{0x00f0, 0xd09f},
115+
{0x00f1, 0xd0af},
116+
{0x00f2, 0xd0a0},
117+
{0x00f3, 0xd0a1},
118+
{0x00f4, 0xd0a2},
119+
{0x00f5, 0xd0a3},
120+
{0x00f6, 0xd096},
121+
{0x00f7, 0xd092},
122+
{0x00f8, 0xd0ac},
123+
{0x00f9, 0xd0ab},
124+
{0x00fa, 0xd097},
125+
{0x00fb, 0xd0a8},
126+
{0x00fc, 0xd0ad},
127+
{0x00fd, 0xd0a9},
128+
{0x00fe, 0xd0a7},
129+
{0x00ff, 0xd0aa}
130+
};

src/backend/utils/mb/Unicode/utf8_to_koi8u.map

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
static pg_utf_to_local ULmapKOI8U[ 128 ] = {
2+
{0xc2a0, 0x009a},
3+
{0xc2a9, 0x00bf},
4+
{0xc2b0, 0x009c},
5+
{0xc2b2, 0x009d},
6+
{0xc2b7, 0x009e},
7+
{0xc3b7, 0x009f},
8+
{0xd081, 0x00b3},
9+
{0xd084, 0x00b4},
10+
{0xd086, 0x00b6},
11+
{0xd087, 0x00b7},
12+
{0xd090, 0x00e1},
13+
{0xd091, 0x00e2},
14+
{0xd092, 0x00f7},
15+
{0xd093, 0x00e7},
16+
{0xd094, 0x00e4},
17+
{0xd095, 0x00e5},
18+
{0xd096, 0x00f6},
19+
{0xd097, 0x00fa},
20+
{0xd098, 0x00e9},
21+
{0xd099, 0x00ea},
22+
{0xd09a, 0x00eb},
23+
{0xd09b, 0x00ec},
24+
{0xd09c, 0x00ed},
25+
{0xd09d, 0x00ee},
26+
{0xd09e, 0x00ef},
27+
{0xd09f, 0x00f0},
28+
{0xd0a0, 0x00f2},
29+
{0xd0a1, 0x00f3},
30+
{0xd0a2, 0x00f4},
31+
{0xd0a3, 0x00f5},
32+
{0xd0a4, 0x00e6},
33+
{0xd0a5, 0x00e8},
34+
{0xd0a6, 0x00e3},
35+
{0xd0a7, 0x00fe},
36+
{0xd0a8, 0x00fb},
37+
{0xd0a9, 0x00fd},
38+
{0xd0aa, 0x00ff},
39+
{0xd0ab, 0x00f9},
40+
{0xd0ac, 0x00f8},
41+
{0xd0ad, 0x00fc},
42+
{0xd0ae, 0x00e0},
43+
{0xd0af, 0x00f1},
44+
{0xd0b0, 0x00c1},
45+
{0xd0b1, 0x00c2},
46+
{0xd0b2, 0x00d7},
47+
{0xd0b3, 0x00c7},
48+
{0xd0b4, 0x00c4},
49+
{0xd0b5, 0x00c5},
50+
{0xd0b6, 0x00d6},
51+
{0xd0b7, 0x00da},
52+
{0xd0b8, 0x00c9},
53+
{0xd0b9, 0x00ca},
54+
{0xd0ba, 0x00cb},
55+
{0xd0bb, 0x00cc},
56+
{0xd0bc, 0x00cd},
57+
{0xd0bd, 0x00ce},
58+
{0xd0be, 0x00cf},
59+
{0xd0bf, 0x00d0},
60+
{0xd180, 0x00d2},
61+
{0xd181, 0x00d3},
62+
{0xd182, 0x00d4},
63+
{0xd183, 0x00d5},
64+
{0xd184, 0x00c6},
65+
{0xd185, 0x00c8},
66+
{0xd186, 0x00c3},
67+
{0xd187, 0x00de},
68+
{0xd188, 0x00db},
69+
{0xd189, 0x00dd},
70+
{0xd18a, 0x00df},
71+
{0xd18b, 0x00d9},
72+
{0xd18c, 0x00d8},
73+
{0xd18d, 0x00dc},
74+
{0xd18e, 0x00c0},
75+
{0xd18f, 0x00d1},
76+
{0xd191, 0x00a3},
77+
{0xd194, 0x00a4},
78+
{0xd196, 0x00a6},
79+
{0xd197, 0x00a7},
80+
{0xd290, 0x00bd},
81+
{0xd291, 0x00ad},
82+
{0xe28899, 0x0095},
83+
{0xe2889a, 0x0096},
84+
{0xe28988, 0x0097},
85+
{0xe289a4, 0x0098},
86+
{0xe289a5, 0x0099},
87+
{0xe28ca0, 0x0093},
88+
{0xe28ca1, 0x009b},
89+
{0xe29480, 0x0080},
90+
{0xe29482, 0x0081},
91+
{0xe2948c, 0x0082},
92+
{0xe29490, 0x0083},
93+
{0xe29494, 0x0084},
94+
{0xe29498, 0x0085},
95+
{0xe2949c, 0x0086},
96+
{0xe294a4, 0x0087},
97+
{0xe294ac, 0x0088},
98+
{0xe294b4, 0x0089},
99+
{0xe294bc, 0x008a},
100+
{0xe29590, 0x00a0},
101+
{0xe29591, 0x00a1},
102+
{0xe29592, 0x00a2},
103+
{0xe29594, 0x00a5},
104+
{0xe29597, 0x00a8},
105+
{0xe29598, 0x00a9},
106+
{0xe29599, 0x00aa},
107+
{0xe2959a, 0x00ab},
108+
{0xe2959b, 0x00ac},
109+
{0xe2959d, 0x00ae},
110+
{0xe2959e, 0x00af},
111+
{0xe2959f, 0x00b0},
112+
{0xe295a0, 0x00b1},
113+
{0xe295a1, 0x00b2},
114+
{0xe295a3, 0x00b5},
115+
{0xe295a6, 0x00b8},
116+
{0xe295a7, 0x00b9},
117+
{0xe295a8, 0x00ba},
118+
{0xe295a9, 0x00bb},
119+
{0xe295aa, 0x00bc},
120+
{0xe295ac, 0x00be},
121+
{0xe29680, 0x008b},
122+
{0xe29684, 0x008c},
123+
{0xe29688, 0x008d},
124+
{0xe2968c, 0x008e},
125+
{0xe29690, 0x008f},
126+
{0xe29691, 0x0090},
127+
{0xe29692, 0x0091},
128+
{0xe29693, 0x0092},
129+
{0xe296a0, 0x0094}
130+
};

src/backend/utils/mb/conversion_procs/Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
# Makefile for utils/mb/conversion_procs
55
#
66
# IDENTIFICATION
7-
# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/Makefile,v 1.20 2008/08/23 20:31:37 momjian Exp $
7+
# $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/Makefile,v 1.21 2009/02/10 19:29:39 petere Exp $
88
#
99
#-------------------------------------------------------------------------
1010

@@ -84,6 +84,8 @@ CONVERSIONS = \
8484
utf8_to_big5 UTF8 BIG5 utf8_to_big5 utf8_and_big5 \
8585
utf8_to_koi8_r UTF8 KOI8R utf8_to_koi8r utf8_and_cyrillic \
8686
koi8_r_to_utf8 KOI8R UTF8 koi8r_to_utf8 utf8_and_cyrillic \
87+
utf8_to_koi8_u UTF8 KOI8U utf8_to_koi8u utf8_and_cyrillic \
88+
koi8_u_to_utf8 KOI8U UTF8 koi8u_to_utf8 utf8_and_cyrillic \
8789
utf8_to_windows_866 UTF8 WIN866 utf8_to_win utf8_and_win \
8890
windows_866_to_utf8 WIN866 UTF8 win_to_utf8 utf8_and_win \
8991
utf8_to_windows_874 UTF8 WIN874 utf8_to_win utf8_and_win \

src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Portions Copyright (c) 1994, Regents of the University of California
77
*
88
* IDENTIFICATION
9-
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c,v 1.23 2009/01/29 19:23:40 tgl Exp $
9+
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/utf8_and_cyrillic/utf8_and_cyrillic.c,v 1.24 2009/02/10 19:29:39 petere Exp $
1010
*
1111
*-------------------------------------------------------------------------
1212
*/
@@ -16,15 +16,23 @@
1616
#include "mb/pg_wchar.h"
1717
#include "../../Unicode/utf8_to_koi8r.map"
1818
#include "../../Unicode/koi8r_to_utf8.map"
19+
#include "../../Unicode/utf8_to_koi8u.map"
20+
#include "../../Unicode/koi8u_to_utf8.map"
1921

2022
PG_MODULE_MAGIC;
2123

2224
PG_FUNCTION_INFO_V1(utf8_to_koi8r);
2325
PG_FUNCTION_INFO_V1(koi8r_to_utf8);
2426

27+
PG_FUNCTION_INFO_V1(utf8_to_koi8u);
28+
PG_FUNCTION_INFO_V1(koi8u_to_utf8);
29+
2530
extern Datum utf8_to_koi8r(PG_FUNCTION_ARGS);
2631
extern Datum koi8r_to_utf8(PG_FUNCTION_ARGS);
2732

33+
extern Datum utf8_to_koi8u(PG_FUNCTION_ARGS);
34+
extern Datum koi8u_to_utf8(PG_FUNCTION_ARGS);
35+
2836
/* ----------
2937
* conv_proc(
3038
* INTEGER, -- source encoding id
@@ -65,3 +73,33 @@ koi8r_to_utf8(PG_FUNCTION_ARGS)
6573

6674
PG_RETURN_VOID();
6775
}
76+
77+
Datum
78+
utf8_to_koi8u(PG_FUNCTION_ARGS)
79+
{
80+
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
81+
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
82+
int len = PG_GETARG_INT32(4);
83+
84+
CHECK_ENCODING_CONVERSION_ARGS(PG_UTF8, PG_KOI8U);
85+
86+
UtfToLocal(src, dest, ULmapKOI8U, NULL,
87+
sizeof(ULmapKOI8U) / sizeof(pg_utf_to_local), 0, PG_KOI8U, len);
88+
89+
PG_RETURN_VOID();
90+
}
91+
92+
Datum
93+
koi8u_to_utf8(PG_FUNCTION_ARGS)
94+
{
95+
unsigned char *src = (unsigned char *) PG_GETARG_CSTRING(2);
96+
unsigned char *dest = (unsigned char *) PG_GETARG_CSTRING(3);
97+
int len = PG_GETARG_INT32(4);
98+
99+
CHECK_ENCODING_CONVERSION_ARGS(PG_KOI8U, PG_UTF8);
100+
101+
LocalToUtf(src, dest, LUmapKOI8U, NULL,
102+
sizeof(LUmapKOI8U) / sizeof(pg_local_to_utf), 0, PG_KOI8U, len);
103+
104+
PG_RETURN_VOID();
105+
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy