Skip to content

Commit 5bc429a

Browse files
committed
Extend collection of Unicode combining characters to beyond the BMP
The former limit was perhaps a carryover from an older hand-coded table. Since commit bab9821 we have enough space in mbinterval to store larger codepoints, so collect all combining characters. Discussion: https://www.postgresql.org/message-id/49ad1fa0-174e-c901-b14c-c484b60907f1%40enterprisedb.com
1 parent bab9821 commit 5bc429a

File tree

2 files changed

+102
-2
lines changed

2 files changed

+102
-2
lines changed

src/common/unicode/generate-unicode_combining_table.pl

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@
2525
my @fields = split ';', $line;
2626
$codepoint = hex $fields[0];
2727

28-
next if $codepoint > 0xFFFF;
29-
3028
if ($fields[2] eq 'Me' || $fields[2] eq 'Mn')
3129
{
3230
# combining character, save for start of range

src/include/common/unicode_combining_table.h

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,4 +193,106 @@ static const struct mbinterval combining[] = {
193193
{0xFB1E, 0xFB1E},
194194
{0xFE00, 0xFE0F},
195195
{0xFE20, 0xFE2F},
196+
{0x101FD, 0x101FD},
197+
{0x102E0, 0x102E0},
198+
{0x10376, 0x1037A},
199+
{0x10A01, 0x10A0F},
200+
{0x10A38, 0x10A3F},
201+
{0x10AE5, 0x10AE6},
202+
{0x10D24, 0x10D27},
203+
{0x10EAB, 0x10EAC},
204+
{0x10F46, 0x10F50},
205+
{0x11001, 0x11001},
206+
{0x11038, 0x11046},
207+
{0x1107F, 0x11081},
208+
{0x110B3, 0x110B6},
209+
{0x110B9, 0x110BA},
210+
{0x11100, 0x11102},
211+
{0x11127, 0x1112B},
212+
{0x1112D, 0x11134},
213+
{0x11173, 0x11173},
214+
{0x11180, 0x11181},
215+
{0x111B6, 0x111BE},
216+
{0x111C9, 0x111CC},
217+
{0x111CF, 0x111CF},
218+
{0x1122F, 0x11231},
219+
{0x11234, 0x11234},
220+
{0x11236, 0x11237},
221+
{0x1123E, 0x1123E},
222+
{0x112DF, 0x112DF},
223+
{0x112E3, 0x112EA},
224+
{0x11300, 0x11301},
225+
{0x1133B, 0x1133C},
226+
{0x11340, 0x11340},
227+
{0x11366, 0x11374},
228+
{0x11438, 0x1143F},
229+
{0x11442, 0x11444},
230+
{0x11446, 0x11446},
231+
{0x1145E, 0x1145E},
232+
{0x114B3, 0x114B8},
233+
{0x114BA, 0x114BA},
234+
{0x114BF, 0x114C0},
235+
{0x114C2, 0x114C3},
236+
{0x115B2, 0x115B5},
237+
{0x115BC, 0x115BD},
238+
{0x115BF, 0x115C0},
239+
{0x115DC, 0x115DD},
240+
{0x11633, 0x1163A},
241+
{0x1163D, 0x1163D},
242+
{0x1163F, 0x11640},
243+
{0x116AB, 0x116AB},
244+
{0x116AD, 0x116AD},
245+
{0x116B0, 0x116B5},
246+
{0x116B7, 0x116B7},
247+
{0x1171D, 0x1171F},
248+
{0x11722, 0x11725},
249+
{0x11727, 0x1172B},
250+
{0x1182F, 0x11837},
251+
{0x11839, 0x1183A},
252+
{0x1193B, 0x1193C},
253+
{0x1193E, 0x1193E},
254+
{0x11943, 0x11943},
255+
{0x119D4, 0x119DB},
256+
{0x119E0, 0x119E0},
257+
{0x11A01, 0x11A0A},
258+
{0x11A33, 0x11A38},
259+
{0x11A3B, 0x11A3E},
260+
{0x11A47, 0x11A47},
261+
{0x11A51, 0x11A56},
262+
{0x11A59, 0x11A5B},
263+
{0x11A8A, 0x11A96},
264+
{0x11A98, 0x11A99},
265+
{0x11C30, 0x11C3D},
266+
{0x11C3F, 0x11C3F},
267+
{0x11C92, 0x11CA7},
268+
{0x11CAA, 0x11CB0},
269+
{0x11CB2, 0x11CB3},
270+
{0x11CB5, 0x11CB6},
271+
{0x11D31, 0x11D45},
272+
{0x11D47, 0x11D47},
273+
{0x11D90, 0x11D91},
274+
{0x11D95, 0x11D95},
275+
{0x11D97, 0x11D97},
276+
{0x11EF3, 0x11EF4},
277+
{0x16AF0, 0x16AF4},
278+
{0x16B30, 0x16B36},
279+
{0x16F4F, 0x16F4F},
280+
{0x16F8F, 0x16F92},
281+
{0x16FE4, 0x16FE4},
282+
{0x1BC9D, 0x1BC9E},
283+
{0x1D167, 0x1D169},
284+
{0x1D17B, 0x1D182},
285+
{0x1D185, 0x1D18B},
286+
{0x1D1AA, 0x1D1AD},
287+
{0x1D242, 0x1D244},
288+
{0x1DA00, 0x1DA36},
289+
{0x1DA3B, 0x1DA6C},
290+
{0x1DA75, 0x1DA75},
291+
{0x1DA84, 0x1DA84},
292+
{0x1DA9B, 0x1E02A},
293+
{0x1E130, 0x1E136},
294+
{0x1E2EC, 0x1E2EF},
295+
{0x1E8D0, 0x1E8D6},
296+
{0x1E944, 0x1E94A},
297+
{0xE0100, 0xE01EF},
196298
};

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy