Skip to content

Commit 719b342

Browse files
committed
Shrink Unicode category table.
Missing entries can implicitly be considered "unassigned". Discussion: https://postgr.es/m/ff4c2f2f9c8fc7ca27c1c24ae37ecaeaeaff6b53.camel@j-davis.com
1 parent d16a0c1 commit 719b342

File tree

3 files changed

+15
-723
lines changed

3 files changed

+15
-723
lines changed

src/common/unicode/generate-unicode_category_table.pl

Lines changed: 11 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -72,15 +72,21 @@
7272
# the current range, emit the current range and initialize a new
7373
# range representing the gap.
7474
if ($range_end + 1 != $code && $range_category ne $gap_category) {
75-
push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
75+
if ($range_category ne $CATEGORY_UNASSIGNED) {
76+
push(@category_ranges, {start => $range_start, end => $range_end,
77+
category => $range_category});
78+
}
7679
$range_start = $range_end + 1;
7780
$range_end = $code - 1;
7881
$range_category = $gap_category;
7982
}
8083

8184
# different category; new range
8285
if ($range_category ne $category) {
83-
push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
86+
if ($range_category ne $CATEGORY_UNASSIGNED) {
87+
push(@category_ranges, {start => $range_start, end => $range_end,
88+
category => $range_category});
89+
}
8490
$range_start = $code;
8591
$range_end = $code;
8692
$range_category = $category;
@@ -109,14 +115,9 @@
109115
if $gap_category ne $CATEGORY_UNASSIGNED;
110116

111117
# emit final range
112-
push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
113-
114-
# emit range for any unassigned code points after last entry
115-
if ($range_end < 0x10FFFF) {
116-
$range_start = $range_end + 1;
117-
$range_end = 0x10FFFF;
118-
$range_category = $CATEGORY_UNASSIGNED;
119-
push(@category_ranges, {start => $range_start, end => $range_end, category => $range_category});
118+
if ($range_category ne $CATEGORY_UNASSIGNED) {
119+
push(@category_ranges, {start => $range_start, end => $range_end,
120+
category => $range_category});
120121
}
121122

122123
my $num_ranges = scalar @category_ranges;

src/common/unicode_category.c

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -28,8 +28,7 @@ unicode_category(pg_wchar ucs)
2828
int mid;
2929
int max = lengthof(unicode_categories) - 1;
3030

31-
Assert(ucs >= unicode_categories[0].first &&
32-
ucs <= unicode_categories[max].last);
31+
Assert(ucs <= 0x10ffff);
3332

3433
while (max >= min)
3534
{
@@ -42,8 +41,7 @@ unicode_category(pg_wchar ucs)
4241
return unicode_categories[mid].category;
4342
}
4443

45-
Assert(false);
46-
return (pg_unicode_category) - 1;
44+
return PG_U_UNASSIGNED;
4745
}
4846

4947
/*

0 commit comments

Comments (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy