Skip to content

Commit 2dbe07f

Browse files
Don't handle noncharacters differently than other unassigned codepoints
1 parent 0b13808 commit 2dbe07f

File tree

5 files changed: +5 additions, -32 deletions

scripts/unicode.py

Lines changed: 1 addition & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -106,7 +106,7 @@ def _load_unicode_data(self):
106106

107107
# Characters that cannot be part of a combining character sequence:
108108
# control characters, format characters other than ZWJ and ZWNJ,
109-
# the line and paragraph separators, and noncharacters.
109+
# and the line and paragraph separators.
110110
self.not_in_ccs = []
111111

112112
assigned_start = 0;
@@ -147,14 +147,6 @@ def _load_unicode_data(self):
147147

148148
self.general_category_public_assigned.append((assigned_start, prev_char_int))
149149

150-
# Mark noncharacters as nongraphic
151-
for i in range(0xFDD0, 0xFDF0):
152-
self.not_in_ccs.append(i)
153-
for prefix in range(0, 0x11):
154-
shifted = prefix << 16
155-
self.not_in_ccs.append(shifted | 0xFFFE)
156-
self.not_in_ccs.append(shifted | 0xFFFF)
157-
158150
self.not_in_ccs.sort()
159151

160152
def _load_default_ignorable_marks(self):

src/correct_ccs.rs

Lines changed: 1 addition & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -41,8 +41,7 @@ impl CcsKind {
4141
/// [defective combining character sequences](https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#I6.1.36487)
4242
/// by inserting U+00A0 NO-BREAK SPACE in front of them.
4343
///
44-
/// For the purposes of this iterator, private use characters,
45-
/// as well as unassigned codepoints other than noncharacters,
44+
/// For the purposes of this iterator, private use characters and unassigned codepoints
4645
/// are considered valid base characters,
4746
/// so combining character sequences that follow such will not be modified.
4847
///

src/lib.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -139,7 +139,7 @@ pub trait UnicodeNormalization<I: Iterator<Item = char>> {
139139
/// with the correct advance width,
140140
/// in diverse contexts (for example, when printed to a terminal).
141141
///
142-
/// Sequences following a private use character or an unassigned codepoint that is not a noncharacter
142+
/// Sequences following a private use character or an unassigned codepoint
143143
/// are not corrected. Additionally, combining character sequences consisting entirely of
144144
/// [default-ignorable code points](https://www.unicode.org/versions/Unicode15.0.0/UnicodeStandard-15.0.pdf#I8.1.40715)
145145
/// are also left untouched. Handling this last case may require the iterator

src/tables.rs

Lines changed: 1 addition & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -20973,33 +20973,15 @@ pub fn not_in_ccs(c: char) -> bool {
2097320973
| '\u{2028}'..='\u{202E}'
2097420974
| '\u{2060}'..='\u{2064}'
2097520975
| '\u{2066}'..='\u{206F}'
20976-
| '\u{FDD0}'..='\u{FDEF}'
2097720976
| '\u{FEFF}'
2097820977
| '\u{FFF9}'..='\u{FFFB}'
20979-
| '\u{FFFE}'..='\u{FFFF}'
2098020978
| '\u{110BD}'
2098120979
| '\u{110CD}'
2098220980
| '\u{13430}'..='\u{1343F}'
2098320981
| '\u{1BCA0}'..='\u{1BCA3}'
2098420982
| '\u{1D173}'..='\u{1D17A}'
20985-
| '\u{1FFFE}'..='\u{1FFFF}'
20986-
| '\u{2FFFE}'..='\u{2FFFF}'
20987-
| '\u{3FFFE}'..='\u{3FFFF}'
20988-
| '\u{4FFFE}'..='\u{4FFFF}'
20989-
| '\u{5FFFE}'..='\u{5FFFF}'
20990-
| '\u{6FFFE}'..='\u{6FFFF}'
20991-
| '\u{7FFFE}'..='\u{7FFFF}'
20992-
| '\u{8FFFE}'..='\u{8FFFF}'
20993-
| '\u{9FFFE}'..='\u{9FFFF}'
20994-
| '\u{AFFFE}'..='\u{AFFFF}'
20995-
| '\u{BFFFE}'..='\u{BFFFF}'
20996-
| '\u{CFFFE}'..='\u{CFFFF}'
20997-
| '\u{DFFFE}'..='\u{DFFFF}'
2099820983
| '\u{E0001}'
20999-
| '\u{E0020}'..='\u{E007F}'
21000-
| '\u{EFFFE}'..='\u{EFFFF}'
21001-
| '\u{FFFFE}'..='\u{FFFFF}'
21002-
| '\u{10FFFE}'..='\u{10FFFF}' => true,
20984+
| '\u{E0020}'..='\u{E007F}' => true,
2100320985
_ => false,
2100420986
}
2100520987
}

tests/correct_defective_ccs.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ fn defective_css() {
1818
check_ccs!("\u{200C}\u{0301}bcde", "\u{00A0}\u{200C}\u{0301}bcde");
1919
check_ccs!("\u{200C}bcde", "\u{200C}bcde");
2020
check_ccs!("\u{180F}bcde", "\u{180F}bcde");
21-
check_ccs!("\u{FFFF}\u{0301}bcde", "\u{FFFF}\u{00A0}\u{0301}bcde");
21+
check_ccs!("\u{FFFF}\u{0301}bcde", "\u{FFFF}\u{0301}bcde");
2222
check_ccs!("\u{10FFFD}\u{0301}bcde", "\u{10FFFD}\u{0301}bcde");
2323
check_ccs!("\u{180F}\u{180F}\u{180F}", "\u{180F}\u{180F}\u{180F}");
2424
check_ccs!("\u{180F}\u{180F}\u{180F}a", "\u{180F}\u{180F}\u{180F}a");

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy