Skip to content

Commit e96ec2e

Browse files
Use stdlib alphabetic and numeric character tables
…as long as the Unicode versions match
1 parent 592ce00 commit e96ec2e

File tree

2 files changed

+38
-16
lines changed

2 files changed

+38
-16
lines changed

scripts/unicode.py

Lines changed: 20 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -232,19 +232,27 @@ def emit_util_mod(f):
232232
233233
#[inline]
234234
fn is_alphabetic(c: char) -> bool {
235-
match c {
236-
'a' ..= 'z' | 'A' ..= 'Z' => true,
237-
c if c > '\x7f' => super::derived_property::Alphabetic(c),
238-
_ => false,
235+
if super::UNICODE_VERSION_U8 == char::UNICODE_VERSION {
236+
c.is_alphabetic()
237+
} else {
238+
match c {
239+
'a' ..= 'z' | 'A' ..= 'Z' => true,
240+
c if c > '\\x7f' => super::derived_property::Alphabetic(c),
241+
_ => false,
242+
}
239243
}
240244
}
241245
242246
#[inline]
243247
fn is_numeric(c: char) -> bool {
244-
match c {
245-
'0' ..= '9' => true,
246-
c if c > '\x7f' => super::general_category::N(c),
247-
_ => false,
248+
if super::UNICODE_VERSION_U8 == char::UNICODE_VERSION {
249+
c.is_numeric()
250+
} else {
251+
match c {
252+
'0' ..= '9' => true,
253+
c if c > '\\x7f' => super::general_category::N(c),
254+
_ => false,
255+
}
248256
}
249257
}
250258
@@ -388,6 +396,10 @@ def emit_break_module(f, break_table, break_cats, name):
388396
/// The version of [Unicode](http://www.unicode.org/)
389397
/// that this version of unicode-segmentation is based on.
390398
pub const UNICODE_VERSION: (u64, u64, u64) = (%s, %s, %s);
399+
""" % UNICODE_VERSION)
400+
401+
rf.write("""
402+
const UNICODE_VERSION_U8: (u8, u8, u8) = (%s, %s, %s);
391403
""" % UNICODE_VERSION)
392404

393405
# download and parse all the data

src/tables.rs

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
/// that this version of unicode-segmentation is based on.
1717
pub const UNICODE_VERSION: (u64, u64, u64) = (15, 1, 0);
1818

19+
const UNICODE_VERSION_U8: (u8, u8, u8) = (15, 1, 0);
20+
1921
pub mod util {
2022
#[inline]
2123
pub fn bsearch_range_table(c: char, r: &[(char,char)]) -> bool {
@@ -29,19 +31,27 @@ pub mod util {
2931

3032
#[inline]
3133
fn is_alphabetic(c: char) -> bool {
32-
match c {
33-
'a' ..= 'z' | 'A' ..= 'Z' => true,
34-
c if c > '\x7f' => super::derived_property::Alphabetic(c),
35-
_ => false,
34+
if super::UNICODE_VERSION_U8 == char::UNICODE_VERSION {
35+
c.is_alphabetic()
36+
} else {
37+
match c {
38+
'a' ..= 'z' | 'A' ..= 'Z' => true,
39+
c if c > '\x7f' => super::derived_property::Alphabetic(c),
40+
_ => false,
41+
}
3642
}
3743
}
3844

3945
#[inline]
4046
fn is_numeric(c: char) -> bool {
41-
match c {
42-
'0' ..= '9' => true,
43-
c if c > '\x7f' => super::general_category::N(c),
44-
_ => false,
47+
if super::UNICODE_VERSION_U8 == char::UNICODE_VERSION {
48+
c.is_numeric()
49+
} else {
50+
match c {
51+
'0' ..= '9' => true,
52+
c if c > '\x7f' => super::general_category::N(c),
53+
_ => false,
54+
}
4555
}
4656
}
4757

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy