Skip to content

Commit b80d8f1

Browse files
committed
Move into its own module, apply fixes
1 parent e5da0e4 commit b80d8f1

File tree

3 files changed

+76
-45
lines changed

3 files changed

+76
-45
lines changed

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,7 @@ pub use tables::UNICODE_VERSION;
5959

6060
pub mod mixed_script;
6161
pub mod general_security_profile;
62+
pub mod restriction_level;
6263

6364
pub use mixed_script::MixedScript;
6465
pub use general_security_profile::GeneralSecurityProfile;

src/mixed_script.rs

Lines changed: 0 additions & 45 deletions
Original file line numberDiff line numberDiff line change
@@ -107,16 +107,6 @@ impl AugmentedScriptSet {
107107
}
108108
}
109109

110-
#[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Debug, Hash)]
111-
pub enum RestrictionLevel {
112-
ASCIIOnly,
113-
SingleScript,
114-
HighlyRestrictive,
115-
ModeratelyRestrictive,
116-
MinimallyRestrictive,
117-
Unrestricted,
118-
}
119-
120110
/// Extension trait for [mixed-script detection](https://www.unicode.org/reports/tr39/#Mixed_Script_Detection)
121111
pub trait MixedScript {
122112
/// Check if a string is [single-script](https://www.unicode.org/reports/tr39/#def-single-script)
@@ -126,9 +116,6 @@ pub trait MixedScript {
126116

127117
/// Find the [resolved script set](https://www.unicode.org/reports/tr39/#def-resolved-script-set) of a given string
128118
fn resolve_script_set(self) -> AugmentedScriptSet;
129-
130-
/// Detect the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection) of a given string
131-
fn detect_restriction_level(self) -> RestrictionLevel;
132119
}
133120

134121
impl MixedScript for &'_ str {
@@ -139,36 +126,4 @@ impl MixedScript for &'_ str {
139126
fn resolve_script_set(self) -> AugmentedScriptSet {
140127
self.into()
141128
}
142-
143-
fn detect_restriction_level(self) -> RestrictionLevel {
144-
use crate::GeneralSecurityProfile;
145-
let mut ascii_only = true;
146-
let mut set = AugmentedScriptSet::default();
147-
let mut exclude_latin_set = AugmentedScriptSet::default();
148-
for ch in self.chars() {
149-
if !GeneralSecurityProfile::identifier_allowed(ch) {
150-
return RestrictionLevel::Unrestricted;
151-
}
152-
if ch as u32 > 0x7F {
153-
ascii_only = false;
154-
}
155-
let ch_set = ch.into();
156-
set = set.intersect(ch_set);
157-
if !ch_set.base.contains_script(Script::Latin) {
158-
exclude_latin_set.intersect(ch_set);
159-
}
160-
}
161-
if ascii_only {
162-
return RestrictionLevel::ASCIIOnly;
163-
} else if !set.is_empty() {
164-
return RestrictionLevel::SingleScript;
165-
} else if exclude_latin_set.kore || exclude_latin_set.hanb || exclude_latin_set.jpan {
166-
return RestrictionLevel::HighlyRestrictive;
167-
} else if let ScriptExtension::Single(script) = exclude_latin_set.base {
168-
if script.is_recommended() && script != Script::Cyrillic && script != Script::Greek {
169-
return RestrictionLevel::ModeratelyRestrictive;
170-
}
171-
}
172-
return RestrictionLevel::MinimallyRestrictive;
173-
}
174129
}

src/restriction_level.rs

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
//! For detecting the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
2+
//! a string conforms to
3+
4+
use crate::mixed_script::AugmentedScriptSet;
5+
use unicode_script::{Script, ScriptExtension};
6+
use crate::GeneralSecurityProfile;
7+
8+
/// The [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
/// a string conforms to.
///
/// Variants are declared from the most restrictive level to the least
/// restrictive one. The derived ordering follows declaration order, so
/// `a <= b` reads as "`a` is at least as strict as `b`".
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum RestrictionLevel {
    /// See [ASCII-Only](https://www.unicode.org/reports/tr39/#ascii_only).
    ASCIIOnly,
    /// See [Single Script](https://www.unicode.org/reports/tr39/#single_script).
    SingleScript,
    /// See [Highly Restrictive](https://www.unicode.org/reports/tr39/#highly_restrictive).
    HighlyRestrictive,
    /// See [Moderately Restrictive](https://www.unicode.org/reports/tr39/#moderately_restrictive).
    ModeratelyRestrictive,
    /// See [Minimally Restrictive](https://www.unicode.org/reports/tr39/#minimally_restrictive).
    MinimallyRestrictive,
    /// See [Unrestricted](https://www.unicode.org/reports/tr39/#unrestricted).
    Unrestricted,
}
25+
26+
/// Utilities for determining which [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
27+
/// a string satisfies
28+
pub trait RestrictionLevelDetection: Sized {
29+
/// Detect the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
30+
///
31+
/// This will _not_ check identifier well-formedness, as different applications may have different notions of well-formedness
32+
fn detect_restriction_level(self) -> RestrictionLevel;
33+
34+
35+
/// Check if a string satisfies the supplied [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection)
36+
///
37+
/// This will _not_ check identifier well-formedness, as different applications may have different notions of well-formedness
38+
fn check_restriction_level(self, level: RestrictionLevel) -> bool {
39+
self.detect_restriction_level() <= level
40+
}
41+
}
42+
43+
impl RestrictionLevelDetection for &'_ str {
44+
fn detect_restriction_level(self) -> RestrictionLevel {
45+
let mut ascii_only = true;
46+
let mut set = AugmentedScriptSet::default();
47+
let mut exclude_latin_set = AugmentedScriptSet::default();
48+
for ch in self.chars() {
49+
if !GeneralSecurityProfile::identifier_allowed(ch) {
50+
return RestrictionLevel::Unrestricted;
51+
}
52+
if ch.is_ascii() {
53+
ascii_only = false;
54+
}
55+
let ch_set = ch.into();
56+
set.intersect_with(ch_set);
57+
if !ch_set.base.contains_script(Script::Latin) {
58+
exclude_latin_set.intersect_with(ch_set);
59+
}
60+
}
61+
62+
if ascii_only {
63+
return RestrictionLevel::ASCIIOnly;
64+
} else if !set.is_empty() {
65+
return RestrictionLevel::SingleScript;
66+
} else if exclude_latin_set.kore || exclude_latin_set.hanb || exclude_latin_set.jpan {
67+
return RestrictionLevel::HighlyRestrictive;
68+
} else if let ScriptExtension::Single(script) = exclude_latin_set.base {
69+
if script.is_recommended() && script != Script::Cyrillic && script != Script::Greek {
70+
return RestrictionLevel::ModeratelyRestrictive;
71+
}
72+
}
73+
return RestrictionLevel::MinimallyRestrictive;
74+
}
75+
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy