Skip to content

Commit e5da0e4

Browse files
crlf0710 authored and Manishearth committed
Implement detect_restriction_level().
1 parent 2299150 commit e5da0e4

File tree

1 file changed

+46
-0
lines changed

1 file changed

+46
-0
lines changed

src/mixed_script.rs

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use unicode_script::{Script, ScriptExtension};
55
/// An Augmented script set, as defined by UTS 39
66
///
77
/// https://www.unicode.org/reports/tr39/#def-augmented-script-set
8+
#[derive(Copy, Clone, PartialEq, Debug, Hash)]
89
pub struct AugmentedScriptSet {
910
/// The base ScriptExtension value
1011
pub base: ScriptExtension,
@@ -106,6 +107,16 @@ impl AugmentedScriptSet {
106107
}
107108
}
108109

110+
/// The restriction level of a string, as reported by `detect_restriction_level`.
///
/// See https://www.unicode.org/reports/tr39/#Restriction_Level_Detection
///
/// The derived `PartialOrd`/`Ord` follow declaration order, so `ASCIIOnly`
/// compares lowest and `Unrestricted` compares highest.
#[derive(Copy, Clone, PartialEq, PartialOrd, Eq, Ord, Debug, Hash)]
111+
pub enum RestrictionLevel {
112+
/// Every character is an allowed identifier character in the ASCII range (<= U+007F).
ASCIIOnly,
113+
/// The intersection of the augmented script sets of all characters is non-empty.
SingleScript,
114+
/// The "Highly Restrictive" level of UTS 39.
HighlyRestrictive,
115+
/// The "Moderately Restrictive" level of UTS 39.
ModeratelyRestrictive,
116+
/// Fallback level when all characters are allowed but no stricter level applies.
MinimallyRestrictive,
117+
/// At least one character is rejected by `GeneralSecurityProfile::identifier_allowed`.
Unrestricted,
118+
}
119+
109120
/// Extension trait for [mixed-script detection](https://www.unicode.org/reports/tr39/#Mixed_Script_Detection)
110121
pub trait MixedScript {
111122
/// Check if a string is [single-script](https://www.unicode.org/reports/tr39/#def-single-script)
@@ -115,6 +126,9 @@ pub trait MixedScript {
115126

116127
/// Find the [resolved script set](https://www.unicode.org/reports/tr39/#def-resolved-script-set) of a given string
117128
fn resolve_script_set(self) -> AugmentedScriptSet;
129+
130+
/// Detect the [restriction level](https://www.unicode.org/reports/tr39/#Restriction_Level_Detection) of a given string
131+
fn detect_restriction_level(self) -> RestrictionLevel;
118132
}
119133

120134
impl MixedScript for &'_ str {
@@ -125,4 +139,36 @@ impl MixedScript for &'_ str {
125139
// Produces the resolved script set by delegating to the `Into<AugmentedScriptSet>`
// conversion available for `&str`.
fn resolve_script_set(self) -> AugmentedScriptSet {
126140
self.into()
127141
}
142+
143+
fn detect_restriction_level(self) -> RestrictionLevel {
144+
use crate::GeneralSecurityProfile;
145+
let mut ascii_only = true;
146+
let mut set = AugmentedScriptSet::default();
147+
let mut exclude_latin_set = AugmentedScriptSet::default();
148+
for ch in self.chars() {
149+
if !GeneralSecurityProfile::identifier_allowed(ch) {
150+
return RestrictionLevel::Unrestricted;
151+
}
152+
if ch as u32 > 0x7F {
153+
ascii_only = false;
154+
}
155+
let ch_set = ch.into();
156+
set = set.intersect(ch_set);
157+
if !ch_set.base.contains_script(Script::Latin) {
158+
exclude_latin_set.intersect(ch_set);
159+
}
160+
}
161+
if ascii_only {
162+
return RestrictionLevel::ASCIIOnly;
163+
} else if !set.is_empty() {
164+
return RestrictionLevel::SingleScript;
165+
} else if exclude_latin_set.kore || exclude_latin_set.hanb || exclude_latin_set.jpan {
166+
return RestrictionLevel::HighlyRestrictive;
167+
} else if let ScriptExtension::Single(script) = exclude_latin_set.base {
168+
if script.is_recommended() && script != Script::Cyrillic && script != Script::Greek {
169+
return RestrictionLevel::ModeratelyRestrictive;
170+
}
171+
}
172+
return RestrictionLevel::MinimallyRestrictive;
173+
}
128174
}

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy