From d99f11a0e7d4a5429408a7ac7090d168a10598f4 Mon Sep 17 00:00:00 2001 From: Igor Aleksanov Date: Sat, 26 Jun 2021 20:36:14 +0300 Subject: [PATCH 1/4] Add Script::from_full_name and Script::from_short_name --- scripts/unicode.py | 73 ++++++- src/lib.rs | 18 +- src/tables.rs | 494 +++++++++++++++++++++++++++++++++++++-------- 3 files changed, 500 insertions(+), 85 deletions(-) diff --git a/scripts/unicode.py b/scripts/unicode.py index 2676164..57e7d5a 100644 --- a/scripts/unicode.py +++ b/scripts/unicode.py @@ -34,7 +34,16 @@ #![allow(missing_docs, non_upper_case_globals, non_snake_case)] -use super::ScriptExtension; +pub use tables_impl::*; + +#[rustfmt::skip] +mod tables_impl { +use crate::ScriptExtension; +''' + +# Close `mod impl {` +ending=''' +} ''' UNICODE_VERSION = (13, 0, 0) @@ -239,7 +248,21 @@ def emit_enums(f, script_list, extension_list, longforms): f.write(" /// %s\n pub const %s: ScriptExtension = %s;\n" % (longform, name, expr)) f.write("""} -impl Script { +""") + + # Generate implementation for the `Script` + generate_script_impl(f) + + +def generate_script_impl(f): + """Generates an `impl Script { ... }` section with all the required functions""" + + # Open `impl Script` section. + f.write("""impl Script { +""") + + # Generate impl of `inner_full_name`. + f.write(""" #[inline] pub(crate) fn inner_full_name(self) -> &'static str { match self { @@ -251,7 +274,26 @@ def emit_enums(f, script_list, extension_list, longforms): f.write(" Script::%s => \"%s\",\n" % (longforms[script], longforms[script])) f.write(""" } } +""") + # Generate impl of `inner_from_full_name`. + f.write(""" + #[inline] + pub(crate) fn inner_from_full_name(input: &str) -> Option { + match input { + "Unknown" => Some(Script::Unknown), + "Common" => Some(Script::Common), + "Inherited" => Some(Script::Inherited), +""") + for script in script_list: + f.write(" \"%s\" => Some(Script::%s),\n" % (longforms[script], longforms[script])) + f.write(" _ => None,\n" ) + f.write(""" } + } +""") + + # Generate impl of `inner_short_name` + f.write(""" #[inline] pub(crate) fn inner_short_name(self) -> &'static str { match self { @@ -263,7 +305,26 @@ def emit_enums(f, script_list, extension_list, longforms): f.write(" Script::%s => \"%s\",\n" % (longforms[script], script)) f.write(""" } } +""") + # Generate impl of `inner_from_short_name` + f.write(""" + #[inline] + pub(crate) fn inner_from_short_name(input: &str) -> Option { + match input { + "" => Some(Script::Unknown), + "Zyyy" => Some(Script::Common), + "Zinh" => Some(Script::Inherited), +""") + for script in script_list: + f.write(" \"%s\" => Some(Script::%s),\n" % (script, longforms[script])) + f.write(""" _ => None,\n""") + f.write(""" } + } +""") + + # Generate impl of `for_integer` + f.write(""" #[inline] pub(crate) fn for_integer(value: u8) -> Self { match value { @@ -273,6 +334,10 @@ def emit_enums(f, script_list, extension_list, longforms): f.write(""" _ => unreachable!(), } } +""") + + # Close `impl Script` section + f.write(""" } """) @@ -281,8 +346,6 @@ def extension_name(ext): return "script_extensions::%s" % "_".join([e.upper() for e in ext]) - - if __name__ == "__main__": r = "tables.rs" if os.path.exists(r): @@ -336,3 +399,5 @@ def extension_name(ext): is_pub=False , pfun=lambda x: "(%s,%s,%s)" % (escape_char(x[0]), escape_char(x[1]), extension_name(x[2]))) # emit_table(rf, "FOObar", properties) + + rf.write(ending) \ No newline at end of file diff --git a/src/lib.rs b/src/lib.rs index 3b68a55..f5ce4c1 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,16 +15,30 @@ use tables::{get_script, get_script_extension, NEXT_SCRIPT}; pub use tables::{Script, UNICODE_VERSION}; impl Script { - /// Get the full name of a script + /// Get the full name of a script. pub fn full_name(self) -> &'static str { self.inner_full_name() } - /// Get the four-character short name of a script + /// Attempts to parse script name from the provided string. + /// Returns `None` if the provided string does not represent a valid + /// script full name. + pub fn from_full_name(input: &str) -> Option { + Self::inner_from_full_name(input) + } + + /// Get the four-character short name of a script. pub fn short_name(self) -> &'static str { self.inner_short_name() } + /// Attempts to parse script name from the provided string. + /// Returns `None` if the provided string does not represent a valid + /// script four-character short name. + pub fn from_short_name(input: &str) -> Option { + Self::inner_from_short_name(input) + } + /// Is this script "Recommended" according to /// [UAX #31](www.unicode.org/reports/tr31/#Table_Recommended_Scripts)? pub fn is_recommended(self) -> bool { diff --git a/src/tables.rs b/src/tables.rs index c7cf53c..665a310 100644 --- a/src/tables.rs +++ b/src/tables.rs @@ -12,7 +12,11 @@ #![allow(missing_docs, non_upper_case_globals, non_snake_case)] -use super::ScriptExtension; +pub use tables_impl::*; + +#[rustfmt::skip] +mod tables_impl { +use crate::ScriptExtension; /// The version of [Unicode](http://www.unicode.org/) /// that this version of unicode-script is based on. @@ -950,109 +954,110 @@ pub mod script_extensions { pub const ZANABAZAR_SQUARE: ScriptExtension = ScriptExtension::new(0, 0, 0x2000000); /// Zanabazar_Square pub const ZANB: ScriptExtension = ZANABAZAR_SQUARE; - /// Arabic, Coptic - pub const ARAB_COPT: ScriptExtension = ARAB.union(COPT); - /// Arabic, Hanifi_Rohingya - pub const ARAB_ROHG: ScriptExtension = ARAB.union(ROHG); - /// Arabic, Syriac - pub const ARAB_SYRC: ScriptExtension = ARAB.union(SYRC); + /// Devanagari, Grantha + pub const DEVA_GRAN: ScriptExtension = DEVA.union(GRAN); + /// Cypriot, Linear_A, Linear_B + pub const CPRT_LINA_LINB: ScriptExtension = CPRT.union(LINA).union(LINB); /// Arabic, Thaana pub const ARAB_THAA: ScriptExtension = ARAB.union(THAA); - /// Bengali, Devanagari - pub const BENG_DEVA: ScriptExtension = BENG.union(DEVA); - /// Bopomofo, Han - pub const BOPO_HANI: ScriptExtension = BOPO.union(HANI); - /// Buginese, Javanese - pub const BUGI_JAVA: ScriptExtension = BUGI.union(JAVA); - /// Cypriot, Linear_B - pub const CPRT_LINB: ScriptExtension = CPRT.union(LINB); - /// Cyrillic, Glagolitic - pub const CYRL_GLAG: ScriptExtension = CYRL.union(GLAG); + /// Han, Hiragana, Katakana + pub const HANI_HIRA_KANA: ScriptExtension = HANI.union(HIRA).union(KANA); + /// Bengali, Devanagari, Dogra, Gunjala_Gondi, Masaram_Gondi, Grantha, Gujarati, Gurmukhi, Kannada, Mahajani, Malayalam, Nandinagari, Oriya, Khudawadi, Sinhala, Syloti_Nagri, Takri, Tamil, Telugu, Tirhuta + pub const BENG_DEVA_DOGR_GONG_GONM_GRAN_GUJR_GURU_KNDA_MAHJ_MLYM_NAND_ORYA_SIND_SINH_SYLO_TAKR_TAML_TELU_TIRH: ScriptExtension = BENG.union(DEVA).union(DOGR).union(GONG).union(GONM).union(GRAN).union(GUJR).union(GURU).union(KNDA).union(MAHJ).union(MLYM).union(NAND).union(ORYA).union(SIND).union(SINH).union(SYLO).union(TAKR).union(TAML).union(TELU).union(TIRH); + /// Bengali, Devanagari, Grantha, Gujarati, Gurmukhi, Kannada, Latin, Malayalam, Oriya, Sharada, Tamil, Telugu, Tirhuta + pub const BENG_DEVA_GRAN_GUJR_GURU_KNDA_LATN_MLYM_ORYA_SHRD_TAML_TELU_TIRH: ScriptExtension = BENG.union(DEVA).union(GRAN).union(GUJR).union(GURU).union(KNDA).union(LATN).union(MLYM).union(ORYA).union(SHRD).union(TAML).union(TELU).union(TIRH); /// Cyrillic, Latin pub const CYRL_LATN: ScriptExtension = CYRL.union(LATN); - /// Cyrillic, Old_Permic - pub const CYRL_PERM: ScriptExtension = CYRL.union(PERM); - /// Cyrillic, Syriac - pub const CYRL_SYRC: ScriptExtension = CYRL.union(SYRC); - /// Devanagari, Grantha - pub const DEVA_GRAN: ScriptExtension = DEVA.union(GRAN); - /// Devanagari, Nandinagari - pub const DEVA_NAND: ScriptExtension = DEVA.union(NAND); - /// Devanagari, Sharada - pub const DEVA_SHRD: ScriptExtension = DEVA.union(SHRD); - /// Devanagari, Tamil - pub const DEVA_TAML: ScriptExtension = DEVA.union(TAML); + /// Devanagari, Dogra, Gujarati, Gurmukhi, Khojki, Kannada, Kaithi, Mahajani, Malayalam, Modi, Nandinagari, Khudawadi, Takri, Tirhuta + pub const DEVA_DOGR_GUJR_GURU_KHOJ_KNDA_KTHI_MAHJ_MLYM_MODI_NAND_SIND_TAKR_TIRH: ScriptExtension = DEVA.union(DOGR).union(GUJR).union(GURU).union(KHOJ).union(KNDA).union(KTHI).union(MAHJ).union(MLYM).union(MODI).union(NAND).union(SIND).union(TAKR).union(TIRH); + /// Arabic, Hanifi_Rohingya + pub const ARAB_ROHG: ScriptExtension = ARAB.union(ROHG); + /// Devanagari, Dogra, Gujarati, Gurmukhi, Khojki, Kaithi, Mahajani, Modi, Khudawadi, Takri, Tirhuta + pub const DEVA_DOGR_GUJR_GURU_KHOJ_KTHI_MAHJ_MODI_SIND_TAKR_TIRH: ScriptExtension = DEVA.union(DOGR).union(GUJR).union(GURU).union(KHOJ).union(KTHI).union(MAHJ).union(MODI).union(SIND).union(TAKR).union(TIRH); + /// Buginese, Javanese + pub const BUGI_JAVA: ScriptExtension = BUGI.union(JAVA); + /// Arabic, Hanifi_Rohingya, Syriac, Thaana, Yezidi + pub const ARAB_ROHG_SYRC_THAA_YEZI: ScriptExtension = ARAB.union(ROHG).union(SYRC).union(THAA).union(YEZI); /// Georgian, Latin pub const GEOR_LATN: ScriptExtension = GEOR.union(LATN); - /// Grantha, Tamil - pub const GRAN_TAML: ScriptExtension = GRAN.union(TAML); - /// Gujarati, Khojki - pub const GUJR_KHOJ: ScriptExtension = GUJR.union(KHOJ); - /// Gurmukhi, Multani - pub const GURU_MULT: ScriptExtension = GURU.union(MULT); + /// Bengali, Devanagari, Dogra, Gunjala_Gondi, Masaram_Gondi, Grantha, Gujarati, Gurmukhi, Kannada, Limbu, Mahajani, Malayalam, Nandinagari, Oriya, Khudawadi, Sinhala, Syloti_Nagri, Takri, Tamil, Telugu, Tirhuta + pub const BENG_DEVA_DOGR_GONG_GONM_GRAN_GUJR_GURU_KNDA_LIMB_MAHJ_MLYM_NAND_ORYA_SIND_SINH_SYLO_TAKR_TAML_TELU_TIRH: ScriptExtension = BENG.union(DEVA).union(DOGR).union(GONG).union(GONM).union(GRAN).union(GUJR).union(GURU).union(KNDA).union(LIMB).union(MAHJ).union(MLYM).union(NAND).union(ORYA).union(SIND).union(SINH).union(SYLO).union(TAKR).union(TAML).union(TELU).union(TIRH); + /// Devanagari, Dogra, Kaithi, Mahajani + pub const DEVA_DOGR_KTHI_MAHJ: ScriptExtension = DEVA.union(DOGR).union(KTHI).union(MAHJ); + /// Adlam, Arabic, Mandaic, Manichaean, Psalter_Pahlavi, Hanifi_Rohingya, Sogdian, Syriac + pub const ADLM_ARAB_MAND_MANI_PHLP_ROHG_SOGD_SYRC: ScriptExtension = ADLM.union(ARAB).union(MAND).union(MANI).union(PHLP).union(ROHG).union(SOGD).union(SYRC); + /// Kayah_Li, Latin, Myanmar + pub const KALI_LATN_MYMR: ScriptExtension = KALI.union(LATN).union(MYMR); + /// Bengali, Devanagari, Grantha, Kannada + pub const BENG_DEVA_GRAN_KNDA: ScriptExtension = BENG.union(DEVA).union(GRAN).union(KNDA); /// Han, Latin pub const HANI_LATN: ScriptExtension = HANI.union(LATN); - /// Hiragana, Katakana - pub const HIRA_KANA: ScriptExtension = HIRA.union(KANA); - /// Kannada, Nandinagari - pub const KNDA_NAND: ScriptExtension = KNDA.union(NAND); - /// Latin, Mongolian - pub const LATN_MONG: ScriptExtension = LATN.union(MONG); - /// Mongolian, Phags_Pa - pub const MONG_PHAG: ScriptExtension = MONG.union(PHAG); + /// Devanagari, Tamil + pub const DEVA_TAML: ScriptExtension = DEVA.union(TAML); + /// Cyrillic, Syriac + pub const CYRL_SYRC: ScriptExtension = CYRL.union(SYRC); + /// Bopomofo, Hangul, Han, Hiragana, Katakana + pub const BOPO_HANG_HANI_HIRA_KANA: ScriptExtension = BOPO.union(HANG).union(HANI).union(HIRA).union(KANA); /// Arabic, Syriac, Thaana pub const ARAB_SYRC_THAA: ScriptExtension = ARAB.union(SYRC).union(THAA); - /// Arabic, Thaana, Yezidi - pub const ARAB_THAA_YEZI: ScriptExtension = ARAB.union(THAA).union(YEZI); - /// Bengali, Chakma, Syloti_Nagri - pub const BENG_CAKM_SYLO: ScriptExtension = BENG.union(CAKM).union(SYLO); - /// Chakma, Myanmar, Tai_Le - pub const CAKM_MYMR_TALE: ScriptExtension = CAKM.union(MYMR).union(TALE); - /// Cypriot, Linear_A, Linear_B - pub const CPRT_LINA_LINB: ScriptExtension = CPRT.union(LINA).union(LINB); /// Devanagari, Grantha, Kannada pub const DEVA_GRAN_KNDA: ScriptExtension = DEVA.union(GRAN).union(KNDA); + /// Bengali, Chakma, Syloti_Nagri + pub const BENG_CAKM_SYLO: ScriptExtension = BENG.union(CAKM).union(SYLO); + /// Kannada, Nandinagari + pub const KNDA_NAND: ScriptExtension = KNDA.union(NAND); + /// Gurmukhi, Multani + pub const GURU_MULT: ScriptExtension = GURU.union(MULT); + /// Devanagari, Kannada, Malayalam, Oriya, Tamil, Telugu + pub const DEVA_KNDA_MLYM_ORYA_TAML_TELU: ScriptExtension = DEVA.union(KNDA).union(MLYM).union(ORYA).union(TAML).union(TELU); + /// Gujarati, Khojki + pub const GUJR_KHOJ: ScriptExtension = GUJR.union(KHOJ); + /// Cyrillic, Old_Permic + pub const CYRL_PERM: ScriptExtension = CYRL.union(PERM); + /// Devanagari, Dogra, Gujarati, Gurmukhi, Khojki, Kannada, Kaithi, Mahajani, Modi, Nandinagari, Khudawadi, Takri, Tirhuta + pub const DEVA_DOGR_GUJR_GURU_KHOJ_KNDA_KTHI_MAHJ_MODI_NAND_SIND_TAKR_TIRH: ScriptExtension = DEVA.union(DOGR).union(GUJR).union(GURU).union(KHOJ).union(KNDA).union(KTHI).union(MAHJ).union(MODI).union(NAND).union(SIND).union(TAKR).union(TIRH); + /// Hiragana, Katakana + pub const HIRA_KANA: ScriptExtension = HIRA.union(KANA); + /// Devanagari, Nandinagari + pub const DEVA_NAND: ScriptExtension = DEVA.union(NAND); + /// Cypriot, Linear_B + pub const CPRT_LINB: ScriptExtension = CPRT.union(LINB); /// Devanagari, Grantha, Latin pub const DEVA_GRAN_LATN: ScriptExtension = DEVA.union(GRAN).union(LATN); - /// Han, Hiragana, Katakana - pub const HANI_HIRA_KANA: ScriptExtension = HANI.union(HIRA).union(KANA); - /// Kayah_Li, Latin, Myanmar - pub const KALI_LATN_MYMR: ScriptExtension = KALI.union(LATN).union(MYMR); - /// Bengali, Devanagari, Grantha, Kannada - pub const BENG_DEVA_GRAN_KNDA: ScriptExtension = BENG.union(DEVA).union(GRAN).union(KNDA); /// Buhid, Hanunoo, Tagbanwa, Tagalog pub const BUHD_HANO_TAGB_TGLG: ScriptExtension = BUHD.union(HANO).union(TAGB).union(TGLG); - /// Devanagari, Dogra, Kaithi, Mahajani - pub const DEVA_DOGR_KTHI_MAHJ: ScriptExtension = DEVA.union(DOGR).union(KTHI).union(MAHJ); - /// Arabic, Hanifi_Rohingya, Syriac, Thaana, Yezidi - pub const ARAB_ROHG_SYRC_THAA_YEZI: ScriptExtension = ARAB.union(ROHG).union(SYRC).union(THAA).union(YEZI); - /// Bopomofo, Hangul, Han, Hiragana, Katakana - pub const BOPO_HANG_HANI_HIRA_KANA: ScriptExtension = BOPO.union(HANG).union(HANI).union(HIRA).union(KANA); + /// Arabic, Syriac + pub const ARAB_SYRC: ScriptExtension = ARAB.union(SYRC); + /// Grantha, Tamil + pub const GRAN_TAML: ScriptExtension = GRAN.union(TAML); /// Bopomofo, Hangul, Han, Hiragana, Katakana, Yi pub const BOPO_HANG_HANI_HIRA_KANA_YIII: ScriptExtension = BOPO.union(HANG).union(HANI).union(HIRA).union(KANA).union(YIII); - /// Devanagari, Kannada, Malayalam, Oriya, Tamil, Telugu - pub const DEVA_KNDA_MLYM_ORYA_TAML_TELU: ScriptExtension = DEVA.union(KNDA).union(MLYM).union(ORYA).union(TAML).union(TELU); - /// Adlam, Arabic, Mandaic, Manichaean, Psalter_Pahlavi, Hanifi_Rohingya, Sogdian, Syriac - pub const ADLM_ARAB_MAND_MANI_PHLP_ROHG_SOGD_SYRC: ScriptExtension = ADLM.union(ARAB).union(MAND).union(MANI).union(PHLP).union(ROHG).union(SOGD).union(SYRC); + /// Chakma, Myanmar, Tai_Le + pub const CAKM_MYMR_TALE: ScriptExtension = CAKM.union(MYMR).union(TALE); + /// Bengali, Devanagari + pub const BENG_DEVA: ScriptExtension = BENG.union(DEVA); /// Bengali, Devanagari, Grantha, Kannada, Nandinagari, Oriya, Telugu, Tirhuta pub const BENG_DEVA_GRAN_KNDA_NAND_ORYA_TELU_TIRH: ScriptExtension = BENG.union(DEVA).union(GRAN).union(KNDA).union(NAND).union(ORYA).union(TELU).union(TIRH); - /// Devanagari, Dogra, Gujarati, Gurmukhi, Khojki, Kaithi, Mahajani, Modi, Khudawadi, Takri, Tirhuta - pub const DEVA_DOGR_GUJR_GURU_KHOJ_KTHI_MAHJ_MODI_SIND_TAKR_TIRH: ScriptExtension = DEVA.union(DOGR).union(GUJR).union(GURU).union(KHOJ).union(KTHI).union(MAHJ).union(MODI).union(SIND).union(TAKR).union(TIRH); + /// Latin, Mongolian + pub const LATN_MONG: ScriptExtension = LATN.union(MONG); /// Bengali, Devanagari, Grantha, Gujarati, Gurmukhi, Kannada, Latin, Malayalam, Oriya, Tamil, Telugu, Tirhuta pub const BENG_DEVA_GRAN_GUJR_GURU_KNDA_LATN_MLYM_ORYA_TAML_TELU_TIRH: ScriptExtension = BENG.union(DEVA).union(GRAN).union(GUJR).union(GURU).union(KNDA).union(LATN).union(MLYM).union(ORYA).union(TAML).union(TELU).union(TIRH); - /// Bengali, Devanagari, Grantha, Gujarati, Gurmukhi, Kannada, Latin, Malayalam, Oriya, Sharada, Tamil, Telugu, Tirhuta - pub const BENG_DEVA_GRAN_GUJR_GURU_KNDA_LATN_MLYM_ORYA_SHRD_TAML_TELU_TIRH: ScriptExtension = BENG.union(DEVA).union(GRAN).union(GUJR).union(GURU).union(KNDA).union(LATN).union(MLYM).union(ORYA).union(SHRD).union(TAML).union(TELU).union(TIRH); - /// Devanagari, Dogra, Gujarati, Gurmukhi, Khojki, Kannada, Kaithi, Mahajani, Modi, Nandinagari, Khudawadi, Takri, Tirhuta - pub const DEVA_DOGR_GUJR_GURU_KHOJ_KNDA_KTHI_MAHJ_MODI_NAND_SIND_TAKR_TIRH: ScriptExtension = DEVA.union(DOGR).union(GUJR).union(GURU).union(KHOJ).union(KNDA).union(KTHI).union(MAHJ).union(MODI).union(NAND).union(SIND).union(TAKR).union(TIRH); - /// Devanagari, Dogra, Gujarati, Gurmukhi, Khojki, Kannada, Kaithi, Mahajani, Malayalam, Modi, Nandinagari, Khudawadi, Takri, Tirhuta - pub const DEVA_DOGR_GUJR_GURU_KHOJ_KNDA_KTHI_MAHJ_MLYM_MODI_NAND_SIND_TAKR_TIRH: ScriptExtension = DEVA.union(DOGR).union(GUJR).union(GURU).union(KHOJ).union(KNDA).union(KTHI).union(MAHJ).union(MLYM).union(MODI).union(NAND).union(SIND).union(TAKR).union(TIRH); - /// Bengali, Devanagari, Dogra, Gunjala_Gondi, Masaram_Gondi, Grantha, Gujarati, Gurmukhi, Kannada, Mahajani, Malayalam, Nandinagari, Oriya, Khudawadi, Sinhala, Syloti_Nagri, Takri, Tamil, Telugu, Tirhuta - pub const BENG_DEVA_DOGR_GONG_GONM_GRAN_GUJR_GURU_KNDA_MAHJ_MLYM_NAND_ORYA_SIND_SINH_SYLO_TAKR_TAML_TELU_TIRH: ScriptExtension = BENG.union(DEVA).union(DOGR).union(GONG).union(GONM).union(GRAN).union(GUJR).union(GURU).union(KNDA).union(MAHJ).union(MLYM).union(NAND).union(ORYA).union(SIND).union(SINH).union(SYLO).union(TAKR).union(TAML).union(TELU).union(TIRH); - /// Bengali, Devanagari, Dogra, Gunjala_Gondi, Masaram_Gondi, Grantha, Gujarati, Gurmukhi, Kannada, Limbu, Mahajani, Malayalam, Nandinagari, Oriya, Khudawadi, Sinhala, Syloti_Nagri, Takri, Tamil, Telugu, Tirhuta - pub const BENG_DEVA_DOGR_GONG_GONM_GRAN_GUJR_GURU_KNDA_LIMB_MAHJ_MLYM_NAND_ORYA_SIND_SINH_SYLO_TAKR_TAML_TELU_TIRH: ScriptExtension = BENG.union(DEVA).union(DOGR).union(GONG).union(GONM).union(GRAN).union(GUJR).union(GURU).union(KNDA).union(LIMB).union(MAHJ).union(MLYM).union(NAND).union(ORYA).union(SIND).union(SINH).union(SYLO).union(TAKR).union(TAML).union(TELU).union(TIRH); + /// Bopomofo, Han + pub const BOPO_HANI: ScriptExtension = BOPO.union(HANI); + /// Arabic, Coptic + pub const ARAB_COPT: ScriptExtension = ARAB.union(COPT); + /// Devanagari, Sharada + pub const DEVA_SHRD: ScriptExtension = DEVA.union(SHRD); + /// Mongolian, Phags_Pa + pub const MONG_PHAG: ScriptExtension = MONG.union(PHAG); + /// Arabic, Thaana, Yezidi + pub const ARAB_THAA_YEZI: ScriptExtension = ARAB.union(THAA).union(YEZI); + /// Cyrillic, Glagolitic + pub const CYRL_GLAG: ScriptExtension = CYRL.union(GLAG); } impl Script { + #[inline] pub(crate) fn inner_full_name(self) -> &'static str { match self { @@ -1216,6 +1221,170 @@ impl Script { } } + #[inline] + pub(crate) fn inner_from_full_name(input: &str) -> Option { + match input { + "Unknown" => Some(Script::Unknown), + "Common" => Some(Script::Common), + "Inherited" => Some(Script::Inherited), + "Adlam" => Some(Script::Adlam), + "Caucasian_Albanian" => Some(Script::Caucasian_Albanian), + "Ahom" => Some(Script::Ahom), + "Arabic" => Some(Script::Arabic), + "Imperial_Aramaic" => Some(Script::Imperial_Aramaic), + "Armenian" => Some(Script::Armenian), + "Avestan" => Some(Script::Avestan), + "Balinese" => Some(Script::Balinese), + "Bamum" => Some(Script::Bamum), + "Bassa_Vah" => Some(Script::Bassa_Vah), + "Batak" => Some(Script::Batak), + "Bengali" => Some(Script::Bengali), + "Bhaiksuki" => Some(Script::Bhaiksuki), + "Bopomofo" => Some(Script::Bopomofo), + "Brahmi" => Some(Script::Brahmi), + "Braille" => Some(Script::Braille), + "Buginese" => Some(Script::Buginese), + "Buhid" => Some(Script::Buhid), + "Chakma" => Some(Script::Chakma), + "Canadian_Aboriginal" => Some(Script::Canadian_Aboriginal), + "Carian" => Some(Script::Carian), + "Cham" => Some(Script::Cham), + "Cherokee" => Some(Script::Cherokee), + "Chorasmian" => Some(Script::Chorasmian), + "Coptic" => Some(Script::Coptic), + "Cypriot" => Some(Script::Cypriot), + "Cyrillic" => Some(Script::Cyrillic), + "Devanagari" => Some(Script::Devanagari), + "Dives_Akuru" => Some(Script::Dives_Akuru), + "Dogra" => Some(Script::Dogra), + "Deseret" => Some(Script::Deseret), + "Duployan" => Some(Script::Duployan), + "Egyptian_Hieroglyphs" => Some(Script::Egyptian_Hieroglyphs), + "Elbasan" => Some(Script::Elbasan), + "Elymaic" => Some(Script::Elymaic), + "Ethiopic" => Some(Script::Ethiopic), + "Georgian" => Some(Script::Georgian), + "Glagolitic" => Some(Script::Glagolitic), + "Gunjala_Gondi" => Some(Script::Gunjala_Gondi), + "Masaram_Gondi" => Some(Script::Masaram_Gondi), + "Gothic" => Some(Script::Gothic), + "Grantha" => Some(Script::Grantha), + "Greek" => Some(Script::Greek), + "Gujarati" => Some(Script::Gujarati), + "Gurmukhi" => Some(Script::Gurmukhi), + "Hangul" => Some(Script::Hangul), + "Han" => Some(Script::Han), + "Hanunoo" => Some(Script::Hanunoo), + "Hatran" => Some(Script::Hatran), + "Hebrew" => Some(Script::Hebrew), + "Hiragana" => Some(Script::Hiragana), + "Anatolian_Hieroglyphs" => Some(Script::Anatolian_Hieroglyphs), + "Pahawh_Hmong" => Some(Script::Pahawh_Hmong), + "Nyiakeng_Puachue_Hmong" => Some(Script::Nyiakeng_Puachue_Hmong), + "Old_Hungarian" => Some(Script::Old_Hungarian), + "Old_Italic" => Some(Script::Old_Italic), + "Javanese" => Some(Script::Javanese), + "Kayah_Li" => Some(Script::Kayah_Li), + "Katakana" => Some(Script::Katakana), + "Kharoshthi" => Some(Script::Kharoshthi), + "Khmer" => Some(Script::Khmer), + "Khojki" => Some(Script::Khojki), + "Khitan_Small_Script" => Some(Script::Khitan_Small_Script), + "Kannada" => Some(Script::Kannada), + "Kaithi" => Some(Script::Kaithi), + "Tai_Tham" => Some(Script::Tai_Tham), + "Lao" => Some(Script::Lao), + "Latin" => Some(Script::Latin), + "Lepcha" => Some(Script::Lepcha), + "Limbu" => Some(Script::Limbu), + "Linear_A" => Some(Script::Linear_A), + "Linear_B" => Some(Script::Linear_B), + "Lisu" => Some(Script::Lisu), + "Lycian" => Some(Script::Lycian), + "Lydian" => Some(Script::Lydian), + "Mahajani" => Some(Script::Mahajani), + "Makasar" => Some(Script::Makasar), + "Mandaic" => Some(Script::Mandaic), + "Manichaean" => Some(Script::Manichaean), + "Marchen" => Some(Script::Marchen), + "Medefaidrin" => Some(Script::Medefaidrin), + "Mende_Kikakui" => Some(Script::Mende_Kikakui), + "Meroitic_Cursive" => Some(Script::Meroitic_Cursive), + "Meroitic_Hieroglyphs" => Some(Script::Meroitic_Hieroglyphs), + "Malayalam" => Some(Script::Malayalam), + "Modi" => Some(Script::Modi), + "Mongolian" => Some(Script::Mongolian), + "Mro" => Some(Script::Mro), + "Meetei_Mayek" => Some(Script::Meetei_Mayek), + "Multani" => Some(Script::Multani), + "Myanmar" => Some(Script::Myanmar), + "Nandinagari" => Some(Script::Nandinagari), + "Old_North_Arabian" => Some(Script::Old_North_Arabian), + "Nabataean" => Some(Script::Nabataean), + "Newa" => Some(Script::Newa), + "Nko" => Some(Script::Nko), + "Nushu" => Some(Script::Nushu), + "Ogham" => Some(Script::Ogham), + "Ol_Chiki" => Some(Script::Ol_Chiki), + "Old_Turkic" => Some(Script::Old_Turkic), + "Oriya" => Some(Script::Oriya), + "Osage" => Some(Script::Osage), + "Osmanya" => Some(Script::Osmanya), + "Palmyrene" => Some(Script::Palmyrene), + "Pau_Cin_Hau" => Some(Script::Pau_Cin_Hau), + "Old_Permic" => Some(Script::Old_Permic), + "Phags_Pa" => Some(Script::Phags_Pa), + "Inscriptional_Pahlavi" => Some(Script::Inscriptional_Pahlavi), + "Psalter_Pahlavi" => Some(Script::Psalter_Pahlavi), + "Phoenician" => Some(Script::Phoenician), + "Miao" => Some(Script::Miao), + "Inscriptional_Parthian" => Some(Script::Inscriptional_Parthian), + "Rejang" => Some(Script::Rejang), + "Hanifi_Rohingya" => Some(Script::Hanifi_Rohingya), + "Runic" => Some(Script::Runic), + "Samaritan" => Some(Script::Samaritan), + "Old_South_Arabian" => Some(Script::Old_South_Arabian), + "Saurashtra" => Some(Script::Saurashtra), + "SignWriting" => Some(Script::SignWriting), + "Shavian" => Some(Script::Shavian), + "Sharada" => Some(Script::Sharada), + "Siddham" => Some(Script::Siddham), + "Khudawadi" => Some(Script::Khudawadi), + "Sinhala" => Some(Script::Sinhala), + "Sogdian" => Some(Script::Sogdian), + "Old_Sogdian" => Some(Script::Old_Sogdian), + "Sora_Sompeng" => Some(Script::Sora_Sompeng), + "Soyombo" => Some(Script::Soyombo), + "Sundanese" => Some(Script::Sundanese), + "Syloti_Nagri" => Some(Script::Syloti_Nagri), + "Syriac" => Some(Script::Syriac), + "Tagbanwa" => Some(Script::Tagbanwa), + "Takri" => Some(Script::Takri), + "Tai_Le" => Some(Script::Tai_Le), + "New_Tai_Lue" => Some(Script::New_Tai_Lue), + "Tamil" => Some(Script::Tamil), + "Tangut" => Some(Script::Tangut), + "Tai_Viet" => Some(Script::Tai_Viet), + "Telugu" => Some(Script::Telugu), + "Tifinagh" => Some(Script::Tifinagh), + "Tagalog" => Some(Script::Tagalog), + "Thaana" => Some(Script::Thaana), + "Thai" => Some(Script::Thai), + "Tibetan" => Some(Script::Tibetan), + "Tirhuta" => Some(Script::Tirhuta), + "Ugaritic" => Some(Script::Ugaritic), + "Vai" => Some(Script::Vai), + "Warang_Citi" => Some(Script::Warang_Citi), + "Wancho" => Some(Script::Wancho), + "Old_Persian" => Some(Script::Old_Persian), + "Cuneiform" => Some(Script::Cuneiform), + "Yezidi" => Some(Script::Yezidi), + "Yi" => Some(Script::Yi), + "Zanabazar_Square" => Some(Script::Zanabazar_Square), + _ => None, + } + } + #[inline] pub(crate) fn inner_short_name(self) -> &'static str { match self { @@ -1379,6 +1548,170 @@ impl Script { } } + #[inline] + pub(crate) fn inner_from_short_name(input: &str) -> Option { + match input { + "" => Some(Script::Unknown), + "Zyyy" => Some(Script::Common), + "Zinh" => Some(Script::Inherited), + "Adlm" => Some(Script::Adlam), + "Aghb" => Some(Script::Caucasian_Albanian), + "Ahom" => Some(Script::Ahom), + "Arab" => Some(Script::Arabic), + "Armi" => Some(Script::Imperial_Aramaic), + "Armn" => Some(Script::Armenian), + "Avst" => Some(Script::Avestan), + "Bali" => Some(Script::Balinese), + "Bamu" => Some(Script::Bamum), + "Bass" => Some(Script::Bassa_Vah), + "Batk" => Some(Script::Batak), + "Beng" => Some(Script::Bengali), + "Bhks" => Some(Script::Bhaiksuki), + "Bopo" => Some(Script::Bopomofo), + "Brah" => Some(Script::Brahmi), + "Brai" => Some(Script::Braille), + "Bugi" => Some(Script::Buginese), + "Buhd" => Some(Script::Buhid), + "Cakm" => Some(Script::Chakma), + "Cans" => Some(Script::Canadian_Aboriginal), + "Cari" => Some(Script::Carian), + "Cham" => Some(Script::Cham), + "Cher" => Some(Script::Cherokee), + "Chrs" => Some(Script::Chorasmian), + "Copt" => Some(Script::Coptic), + "Cprt" => Some(Script::Cypriot), + "Cyrl" => Some(Script::Cyrillic), + "Deva" => Some(Script::Devanagari), + "Diak" => Some(Script::Dives_Akuru), + "Dogr" => Some(Script::Dogra), + "Dsrt" => Some(Script::Deseret), + "Dupl" => Some(Script::Duployan), + "Egyp" => Some(Script::Egyptian_Hieroglyphs), + "Elba" => Some(Script::Elbasan), + "Elym" => Some(Script::Elymaic), + "Ethi" => Some(Script::Ethiopic), + "Geor" => Some(Script::Georgian), + "Glag" => Some(Script::Glagolitic), + "Gong" => Some(Script::Gunjala_Gondi), + "Gonm" => Some(Script::Masaram_Gondi), + "Goth" => Some(Script::Gothic), + "Gran" => Some(Script::Grantha), + "Grek" => Some(Script::Greek), + "Gujr" => Some(Script::Gujarati), + "Guru" => Some(Script::Gurmukhi), + "Hang" => Some(Script::Hangul), + "Hani" => Some(Script::Han), + "Hano" => Some(Script::Hanunoo), + "Hatr" => Some(Script::Hatran), + "Hebr" => Some(Script::Hebrew), + "Hira" => Some(Script::Hiragana), + "Hluw" => Some(Script::Anatolian_Hieroglyphs), + "Hmng" => Some(Script::Pahawh_Hmong), + "Hmnp" => Some(Script::Nyiakeng_Puachue_Hmong), + "Hung" => Some(Script::Old_Hungarian), + "Ital" => Some(Script::Old_Italic), + "Java" => Some(Script::Javanese), + "Kali" => Some(Script::Kayah_Li), + "Kana" => Some(Script::Katakana), + "Khar" => Some(Script::Kharoshthi), + "Khmr" => Some(Script::Khmer), + "Khoj" => Some(Script::Khojki), + "Kits" => Some(Script::Khitan_Small_Script), + "Knda" => Some(Script::Kannada), + "Kthi" => Some(Script::Kaithi), + "Lana" => Some(Script::Tai_Tham), + "Laoo" => Some(Script::Lao), + "Latn" => Some(Script::Latin), + "Lepc" => Some(Script::Lepcha), + "Limb" => Some(Script::Limbu), + "Lina" => Some(Script::Linear_A), + "Linb" => Some(Script::Linear_B), + "Lisu" => Some(Script::Lisu), + "Lyci" => Some(Script::Lycian), + "Lydi" => Some(Script::Lydian), + "Mahj" => Some(Script::Mahajani), + "Maka" => Some(Script::Makasar), + "Mand" => Some(Script::Mandaic), + "Mani" => Some(Script::Manichaean), + "Marc" => Some(Script::Marchen), + "Medf" => Some(Script::Medefaidrin), + "Mend" => Some(Script::Mende_Kikakui), + "Merc" => Some(Script::Meroitic_Cursive), + "Mero" => Some(Script::Meroitic_Hieroglyphs), + "Mlym" => Some(Script::Malayalam), + "Modi" => Some(Script::Modi), + "Mong" => Some(Script::Mongolian), + "Mroo" => Some(Script::Mro), + "Mtei" => Some(Script::Meetei_Mayek), + "Mult" => Some(Script::Multani), + "Mymr" => Some(Script::Myanmar), + "Nand" => Some(Script::Nandinagari), + "Narb" => Some(Script::Old_North_Arabian), + "Nbat" => Some(Script::Nabataean), + "Newa" => Some(Script::Newa), + "Nkoo" => Some(Script::Nko), + "Nshu" => Some(Script::Nushu), + "Ogam" => Some(Script::Ogham), + "Olck" => Some(Script::Ol_Chiki), + "Orkh" => Some(Script::Old_Turkic), + "Orya" => Some(Script::Oriya), + "Osge" => Some(Script::Osage), + "Osma" => Some(Script::Osmanya), + "Palm" => Some(Script::Palmyrene), + "Pauc" => Some(Script::Pau_Cin_Hau), + "Perm" => Some(Script::Old_Permic), + "Phag" => Some(Script::Phags_Pa), + "Phli" => Some(Script::Inscriptional_Pahlavi), + "Phlp" => Some(Script::Psalter_Pahlavi), + "Phnx" => Some(Script::Phoenician), + "Plrd" => Some(Script::Miao), + "Prti" => Some(Script::Inscriptional_Parthian), + "Rjng" => Some(Script::Rejang), + "Rohg" => Some(Script::Hanifi_Rohingya), + "Runr" => Some(Script::Runic), + "Samr" => Some(Script::Samaritan), + "Sarb" => Some(Script::Old_South_Arabian), + "Saur" => Some(Script::Saurashtra), + "Sgnw" => Some(Script::SignWriting), + "Shaw" => Some(Script::Shavian), + "Shrd" => Some(Script::Sharada), + "Sidd" => Some(Script::Siddham), + "Sind" => Some(Script::Khudawadi), + "Sinh" => Some(Script::Sinhala), + "Sogd" => Some(Script::Sogdian), + "Sogo" => Some(Script::Old_Sogdian), + "Sora" => Some(Script::Sora_Sompeng), + "Soyo" => Some(Script::Soyombo), + "Sund" => Some(Script::Sundanese), + "Sylo" => Some(Script::Syloti_Nagri), + "Syrc" => Some(Script::Syriac), + "Tagb" => Some(Script::Tagbanwa), + "Takr" => Some(Script::Takri), + "Tale" => Some(Script::Tai_Le), + "Talu" => Some(Script::New_Tai_Lue), + "Taml" => Some(Script::Tamil), + "Tang" => Some(Script::Tangut), + "Tavt" => Some(Script::Tai_Viet), + "Telu" => Some(Script::Telugu), + "Tfng" => Some(Script::Tifinagh), + "Tglg" => Some(Script::Tagalog), + "Thaa" => Some(Script::Thaana), + "Thai" => Some(Script::Thai), + "Tibt" => Some(Script::Tibetan), + "Tirh" => Some(Script::Tirhuta), + "Ugar" => Some(Script::Ugaritic), + "Vaii" => Some(Script::Vai), + "Wara" => Some(Script::Warang_Citi), + "Wcho" => Some(Script::Wancho), + "Xpeo" => Some(Script::Old_Persian), + "Xsux" => Some(Script::Cuneiform), + "Yezi" => Some(Script::Yezidi), + "Yiii" => Some(Script::Yi), + "Zanb" => Some(Script::Zanabazar_Square), + _ => None, + } + } + #[inline] pub(crate) fn for_integer(value: u8) -> Self { match value { @@ -1539,6 +1872,7 @@ impl Script { _ => unreachable!(), } } + } pub fn bsearch_range_value_table(c: char, r: &'static [(char, char, T)]) -> Option { @@ -2774,3 +3108,5 @@ pub fn get_script_extension(c: char) -> Option { script_extensions::HANI), ('\u{1f250}', '\u{1f251}', script_extensions::HANI) ]; + +} From 47d8b7e5032d0590326151535acb67c1192f16e5 Mon Sep 17 00:00:00 2001 From: Igor Aleksanov Date: Sun, 27 Jun 2021 10:09:34 +0300 Subject: [PATCH 2/4] Remove 'rustfmt::skip' attribute on 'mod tables' --- src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index f5ce4c1..d650ee0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,7 +4,6 @@ #![cfg_attr(not(test), no_std)] #![cfg_attr(feature = "bench", feature(test))] -#[rustfmt::skip] mod tables; use core::convert::TryFrom; From f81ded97f273428427ef30f0f8fb8a5340925933 Mon Sep 17 00:00:00 2001 From: Igor Aleksanov Date: Sun, 27 Jun 2021 10:11:48 +0300 Subject: [PATCH 3/4] Fix python file formatting --- scripts/unicode.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/scripts/unicode.py b/scripts/unicode.py index 57e7d5a..3d534bd 100644 --- a/scripts/unicode.py +++ b/scripts/unicode.py @@ -252,12 +252,12 @@ def emit_enums(f, script_list, extension_list, longforms): # Generate implementation for the `Script` generate_script_impl(f) - - + + def generate_script_impl(f): """Generates an `impl Script { ... }` section with all the required functions""" - # Open `impl Script` section. + # Open `impl Script` section. f.write("""impl Script { """) @@ -322,7 +322,7 @@ def generate_script_impl(f): f.write(""" } } """) - + # Generate impl of `for_integer` f.write(""" #[inline] @@ -335,7 +335,7 @@ def generate_script_impl(f): } } """) - + # Close `impl Script` section f.write(""" } @@ -399,5 +399,5 @@ def extension_name(ext): is_pub=False , pfun=lambda x: "(%s,%s,%s)" % (escape_char(x[0]), escape_char(x[1]), extension_name(x[2]))) # emit_table(rf, "FOObar", properties) - - rf.write(ending) \ No newline at end of file + + rf.write(ending) From d2c5a10f7b383e55e16d8567b80668dd67b72432 Mon Sep 17 00:00:00 2001 From: Igor Aleksanov Date: Mon, 28 Jun 2021 07:29:46 +0300 Subject: [PATCH 4/4] Do not include an empty string into 'from_short_name' match chain --- scripts/unicode.py | 1 - src/tables.rs | 1 - 2 files changed, 2 deletions(-) diff --git a/scripts/unicode.py b/scripts/unicode.py index 3d534bd..e40a92c 100644 --- a/scripts/unicode.py +++ b/scripts/unicode.py @@ -312,7 +312,6 @@ def generate_script_impl(f): #[inline] pub(crate) fn inner_from_short_name(input: &str) -> Option { match input { - "" => Some(Script::Unknown), "Zyyy" => Some(Script::Common), "Zinh" => Some(Script::Inherited), """) diff --git a/src/tables.rs b/src/tables.rs index 665a310..471e4f2 100644 --- a/src/tables.rs +++ b/src/tables.rs @@ -1551,7 +1551,6 @@ impl Script { #[inline] pub(crate) fn inner_from_short_name(input: &str) -> Option { match input { - "" => Some(Script::Unknown), "Zyyy" => Some(Script::Common), "Zinh" => Some(Script::Inherited), "Adlm" => Some(Script::Adlam), pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy