unicode-rs
diff --git a/Cargo.toml b/Cargo.toml
Lines changed: 2 additions & 2 deletions
diff --git a/scripts/unicode.py b/scripts/unicode.py
Lines changed: 80 additions & 17 deletions
diff --git a/src/jamo.rs b/src/jamo.rs
Lines changed: 41 additions & 0 deletions
@@ -1,9 +1,9 @@
 [package]
 name = "unicode-charname"
 version = "0.1.0"
-authors = ["CrLF0710"]
+authors = ["Charles Lew <crlf0710@gmail.com>"]
 edition = "2018"
-
+license = "MIT/Apache-2.0"
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
 [dependencies]
@@ -16,7 +16,10 @@
 # Since this should not require frequent updates, we just store this
 # out-of-line and check the unicode.rs file into git.
 
-import fileinput, re, os, sys
+import fileinput
+import re
+import os
+import sys
 
 preamble = '''// Copyright 2012-2018 The Rust Project Developers. See the COPYRIGHT
 // file at the top-level directory of this distribution and at
@@ -36,11 +39,13 @@
 
 UNICODE_VERSION = (13, 0, 0)
 
-UNICODE_VERSION_NUMBER = "%s.%s.%s" %UNICODE_VERSION
+UNICODE_VERSION_NUMBER = "%s.%s.%s" % UNICODE_VERSION
+
 
 def escape_char(c):
     return "'\\u{%x}'" % c
 
+
 def fetch(f):
     if not os.path.exists(os.path.basename(f)):
         if "emoji" in f:
@@ -55,6 +60,8 @@ def fetch(f):
         exit(1)
 
 # Implementation from unicode-segmentation
+
+
 def load_names(f, interestingprops):
     fetch(f)
     normal_names = {}
@@ -76,17 +83,21 @@ def load_names(f, interestingprops):
             normal_names[d_ch] = d_name
     return (normal_names, special_names)
 
+
 SPACE_SYMBOL = ' '
 CODEPOINT_SYMBOL = '@'
 SPECIAL_SYMBOLS = ['-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9']
 
+
 def tokenize(str, codepoint):
     name_sep = str
     name_sep = name_sep.replace(codepoint, CODEPOINT_SYMBOL)
     for symbol in SPECIAL_SYMBOLS:
-        name_sep = name_sep.replace(symbol, SPACE_SYMBOL + symbol + SPACE_SYMBOL)
+        name_sep = name_sep.replace(
+            symbol, SPACE_SYMBOL + symbol + SPACE_SYMBOL)
     return name_sep
 
+
 def make_wordset(names):
     word_set = {}
     word_set[SPACE_SYMBOL] = SPACE_SYMBOL
@@ -97,6 +108,7 @@ def make_wordset(names):
             word_set[word] = word
     return word_set
 
+
 class WordIndex:
     def __init__(self, normal_names):
         word_set = make_wordset(normal_names)
@@ -123,7 +135,7 @@ def __init__(self, normal_names):
         self.word_map = word_map
         self.special_map = special_map
         self.special_list = special_list
-    
+
     def encode(self, name, codepoint):
         name_sep = tokenize(name, codepoint)
         word_list = name_sep.split(SPACE_SYMBOL)
@@ -143,16 +155,18 @@ def encode(self, name, codepoint):
                     encoded_sequence.append(self.word_map[SPACE_SYMBOL])
                 name_build += SPACE_SYMBOL
             if not name[len(name_build):].startswith(word):
-                raise Exception("Divergence! [%s] - [%s] vs [%s]" % (name, name[len(name_build):], word))
+                raise Exception(
+                    "Divergence! [%s] - [%s] vs [%s]" % (name, name[len(name_build):], word))
             name_build += word
             encoded_sequence.append(word_idx)
             # encoded_sequence.append(word)
             last_is_special = word_is_special
         if name_build != name:
             raise Exception("Different! [%s] vs [%s]" % (name, name_build))
-        
+
         return encoded_sequence
 
+
 def create_intervals(list):
     list.sort()
     in_group = False
@@ -167,8 +181,10 @@ def create_intervals(list):
             in_group = False
     return result
 
+
 def create_normal_groups(normal_names):
-    normal_intervals = create_intervals([int(key, 16) for key in normal_names.keys()])
+    normal_intervals = create_intervals(
+        [int(key, 16) for key in normal_names.keys()])
     encoded_groups = []
     for first, last in normal_intervals:
         group_buffer = []
@@ -184,6 +200,7 @@ def create_normal_groups(normal_names):
         encoded_groups.append((first, last, group_buffer, pos_buffer))
     return encoded_groups
 
+
 def create_special_groups(special_names):
     item_idx = 0
     item_count = len(special_names)
@@ -198,9 +215,11 @@ def create_special_groups(special_names):
             label = m1.group(1)
             m2 = re2.match(special_names[item_idx + 1][1])
             if not m2 or m2.group(1) != label:
-                raise Exception("Pair mismatch! [%s] vs [%s]" % (special_names[item_idx], special_names[item_idx + 1]))
-            
-            result.append((int(special_names[item_idx][0], 16), int(special_names[item_idx + 1][0], 16), label))
+                raise Exception("Pair mismatch! [%s] vs [%s]" % (
+                    special_names[item_idx], special_names[item_idx + 1]))
+
+            result.append((int(special_names[item_idx][0], 16), int(
+                special_names[item_idx + 1][0], 16), label))
             item_idx += 2
             continue
         m3 = re3.match(item_text)
@@ -211,25 +230,30 @@ def create_special_groups(special_names):
             while try_item_idx < item_count and special_names[try_item_idx][1] == item_text:
                 last_repeat_item_idx = try_item_idx
                 try_item_idx += 1
-            result.append((int(special_names[item_idx][0], 16), int(special_names[last_repeat_item_idx][0], 16), label))
+            result.append((int(special_names[item_idx][0], 16), int(
+                special_names[last_repeat_item_idx][0], 16), label))
             item_idx = last_repeat_item_idx + 1
             continue
-            
+
         raise Exception("Unexpected item: %s" % item_text)
     return result
 
+
 def write_enumeration_char_names(rf, encoded_groups):
     rf.write("""
 pub const ENUMERATION_CHAR_NAMES: &'static [(u32, u32, &'static [u16], &'static [u32])] = &[
 """)
     for (first, last, group_buffer, pos_buffer) in encoded_groups:
-        rf.write("\t(%d, %d, &%s, &%s),\n" % (first, last, group_buffer, pos_buffer))
+        rf.write("\t(%d, %d, &%s, &%s),\n" %
+                 (first, last, group_buffer, pos_buffer))
     rf.write("""];
 """)
 
+
 def write_special_groups(rf, special_groups):
     rf.write("""
 #[allow(non_camel_case_types)]
+#[derive(Copy, Clone, PartialEq, Eq)]
 pub enum SpecialGroup {
 """)
     for ((_, _, groupname)) in special_groups:
@@ -249,8 +273,27 @@ def write_special_groups(rf, special_groups):
         if (idx + 1) % 2 == 0:
             rf.write('\n')
     rf.write("""];
+
+pub fn find_in_special_groups(ch: u32) -> Option<SpecialGroup> {
+    let record_idx = SPECIAL_GROUPS
+        .binary_search_by(|record| {
+            use std::cmp::Ordering;
+            if record.1 < ch {
+                Ordering::Less
+            } else if record.0 > ch {
+                Ordering::Greater
+            } else {
+                Ordering::Equal
+            }
+        })
+        .ok()?;
+    let group = SPECIAL_GROUPS[record_idx].2;
+    Some(group)
+}
+
 """)
 
+
 def write_word_table(rf, word_table):
     rf.write("""
 pub const ENUMERATION_WORD_TABLE: &'static [&'static str] = &[
@@ -262,8 +305,29 @@ def write_word_table(rf, word_table):
         if (idx + 1) % 8 == 0:
             rf.write('\n')
     rf.write("""];
+
+pub fn find_in_enumerate_names(ch: u32) -> Option<&'static [u16]> {
+    let record_idx = ENUMERATION_CHAR_NAMES
+        .binary_search_by(|record| {
+            use std::cmp::Ordering;
+            if record.1 < ch {
+                Ordering::Less
+            } else if record.0 > ch {
+                Ordering::Greater
+            } else {
+                Ordering::Equal
+            }
+        })
+        .ok()?;
+    let offset = (ch - ENUMERATION_CHAR_NAMES[record_idx].0) as usize;
+    let index_slice = ENUMERATION_CHAR_NAMES[record_idx].2;
+    let offset_slice = ENUMERATION_CHAR_NAMES[record_idx].3;
+    let range = (offset_slice[offset] as usize)..(offset_slice[offset + 1] as usize);
+    Some(&index_slice[range])
+}
 """)
 
+
 def write_special_symbols(rf, word_index):
     rf.write("""
 pub const WORD_TABLE_INDEX_SPACE: u16 = %d;
@@ -279,12 +343,12 @@ def write_special_symbols(rf, word_index):
 """)
     for (first, last) in special_intervals:
         rf.write("\t\t%d..=%d => true,\n" % (first, last))
-    rf.write("""
-        _ => false,
+    rf.write("""\t\t_ => false,
     }
 }
 """)
 
+
 if __name__ == "__main__":
     r = "tables.rs"
     if os.path.exists(r):
@@ -302,9 +366,8 @@ def write_special_symbols(rf, word_index):
         word_index = WordIndex(normal_names)
         normal_encoded_groups = create_normal_groups(normal_names)
         special_groups = create_special_groups(special_names)
-        
+
         write_enumeration_char_names(rf, normal_encoded_groups)
         write_special_groups(rf, special_groups)
         write_word_table(rf, word_index.word_list)
         write_special_symbols(rf, word_index)
-
 
@@ -0,0 +1,41 @@
+#![allow(dead_code)]
+// Adapted from The Unicode Standard, Version 13.0, Section 3.12 ("Conjoining Jamo Behavior").
+
+const S_BASE: u32 = 0xAC00; // lowest code point accepted by `hangul_name`; subtracted to get a zero-based index
+const L_BASE: u32 = 0x1100; // NOTE(review): presumably the first leading-consonant jamo; not referenced in this file
+const V_BASE: u32 = 0x1161; // NOTE(review): presumably the first vowel jamo; not referenced in this file
+const T_BASE: u32 = 0x11A7; // NOTE(review): presumably one below the first trailing-consonant jamo; not referenced in this file
+const L_COUNT: u32 = 19; // number of entries in JAMO_L_TABLE
+const V_COUNT: u32 = 21; // number of entries in JAMO_V_TABLE
+const T_COUNT: u32 = 28; // number of entries in JAMO_T_TABLE (entry 0 is the empty string)
+const N_COUNT: u32 = V_COUNT * T_COUNT; // 588: size of the index span that shares one JAMO_L_TABLE entry
+const S_COUNT: u32 = L_COUNT * N_COUNT; // 11172: exclusive upper bound of the index accepted by `hangul_name`
+
+const JAMO_L_TABLE: &[&'static str] = &[ // first name fragment, selected by `l_index` in `hangul_name` (19 entries; one is empty)
+    "G", "GG", "N", "D", "DD", "R", "M", "B", "BB", "S", "SS", "", "J", "JJ", "C", "K", "T", "P",
+    "H",
+];
+
+const JAMO_V_TABLE: &[&'static str] = &[ // second name fragment, selected by `v_index` in `hangul_name` (21 entries)
+    "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O", "WA", "WAE", "OE", "YO", "U", "WEO", "WE",
+    "WI", "YU", "EU", "YI", "I",
+];
+
+const JAMO_T_TABLE: &[&'static str] = &[ // third name fragment, selected by `t_index` in `hangul_name` (28 entries; index 0 adds nothing)
+    "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM", "LB", "LS", "LT", "LP", "LH", "M",
+    "B", "BS", "S", "SS", "NG", "J", "C", "K", "T", "P", "H",
+];
+
+/// Builds the Unicode character name of a precomposed Hangul syllable.
+///
+/// `s` is the code point as a `u32`. Its offset from `S_BASE` is split into
+/// three indices, and the matching entries of `JAMO_L_TABLE`, `JAMO_V_TABLE`
+/// and `JAMO_T_TABLE` are appended, in that order, to the
+/// `"HANGUL SYLLABLE "` prefix.
+///
+/// # Panics
+///
+/// Panics if `s` lies outside `S_BASE..S_BASE + S_COUNT` (U+AC00..=U+D7A3).
+pub(crate) fn hangul_name(s: u32) -> String {
+    // A plain `s - S_BASE` underflows for `s < S_BASE`: debug builds then
+    // panic with an unrelated "subtract with overflow" message before the
+    // range check runs. The wrapped difference is always >= S_COUNT, so the
+    // assertion below rejects every out-of-range input in every build profile.
+    let s_index = s.wrapping_sub(S_BASE);
+    assert!(
+        s_index < S_COUNT,
+        "U+{:04X} is not a precomposed Hangul syllable",
+        s
+    );
+    // Decompose the zero-based syllable index into its three table indices.
+    let l_index = s_index / N_COUNT;
+    let v_index = (s_index % N_COUNT) / T_COUNT;
+    let t_index = s_index % T_COUNT;
+    format!(
+        "HANGUL SYLLABLE {}{}{}",
+        JAMO_L_TABLE[l_index as usize],
+        JAMO_V_TABLE[v_index as usize],
+        JAMO_T_TABLE[t_index as usize]
+    )
+}