35
35
// NOTE: The following code was generated by "scripts/unicode.py", do not edit directly
36
36
37
37
#![allow(missing_docs, non_upper_case_globals, non_snake_case)]
38
+
39
+ use super::ScriptExtension;
38
40
'''
39
41
40
42
UNICODE_VERSION = (12 , 0 , 0 )
@@ -183,44 +185,68 @@ def emit_search(f):
183
185
}
184
186
""" )
185
187
186
- def emit_enums (f , script_list , extension_list , longforms , intersections ):
188
+ def emit_enums (f , script_list , extension_list , longforms ):
187
189
"""
188
190
Emit the Script and ScriptExtension enums as well as any related utility functions
189
191
"""
192
+
190
193
f .write ("""
191
194
#[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
192
195
#[non_exhaustive]
193
196
#[allow(non_camel_case_types)]
197
+ #[repr(u8)]
194
198
/// A value of the `Script` property
195
199
pub enum Script {
196
200
/// Unknown script
197
- Unknown,
201
+ Unknown = 0xFF,
202
+ /// Zyyy
203
+ Common = 0xFE,
204
+ /// Zinh,
205
+ Inherited = 0xFD,
198
206
""" )
199
- for script in script_list :
200
- f .write (" /// %s\n %s,\n " % (script , longforms [script ]))
201
- f .write ("""}
202
- #[derive(Clone, Copy, PartialEq, Eq, Debug, Hash)]
203
- #[non_exhaustive]
204
- /// A value for the `Script_Extension` property
205
- ///
206
- /// [`ScriptExtension`] is one or more [`Script`]
207
- ///
208
- /// This is essentially an optimized version of `Vec<Script>`,
209
- /// optimized by script sets and intersections actually present in Unicode.
210
- pub enum ScriptExtension {
211
- /// A single script
212
- Single(Script),
207
+ for (i , script ) in enumerate (script_list ):
208
+ f .write (" /// %s\n %s = %s,\n " % (script , longforms [script ], i ))
209
+ f .write ("}\n " )
210
+ f .write ("pub const NEXT_SCRIPT: u8 = %s;" % len (script_list ))
211
+ f .write ("""
212
+
213
+ pub mod script_extensions {
214
+ use crate::ScriptExtension;
215
+ pub const COMMON: ScriptExtension = ScriptExtension::new_common();
216
+ pub const INHERITED: ScriptExtension = ScriptExtension::new_inherited();
217
+ pub const UNKNOWN: ScriptExtension = ScriptExtension::new_unknown();
213
218
""" )
219
+ for (i , script ) in enumerate (script_list ):
220
+ first = 0
221
+ second = 0
222
+ third = 0
223
+ if i < 64 :
224
+ first = hex (1 << i ).replace ("L" , "" )
225
+ elif i < 128 :
226
+ second = hex (1 << (i - 64 )).replace ("L" , "" )
227
+ else :
228
+ third = hex (1 << (i - 128 )).replace ("L" , "" )
229
+ f .write (" /// %s\n pub const %s: ScriptExtension = ScriptExtension::new(%s, %s, %s);\n " %
230
+ (longforms [script ], longforms [script ].upper (), first , second , third ))
231
+ if script != longforms [script ]:
232
+ f .write (" /// %s\n pub const %s: ScriptExtension = %s;\n " %
233
+ (longforms [script ], script .upper (), longforms [script ].upper ()))
214
234
for ext in extension_list :
215
235
longform = ", " .join ([longforms [s ] for s in ext ])
216
- f .write (" /// %s\n %s,\n " % (longform , "" .join (ext )))
236
+ name = "_" .join ([s .upper () for s in ext ])
237
+ expr = ext [0 ].upper ()
238
+ for e in ext [1 :]:
239
+ expr = "%s.intersection(%s)" % (expr , e .upper ())
240
+ f .write (" /// %s\n pub const %s: ScriptExtension = %s;\n " % (longform , name , expr ))
217
241
f .write ("""}
218
242
219
243
impl Script {
220
244
#[inline]
221
245
pub(crate) fn inner_full_name(self) -> &'static str {
222
246
match self {
223
247
Script::Unknown => "Unknown",
248
+ Script::Common => "Common",
249
+ Script::Inherited => "Inherited",
224
250
""" )
225
251
for script in script_list :
226
252
f .write (" Script::%s => \" %s\" ,\n " % (longforms [script ], longforms [script ]))
@@ -231,119 +257,29 @@ def emit_enums(f, script_list, extension_list, longforms, intersections):
231
257
pub(crate) fn inner_short_name(self) -> &'static str {
232
258
match self {
233
259
Script::Unknown => "",
260
+ Script::Common => "Zyyy",
261
+ Script::Inherited => "Zinh",
234
262
""" )
235
263
for script in script_list :
236
264
f .write (" Script::%s => \" %s\" ,\n " % (longforms [script ], script ))
237
265
f .write (""" }
238
266
}
239
- }
240
-
241
- impl ScriptExtension {
242
- #[inline]
243
- #[cfg(feature = "with_std")]
244
- pub(crate) fn inner_scripts(self) -> Vec<Script> {
245
- match self {
246
- ScriptExtension::Single(s) => vec![s],
247
- """ )
248
- for ext in extension_list :
249
- scripts = ", " .join (["Script::%s" % longforms [s ] for s in ext ])
250
- f .write (" %s => vec![%s],\n " % (extension_name (ext ), scripts ))
251
- f .write (""" _ => unreachable!()
252
- }
253
- }
254
-
255
- #[inline]
256
- pub(crate) fn inner_contains_script(self, other: Script) -> bool {
257
- match self {
258
- ScriptExtension::Single(s) => s == other,
259
- """ )
260
- for ext in extension_list :
261
- scripts = " || " .join (["other == Script::%s" % longforms [s ] for s in ext ])
262
- f .write (" %s => %s,\n " % (extension_name (ext ), scripts ))
263
- f .write (""" }
264
- }
265
267
266
268
#[inline]
267
- pub(crate) fn inner_intersect(self, other: Self) -> Self {
268
- match (self, other) {
269
- (ScriptExtension::Single(Script::Unknown), _) |
270
- (_, ScriptExtension::Single(Script::Unknown)) => ScriptExtension::Single(Script::Unknown),
271
- (a, b) if a == b => a,
272
- (ScriptExtension::Single(Script::Common), a) |
273
- (ScriptExtension::Single(Script::Inherited), a) |
274
- (a, ScriptExtension::Single(Script::Common)) |
275
- (a, ScriptExtension::Single(Script::Inherited)) => a,
276
- (ScriptExtension::Single(s), o) | (o, ScriptExtension::Single(s)) if o.inner_contains_script(s) => ScriptExtension::Single(s),
269
+ pub(crate) fn for_integer(value: u8) -> Self {
270
+ match value {
277
271
""" )
278
- for (e1 , e2 , i ) in intersections :
279
- f .write (" (%s, %s) => %s,\n " % (extension_name ( e1 ), extension_name ( e2 ), extension_name ( i , longforms ) ))
280
- f .write (""" _ => ScriptExtension::Single(Script::Unknown ),
272
+ for (i , script ) in enumerate ( script_list ) :
273
+ f .write (" %s => Script:: %s,\n " % (i , longforms [ script ] ))
274
+ f .write (""" _ => unreachable!( ),
281
275
}
282
276
}
283
277
}
284
278
""" )
285
279
286
-
287
- def compute_intersections_elements (extension_list ):
288
- """
289
- Compute all intersections between the script extensions.
290
- This will add new elements to extension_list, be sure to call it first!
291
- """
292
-
293
- # This is the only third-level intersection
294
- # It's easier to hardcode things here rather than
295
- # do the below calculation in a loop
296
- extension_list .append (['Deva' , 'Knda' , 'Tirh' ])
297
- intersections = []
298
- # Some intersections will not exist in extension_list and we'll need to add them
299
- new_elements = []
300
- sets = [(e , set (e )) for e in extension_list ]
301
- for (e1 , s1 ) in sets :
302
- for (e2 , s2 ) in sets :
303
- if e1 == e2 :
304
- continue
305
- intersection = s1 .intersection (s2 )
306
- if len (intersection ) > 0 :
307
- intersection = [i for i in intersection ]
308
- intersection .sort ()
309
- if len (intersection ) > 1 and intersection not in extension_list and intersection not in new_elements :
310
- new_elements .append (intersection )
311
- if (e1 , e2 , intersection ) not in intersections :
312
- intersections .append ((e1 , e2 , intersection ))
313
- extension_list .extend (new_elements )
314
-
315
- # We now go through the newly added second-level extension values and calculate their intersections
316
- # with the original set and each other
317
- new_sets = [(e , set (e )) for e in new_elements ]
318
- sets = [(e , set (e )) for e in extension_list ]
319
- for (e1 , s1 ) in new_sets :
320
- for (e2 , s2 ) in sets :
321
- if e1 == e2 :
322
- continue
323
- intersection = s1 .intersection (s2 )
324
- if len (intersection ) > 0 :
325
- intersection = [i for i in intersection ]
326
- intersection .sort ()
327
- if len (intersection ) > 1 and intersection not in extension_list :
328
- raise "Found new third-level intersection, please hardcode it"
329
- # The previous routine would automatically get both versions
330
- # of an intersection because it would iterate each pair in both orders,
331
- # but here we're working on an asymmetric pair, so we insert both in order to not
332
- # miss anything
333
- if (e1 , e2 , intersection ) not in intersections :
334
- intersections .append ((e1 , e2 , intersection ))
335
- if (e2 , e1 , intersection ) not in intersections :
336
- intersections .append ((e2 , e1 , intersection ))
337
-
338
- intersections .sort ()
339
- return intersections
340
-
341
- def extension_name (ext , longforms = {}):
280
+ def extension_name (ext ):
342
281
"""Get the rust source for a given ScriptExtension"""
343
- if len (ext ) == 1 :
344
- return "ScriptExtension::Single(Script::%s)" % longforms [ext [0 ]]
345
- else :
346
- return "ScriptExtension::%s" % "" .join (ext )
282
+ return "script_extensions::%s" % "_" .join ([e .upper () for e in ext ])
347
283
348
284
349
285
@@ -370,8 +306,10 @@ def extension_name(ext, longforms={}):
370
306
script_list = []
371
307
372
308
for script in scripts :
373
- script_list .append (shortforms [script ])
309
+ if script not in ["Common" , "Unknown" , "Inherited" ]:
310
+ script_list .append (shortforms [script ])
374
311
script_table .extend ([(x , y , shortforms [script ]) for (x , y ) in scripts [script ]])
312
+ script_list .sort ()
375
313
script_table .sort (key = lambda w : w [0 ])
376
314
377
315
@@ -389,14 +327,13 @@ def extension_name(ext, longforms={}):
389
327
extension_table .extend ([(x , y , output_ext ) for (x , y ) in extensions [ext ]])
390
328
extension_table .sort (key = lambda w : w [0 ])
391
329
392
- intersections = compute_intersections_elements (extension_list )
393
330
394
- emit_enums (rf , script_list , extension_list , longforms , intersections )
331
+ emit_enums (rf , script_list , extension_list , longforms )
395
332
emit_search (rf )
396
333
397
334
emit_table (rf , "SCRIPTS" , script_table , t_type = "&'static [(char, char, Script)]" ,
398
335
is_pub = False , pfun = lambda x : "(%s,%s, Script::%s)" % (escape_char (x [0 ]), escape_char (x [1 ]), longforms [x [2 ]]))
399
336
emit_table (rf , "SCRIPT_EXTENSIONS" , extension_table , t_type = "&'static [(char, char, ScriptExtension)]" ,
400
- is_pub = False , pfun = lambda x : "(%s,%s,%s)" % (escape_char (x [0 ]), escape_char (x [1 ]), extension_name (x [2 ], longforms )))
337
+ is_pub = False , pfun = lambda x : "(%s,%s,%s)" % (escape_char (x [0 ]), escape_char (x [1 ]), extension_name (x [2 ])))
401
338
402
339
# emit_table(rf, "FOObar", properties)
0 commit comments