Skip to content

Commit ba42eb6

Browse files
committed
Replace boolean intersects() with intersect(), which actually calculates an intersection
1 parent 66fb0d3 commit ba42eb6

File tree

3 files changed

+1485
-545
lines changed

3 files changed

+1485
-545
lines changed

scripts/unicode.py

Lines changed: 58 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -183,7 +183,7 @@ def emit_search(f):
183183
}
184184
""")
185185

186-
def emit_enums(f, script_list, extension_list, longforms):
186+
def emit_enums(f, script_list, extension_list, longforms, intersections):
187187
"""
188188
Emit the Script and ScriptExtension enums as well as any related utility functions
189189
"""
@@ -278,51 +278,81 @@ def emit_enums(f, script_list, extension_list, longforms):
278278
}
279279
280280
#[inline]
281-
pub(crate) fn inner_intersects(self, other: Self) -> bool {
281+
pub(crate) fn inner_intersect(self, other: Self) -> Self {
282282
match (self, other) {
283283
(ScriptExtension::Single(Script::Unknown), _) |
284-
(_, ScriptExtension::Single(Script::Unknown)) => false,
285-
(a, b) if a == b => true,
286-
(ScriptExtension::Single(Script::Common), _) |
287-
(ScriptExtension::Single(Script::Inherited), _) |
288-
(_, ScriptExtension::Single(Script::Common)) |
289-
(_, ScriptExtension::Single(Script::Inherited)) => true,
290-
(ScriptExtension::Single(s), o) | (o, ScriptExtension::Single(s)) => o.inner_contains_script(s),
284+
(_, ScriptExtension::Single(Script::Unknown)) => ScriptExtension::Single(Script::Unknown),
285+
(a, b) if a == b => a,
286+
(ScriptExtension::Single(Script::Common), a) |
287+
(ScriptExtension::Single(Script::Inherited), a) |
288+
(a, ScriptExtension::Single(Script::Common)) |
289+
(a, ScriptExtension::Single(Script::Inherited)) => a,
290+
(ScriptExtension::Single(s), o) | (o, ScriptExtension::Single(s)) if o.inner_contains_script(s) => ScriptExtension::Single(s),
291291
""")
292-
intersections = compute_intersections(extension_list)
293-
for (e1, e2) in intersections:
294-
f.write(" (%s, %s) => true,\n" % (extension_name(e1), extension_name(e2)))
295-
f.write(""" _ => false,
292+
for (e1, e2, i) in intersections:
293+
f.write(" (%s, %s) => %s,\n" % (extension_name(e1), extension_name(e2), extension_name(i, longforms)))
294+
f.write(""" _ => ScriptExtension::Single(Script::Unknown),
296295
}
297296
}
298297
}
299298
""")
300299

301300

302-
# We currently do NOT have an optimized method to compute
303-
# the actual intersection between two script extensions, we
304-
# only check if they *do* intersect
305-
#
306-
# To add such a method we'd need to do an extra pass where we compute any
307-
# new ScriptExtension enums we'll need from the intersections. It doesn't
308-
# seem worth it for now
309-
def compute_intersections(extension_list):
301+
def compute_intersections_elements(extension_list):
310302
"""
311-
Compute which pairs of elements intersect. This will return duplicate pairs with
312-
the elements swapped, but that's fine.
303+
Compute all intersections between the script extensions.
304+
This will add new elements to extension_list, so be sure to call it before emit_enums()!
313305
"""
306+
307+
# This is the only third-level intersection
308+
# It's easier to hardcode things here rather than
309+
# do the below calculation in a loop
310+
extension_list.append(['Deva', 'Knda', 'Tirh'])
314311
intersections = []
312+
# Some intersections will not exist in extension_list and we'll need to add them
313+
new_elements = []
315314
sets = [(e, set(e)) for e in extension_list]
316315
for (e1, s1) in sets:
317316
for (e2, s2) in sets:
318317
if e1 == e2:
319318
continue
320319
intersection = s1.intersection(s2)
321320
if len(intersection) > 0:
322-
intersections.append((e1, e2))
321+
intersection = [i for i in intersection]
322+
intersection.sort()
323+
if len(intersection) > 1 and intersection not in extension_list and intersection not in new_elements:
324+
new_elements.append(intersection)
325+
if (e1, e2, intersection) not in intersections:
326+
intersections.append((e1, e2, intersection))
327+
extension_list.extend(new_elements)
328+
329+
# We now go through the newly added second-level extension values and calculate their intersections
330+
# with the original set and each other
331+
new_sets = [(e, set(e)) for e in new_elements]
332+
sets = [(e, set(e)) for e in extension_list]
333+
for (e1, s1) in new_sets:
334+
for (e2, s2) in sets:
335+
if e1 == e2:
336+
continue
337+
intersection = s1.intersection(s2)
338+
if len(intersection) > 0:
339+
intersection = [i for i in intersection]
340+
intersection.sort()
341+
if len(intersection) > 1 and intersection not in extension_list:
342+
raise "Found new third-level intersection, please hardcode it"
343+
# The previous routine would automatically get both versions
344+
# of an intersection because it would iterate each pair in both orders,
345+
# but here we're working on an asymmetric pair, so we insert both in order to not
346+
# miss anything
347+
if (e1, e2, intersection) not in intersections:
348+
intersections.append((e1, e2, intersection))
349+
if (e2, e1, intersection) not in intersections:
350+
intersections.append((e2, e1, intersection))
351+
352+
intersections.sort()
323353
return intersections
324354

325-
def extension_name(ext, longforms=[]):
355+
def extension_name(ext, longforms={}):
326356
"""Get the rust source for a given ScriptExtension"""
327357
if len(ext) == 1:
328358
return "ScriptExtension::Single(Script::%s)" % longforms[ext[0]]
@@ -373,7 +403,9 @@ def extension_name(ext, longforms=[]):
373403
extension_table.extend([(x, y, output_ext) for (x, y) in extensions[ext]])
374404
extension_table.sort(key=lambda w: w[0])
375405

376-
emit_enums(rf, script_list, extension_list, longforms)
406+
intersections = compute_intersections_elements(extension_list)
407+
408+
emit_enums(rf, script_list, extension_list, longforms, intersections)
377409
emit_search(rf)
378410

379411
emit_table(rf, "SCRIPTS", script_table, t_type = "&'static [(char, char, Script)]",

src/lib.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -40,15 +40,13 @@ impl ScriptExtension {
4040
self.inner_contains_script(script)
4141
}
4242

43-
/// Check if this ScriptExtension has any intersection with another
44-
/// ScriptExtension
43+
/// Find the intersection between two ScriptExtensions. Returns Unknown if they
44+
/// do not intersect.
4545
///
4646
/// "Common" (`Zyyy`) and "Inherited" (`Zinh`) are considered as intersecting
4747
/// everything.
48-
///
49-
/// "Unknown" intersects nothing
50-
pub fn intersects(self, other: Self) -> bool {
51-
self.inner_intersects(other)
48+
pub fn intersect(self, other: Self) -> Self {
49+
self.inner_intersect(other)
5250
}
5351
}
5452

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy