Skip to content

Commit dce3a34

Browse files
Add comments to handle_incb_consonant
1 parent 4e4a7c6 commit dce3a34

File tree

1 file changed

+26
-10
lines changed

1 file changed

+26
-10
lines changed

src/grapheme.rs

Lines changed: 26 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -462,10 +462,21 @@ impl GraphemeCursor {
462462
}
463463
}
464464

465+
/// For handling rule GB9c:
466+
///
467+
/// There's an `InCB=Consonant` after this, and we need to look back
468+
/// to verify whether there should be a break.
469+
///
470+
/// Seek backward to find an `InCB=Linker` preceded by an `InCB=Consonant`
471+
/// (potentially separated by some number of `InCB=Linker` or `InCB=Extend`).
472+
/// If we find the consonant in question, then there's no break; if we find a consonant
473+
/// with no linker, or a non-linker non-extend non-consonant, or the start of text, there's a break;
474+
/// otherwise we need more context.
465475
#[inline]
466476
fn handle_incb_consonant(&mut self, chunk: &str, chunk_start: usize) {
467477
use crate::tables::{self, grapheme as gr};
468478

479+
// GB9c only applies to extended grapheme clusters
469480
if !self.is_extended {
470481
self.decide(true);
471482
return;
@@ -475,23 +486,28 @@ impl GraphemeCursor {
475486

476487
for ch in chunk.chars().rev() {
477488
if tables::is_incb_linker(ch) {
489+
// We found an InCB linker
478490
incb_linker_count += 1;
479491
self.incb_linker_count = Some(incb_linker_count);
480492
} else if tables::derived_property::InCB_Extend(ch) {
481-
// continue
493+
// We ignore InCB extends, continue
482494
} else {
495+
// Prev character is neither linker nor extend; the break is suppressed iff we have seen at least one linker and this character is InCB=Consonant
483496
let result = !(self.incb_linker_count.unwrap_or(0) > 0
484497
&& self.grapheme_category(ch) == gr::GC_InCB_Consonant);
485498
self.decide(result);
486499
return;
487500
}
488501
}
502+
489503
if chunk_start == 0 {
504+
// Start of text and we still haven't found a consonant, so break
490505
self.decide(true);
491-
return;
506+
} else {
507+
// We need more context
508+
self.pre_context_offset = Some(chunk_start);
509+
self.state = GraphemeState::InCbConsonant;
492510
}
493-
self.pre_context_offset = Some(chunk_start);
494-
self.state = GraphemeState::InCbConsonant;
495511
}
496512

497513
#[inline]
@@ -509,10 +525,10 @@ impl GraphemeCursor {
509525
self.ris_count = Some(ris_count);
510526
if chunk_start == 0 {
511527
self.decide((ris_count % 2) == 0);
512-
return;
528+
} else {
529+
self.pre_context_offset = Some(chunk_start);
530+
self.state = GraphemeState::Regional;
513531
}
514-
self.pre_context_offset = Some(chunk_start);
515-
self.state = GraphemeState::Regional;
516532
}
517533

518534
#[inline]
@@ -540,10 +556,10 @@ impl GraphemeCursor {
540556
}
541557
if chunk_start == 0 {
542558
self.decide(true);
543-
return;
559+
} else {
560+
self.pre_context_offset = Some(chunk_start);
561+
self.state = GraphemeState::Emoji;
544562
}
545-
self.pre_context_offset = Some(chunk_start);
546-
self.state = GraphemeState::Emoji;
547563
}
548564

549565
#[inline]

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy