From 5b31ba1aa877da5cace5af5b50970f89903972cf Mon Sep 17 00:00:00 2001 From: Lieselotte <52315535+she3py@users.noreply.github.com> Date: Sun, 13 Oct 2024 00:57:38 +0200 Subject: [PATCH] Expose all iterator constructors, add hyperlinks to Unicode glossary/technical reports --- src/decompose.rs | 2 +- src/lib.rs | 17 +++++++++++------ src/normalize.rs | 11 ++++++----- src/replace.rs | 15 ++++++++++++--- src/stream_safe.rs | 13 ++++++++++--- 5 files changed, 40 insertions(+), 18 deletions(-) diff --git a/src/decompose.rs b/src/decompose.rs index 2c73383..b11b1f5 100644 --- a/src/decompose.rs +++ b/src/decompose.rs @@ -53,7 +53,7 @@ impl<I: Iterator<Item = char>> Decompositions<I> { /// Create a new decomposition iterator for compatability decompositions (NFkD) /// - /// Note that this iterator can also be obtained by directly calling [`.nfd()`](crate::UnicodeNormalization::nfd) + /// Note that this iterator can also be obtained by directly calling [`.nfkd()`](crate::UnicodeNormalization::nfkd) /// on the iterator. #[inline] pub fn new_compatible(iter: I) -> Decompositions<I> { diff --git a/src/lib.rs b/src/lib.rs index 6c5e029..963d41a 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -113,8 +113,8 @@ pub trait UnicodeNormalization<I: Iterator<Item = char>> { /// (compatibility decomposition followed by canonical composition). fn nfkc(self) -> Recompositions<I>; - /// A transformation which replaces CJK Compatibility Ideograph codepoints - /// with normal forms using Standardized Variation Sequences. This is not + /// A transformation which replaces [CJK Compatibility Ideograph] codepoints + /// with normal forms using [Standardized Variation Sequences]. This is not /// part of the canonical or compatibility decomposition algorithms, but /// performing it before those algorithms produces normalized output which /// better preserves the intent of the original text. 
@@ -123,10 +123,15 @@ pub trait UnicodeNormalization<I: Iterator<Item = char>> { /// may not immediately help text display as intended, but they at /// least preserve the information in a standardized form, giving /// implementations the option to recognize them. + /// + /// [CJK Compatibility Ideograph]: https://www.unicode.org/glossary/#compatibility_ideograph + /// [Standardized Variation Sequences]: https://www.unicode.org/glossary/#standardized_variation_sequence fn cjk_compat_variants(self) -> Replacements<I>; /// An Iterator over the string with Conjoining Grapheme Joiner characters - /// inserted according to the Stream-Safe Text Process (UAX15-D4) + /// inserted according to the Stream-Safe Text Process ([UAX15-D4]). + /// + /// [UAX15-D4]: https://www.unicode.org/reports/tr15/#UAX15-D4 fn stream_safe(self) -> StreamSafe<I>; } @@ -153,7 +158,7 @@ impl<'a> UnicodeNormalization<Chars<'a>> for &'a str { #[inline] fn cjk_compat_variants(self) -> Replacements<Chars<'a>> { - replace::new_cjk_compat_variants(self.chars()) + Replacements::new_cjk_compat_variants(self.chars()) } #[inline] @@ -185,7 +190,7 @@ impl UnicodeNormalization<option::IntoIter<char>> for char { #[inline] fn cjk_compat_variants(self) -> Replacements<option::IntoIter<char>> { - replace::new_cjk_compat_variants(Some(self).into_iter()) + Replacements::new_cjk_compat_variants(Some(self).into_iter()) } #[inline] @@ -217,7 +222,7 @@ impl<I: Iterator<Item = char>> UnicodeNormalization<I> for I { #[inline] fn cjk_compat_variants(self) -> Replacements<I> { - replace::new_cjk_compat_variants(self) + Replacements::new_cjk_compat_variants(self) } #[inline] diff --git a/src/normalize.rs b/src/normalize.rs index e59b667..96277f1 100644 --- a/src/normalize.rs +++ b/src/normalize.rs @@ -41,11 +41,12 @@ pub fn decompose_compatible<F: FnMut(char)>(c: char, emit_char: F) { /// /// [Standardized Variation Sequences] are used instead of the standard canonical /// decompositions, notably for CJK codepoints with singleton canonical decompositions, -/// to avoid losing information. 
See the -/// [Unicode Variation Sequence FAQ](http://unicode.org/faq/vs.html) and the -/// "Other Enhancements" section of the -/// [Unicode 6.3 Release Summary](https://www.unicode.org/versions/Unicode6.3.0/#Summary) -/// for more information. +/// to avoid losing information. See the [Unicode Variation Sequence FAQ] and the +/// "Other Enhancements" section of the [Unicode 6.3 Release Summary] for more information. +/// +/// [Standardized Variation Sequences]: https://www.unicode.org/glossary/#standardized_variation_sequence +/// [Unicode Variation Sequence FAQ]: http://unicode.org/faq/vs.html +/// [Unicode 6.3 Release Summary]: https://www.unicode.org/versions/Unicode6.3.0/#Summary #[inline] pub fn decompose_cjk_compat_variants<F>(c: char, mut emit_char: F) where diff --git a/src/replace.rs b/src/replace.rs index 3ab2a57..b21a669 100644 --- a/src/replace.rs +++ b/src/replace.rs @@ -22,9 +22,18 @@ pub struct Replacements<I> { buffer: Option<char>, } -#[inline] -pub fn new_cjk_compat_variants<I: Iterator<Item = char>>(iter: I) -> Replacements<I> { - Replacements { iter, buffer: None } +impl<I: Iterator<Item = char>> Replacements<I> { + /// Create a new iterator that replaces [CJK Compatibility Ideograph] codepoints with normal forms using [Standardized Variation Sequences]. + /// + /// Note that this iterator can also be obtained by directly calling [`.cjk_compat_variants()`] on the iterator. 
+ /// + /// [CJK Compatibility Ideograph]: https://www.unicode.org/glossary/#compatibility_ideograph + /// [Standardized Variation Sequences]: https://www.unicode.org/glossary/#standardized_variation_sequence + /// [`.cjk_compat_variants()`]: crate::UnicodeNormalization::cjk_compat_variants + #[inline] + pub fn new_cjk_compat_variants(iter: I) -> Replacements<I> { + Replacements { iter, buffer: None } + } } impl<I: Iterator<Item = char>> Iterator for Replacements<I> { diff --git a/src/stream_safe.rs b/src/stream_safe.rs index 86498d6..5e77bee 100644 --- a/src/stream_safe.rs +++ b/src/stream_safe.rs @@ -10,17 +10,24 @@ use crate::tables::stream_safe_leading_nonstarters; pub(crate) const MAX_NONSTARTERS: usize = 30; const COMBINING_GRAPHEME_JOINER: char = '\u{034F}'; -/// UAX15-D4: This iterator keeps track of how many non-starters there have been +/// [UAX15-D4]: This iterator keeps track of how many non-starters there have been /// since the last starter in *NFKD* and will emit a Combining Grapheme Joiner /// (U+034F) if the count exceeds 30. +/// +/// [UAX15-D4]: https://www.unicode.org/reports/tr15/#UAX15-D4 pub struct StreamSafe<I> { iter: I, nonstarter_count: usize, buffer: Option<char>, } -impl<I> StreamSafe<I> { - pub(crate) fn new(iter: I) -> Self { +impl<I: Iterator<Item = char>> StreamSafe<I> { + /// Create a new stream safe iterator. + /// + /// Note that this iterator can also be obtained by directly calling [`.stream_safe()`](crate::UnicodeNormalization::stream_safe) + /// on the iterator. + #[inline] + pub fn new(iter: I) -> Self { Self { iter, nonstarter_count: 0, pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy