From bb6f52cf2ac25dadcd1448b0e9a6547b6c58464e Mon Sep 17 00:00:00 2001 From: Yorwba Date: Sun, 4 Oct 2020 19:47:33 +0200 Subject: [PATCH 1/2] Swap docs of unicode_sentences and split_sentence_bounds Seems like those two functions got mixed up when the documentation was written. `unicode_sentences` is the one that filters for alphanumeric characters. --- src/lib.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 8d1c99f..fdec8e9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -177,12 +177,6 @@ pub trait UnicodeSegmentation { /// ``` fn split_word_bound_indices<'a>(&'a self) -> UWordBoundIndices<'a>; - /// Returns an iterator over substrings of `self` separated on - /// [UAX#29 sentence boundaries](http://www.unicode.org/reports/tr29/#Sentence_Boundaries). - /// - /// The concatenation of the substrings returned by this function is just the original string. - fn unicode_sentences<'a>(&'a self) -> UnicodeSentences<'a>; - /// Returns an iterator over substrings of `self` separated on /// [UAX#29 sentence boundaries](http://www.unicode.org/reports/tr29/#Sentence_Boundaries). /// @@ -192,6 +186,12 @@ pub trait UnicodeSegmentation { /// [Alphabetic](http://unicode.org/reports/tr44/#Alphabetic) /// property, or with /// [General_Category=Number](http://unicode.org/reports/tr44/#General_Category_Values). + fn unicode_sentences<'a>(&'a self) -> UnicodeSentences<'a>; + + /// Returns an iterator over substrings of `self` separated on + /// [UAX#29 sentence boundaries](http://www.unicode.org/reports/tr29/#Sentence_Boundaries). + /// + /// The concatenation of the substrings returned by this function is just the original string. 
 fn split_sentence_bounds<'a>(&'a self) -> USentenceBounds<'a>; /// Returns an iterator over substrings of `self`, split on UAX#29 sentence boundaries, From bf55e02707a2c7dc20ab226bdaccccd4815ee2a5 Mon Sep 17 00:00:00 2001 From: Yorwba Date: Sun, 4 Oct 2020 20:12:44 +0200 Subject: [PATCH 2/2] Add an example for sentence segmentation --- src/lib.rs | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/src/lib.rs b/src/lib.rs index fdec8e9..f2f2962 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -186,16 +186,50 @@ pub trait UnicodeSegmentation { /// [Alphabetic](http://unicode.org/reports/tr44/#Alphabetic) /// property, or with /// [General_Category=Number](http://unicode.org/reports/tr44/#General_Category_Values). + /// + /// # Example + /// + /// ``` + /// # use self::unicode_segmentation::UnicodeSegmentation; + /// let uss = "Mr. Fox jumped. [...] The dog was too lazy."; + /// let us1 = uss.unicode_sentences().collect::<Vec<&str>>(); + /// let b: &[_] = &["Mr. ", "Fox jumped. ", "The dog was too lazy."]; + /// + /// assert_eq!(&us1[..], b); + /// ``` fn unicode_sentences<'a>(&'a self) -> UnicodeSentences<'a>; /// Returns an iterator over substrings of `self` separated on /// [UAX#29 sentence boundaries](http://www.unicode.org/reports/tr29/#Sentence_Boundaries). /// /// The concatenation of the substrings returned by this function is just the original string. + /// + /// # Example + /// + /// ``` + /// # use self::unicode_segmentation::UnicodeSegmentation; + /// let ssbs = "Mr. Fox jumped. [...] The dog was too lazy."; + /// let ssb1 = ssbs.split_sentence_bounds().collect::<Vec<&str>>(); + /// let b: &[_] = &["Mr. ", "Fox jumped. ", "[...] ", "The dog was too lazy."]; + /// + /// assert_eq!(&ssb1[..], b); + /// ``` fn split_sentence_bounds<'a>(&'a self) -> USentenceBounds<'a>; /// Returns an iterator over substrings of `self`, split on UAX#29 sentence boundaries, /// and their offsets. See `split_sentence_bounds()` for more information. 
 + /// + /// # Example + /// + /// ``` + /// # use self::unicode_segmentation::UnicodeSegmentation; + /// let ssis = "Mr. Fox jumped. [...] The dog was too lazy."; + /// let ssi1 = ssis.split_sentence_bound_indices().collect::<Vec<(usize, &str)>>(); + /// let b: &[_] = &[(0, "Mr. "), (4, "Fox jumped. "), (16, "[...] "), + /// (22, "The dog was too lazy.")]; + /// + /// assert_eq!(&ssi1[..], b); + /// ``` fn split_sentence_bound_indices<'a>(&'a self) -> USentenceBoundIndices<'a>; } pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy