From c9aa6fa3aee39a0cf5697090a560b01183d7b942 Mon Sep 17 00:00:00 2001 From: Nathan Vegdahl Date: Fri, 14 Feb 2020 21:22:00 +0900 Subject: [PATCH 1/2] Implement a special-case lookup for ascii grapheme categories. This speeds up processing even for many non-ascii texts, since they often still use ascii-range punctuation and whitespace. --- src/grapheme.rs | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/src/grapheme.rs b/src/grapheme.rs index b66536e..067de25 100644 --- a/src/grapheme.rs +++ b/src/grapheme.rs @@ -284,12 +284,30 @@ impl GraphemeCursor { fn grapheme_category(&mut self, ch: char) -> GraphemeCat { use tables::grapheme as gr; - // If this char isn't within the cached range, update the cache to the - // range that includes it. - if (ch as u32) < self.grapheme_cat_cache.0 || (ch as u32) > self.grapheme_cat_cache.1 { - self.grapheme_cat_cache = gr::grapheme_category(ch); + use tables::grapheme::GraphemeCat::*; + + if ch <= '\u{7e}' { + // Special-case optimization for ascii, except U+007F. This + // improves performance even for many primarily non-ascii texts, + // due to use of punctuation and white space characters from the + // ascii range. + if ch >= '\u{20}' { + GC_Any + } else if ch == '\u{a}' { + GC_LF + } else if ch == '\u{d}' { + GC_CR + } else { + GC_Control + } + } else { + // If this char isn't within the cached range, update the cache to the + // range that includes it. + if (ch as u32) < self.grapheme_cat_cache.0 || (ch as u32) > self.grapheme_cat_cache.1 { + self.grapheme_cat_cache = gr::grapheme_category(ch); + } + self.grapheme_cat_cache.2 } - self.grapheme_cat_cache.2 } // Not sure I'm gonna keep this, the advantage over new() seems thin. 
From 945dbb61c7db54c98c3282b8ec42ee0acd010ca5 Mon Sep 17 00:00:00 2001 From: Manish Goregaokar Date: Fri, 14 Feb 2020 12:44:14 -0800 Subject: [PATCH 2/2] Apply suggestions from code review --- src/grapheme.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/grapheme.rs b/src/grapheme.rs index 067de25..176a7aa 100644 --- a/src/grapheme.rs +++ b/src/grapheme.rs @@ -293,9 +293,9 @@ impl GraphemeCursor { // ascii range. if ch >= '\u{20}' { GC_Any - } else if ch == '\u{a}' { + } else if ch == '\n' { GC_LF - } else if ch == '\u{d}' { + } else if ch == '\r' { GC_CR } else { GC_Control pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy