From a08c4e552bfe4048cf55cb68621080697924a166 Mon Sep 17 00:00:00 2001 From: Tim McNamara Date: Wed, 2 Jun 2021 15:41:44 +1200 Subject: [PATCH 1/3] Replace bencher with criterion Enable performance improvements to be tracked over time more easily. --- Cargo.toml | 2 +- benches/graphemes.rs | 59 ++++++++++++++++++++++---------------------- 2 files changed, 30 insertions(+), 31 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index bf237cf..ae5116b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,7 +24,7 @@ no_std = [] # This is a no-op, preserved for backward compatibility only. [dev-dependencies] quickcheck = "0.7" -bencher = "0.1" +criterion = "0.3" [[bench]] name = "graphemes" diff --git a/benches/graphemes.rs b/benches/graphemes.rs index 5f14352..1641cfe 100644 --- a/benches/graphemes.rs +++ b/benches/graphemes.rs @@ -1,55 +1,54 @@ -#[macro_use] -extern crate bencher; -extern crate unicode_segmentation; +use criterion::{black_box, criterion_group, criterion_main, Criterion}; +use unicode_segmentation; -use bencher::Bencher; -use unicode_segmentation::UnicodeSegmentation; use std::fs; +use unicode_segmentation::UnicodeSegmentation; -fn graphemes(bench: &mut Bencher, path: &str) { +fn graphemes(c: &mut Criterion, lang: &str, path: &str) { let text = fs::read_to_string(path).unwrap(); - bench.iter(|| { - for g in UnicodeSegmentation::graphemes(&*text, true) { - bencher::black_box(g); - } - }); - bench.bytes = text.len() as u64; + c.bench_function(&format!("grapheme {}",lang), |bench| { + bench.iter(|| { + for g in UnicodeSegmentation::graphemes(black_box(&*text), true) { + black_box(g); + } + }) + }); } -fn graphemes_arabic(bench: &mut Bencher) { - graphemes(bench, "benches/texts/arabic.txt"); +fn graphemes_arabic(c: &mut Criterion) { + graphemes(c, "arabic" ,"benches/texts/arabic.txt"); } -fn graphemes_english(bench: &mut Bencher) { - graphemes(bench, "benches/texts/english.txt"); +fn graphemes_english(c: &mut Criterion) { + graphemes(c, "english" 
,"benches/texts/english.txt"); } -fn graphemes_hindi(bench: &mut Bencher) { - graphemes(bench, "benches/texts/hindi.txt"); +fn graphemes_hindi(c: &mut Criterion) { + graphemes(c, "hindi" ,"benches/texts/hindi.txt"); } -fn graphemes_japanese(bench: &mut Bencher) { - graphemes(bench, "benches/texts/japanese.txt"); +fn graphemes_japanese(c: &mut Criterion) { + graphemes(c, "japanese" ,"benches/texts/japanese.txt"); } -fn graphemes_korean(bench: &mut Bencher) { - graphemes(bench, "benches/texts/korean.txt"); +fn graphemes_korean(c: &mut Criterion) { + graphemes(c, "korean" ,"benches/texts/korean.txt"); } -fn graphemes_mandarin(bench: &mut Bencher) { - graphemes(bench, "benches/texts/mandarin.txt"); +fn graphemes_mandarin(c: &mut Criterion) { + graphemes(c, "mandarin" ,"benches/texts/mandarin.txt"); } -fn graphemes_russian(bench: &mut Bencher) { - graphemes(bench, "benches/texts/russian.txt"); +fn graphemes_russian(c: &mut Criterion) { + graphemes(c, "russian" ,"benches/texts/russian.txt"); } -fn graphemes_source_code(bench: &mut Bencher) { - graphemes(bench, "benches/texts/source_code.txt"); +fn graphemes_source_code(c: &mut Criterion) { + graphemes(c, "source_code","benches/texts/source_code.txt"); } -benchmark_group!( +criterion_group!( benches, graphemes_arabic, graphemes_english, @@ -61,4 +60,4 @@ benchmark_group!( graphemes_source_code, ); -benchmark_main!(benches); +criterion_main!(benches); From 86509eeeaa712e164072d959828ef22cb565b0f2 Mon Sep 17 00:00:00 2001 From: Tim McNamara Date: Wed, 2 Jun 2021 16:09:16 +1200 Subject: [PATCH 2/3] Increase inlining --- src/grapheme.rs | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/grapheme.rs b/src/grapheme.rs index ef9e1a1..7fb5cc0 100644 --- a/src/grapheme.rs +++ b/src/grapheme.rs @@ -228,6 +228,7 @@ enum PairResult { Emoji, // a break if preceded by emoji base and (Extend)* } +#[inline] fn check_pair(before: GraphemeCat, after: GraphemeCat) -> PairResult { use crate::tables::grapheme::GraphemeCat::*; use 
self::PairResult::*; @@ -407,6 +408,7 @@ impl GraphemeCursor { } } + #[inline] fn decide(&mut self, is_break: bool) { self.state = if is_break { GraphemeState::Break @@ -415,11 +417,13 @@ impl GraphemeCursor { }; } + #[inline] fn decision(&mut self, is_break: bool) -> Result<bool, GraphemeIncomplete> { self.decide(is_break); Ok(is_break) } + #[inline] fn is_boundary_result(&self) -> Result<bool, GraphemeIncomplete> { if self.state == GraphemeState::Break { Ok(true) @@ -432,6 +436,7 @@ impl GraphemeCursor { } } + #[inline] fn handle_regional(&mut self, chunk: &str, chunk_start: usize) { use crate::tables::grapheme as gr; let mut ris_count = self.ris_count.unwrap_or(0); @@ -452,6 +457,7 @@ impl GraphemeCursor { self.state = GraphemeState::Regional; } + #[inline] fn handle_emoji(&mut self, chunk: &str, chunk_start: usize) { use crate::tables::grapheme as gr; let mut iter = chunk.chars().rev(); @@ -482,6 +488,7 @@ impl GraphemeCursor { self.state = GraphemeState::Emoji; } + #[inline] /// Determine whether the current cursor location is a grapheme cluster boundary. /// Only a part of the string need be supplied. If `chunk_start` is nonzero or /// the length of `chunk` is not equal to `len` on creation, then this method @@ -563,6 +570,7 @@ impl GraphemeCursor { } } + #[inline] /// Find the next boundary after the current cursor position. Only a part of /// the string need be supplied. 
If the chunk is incomplete, then this /// method might return `GraphemeIncomplete::PreContext` or From 9310f0f073c050b0c16aa72de3c75da168d918cd Mon Sep 17 00:00:00 2001 From: Tim McNamara Date: Wed, 2 Jun 2021 16:14:33 +1200 Subject: [PATCH 3/3] Make bench output same as function names --- benches/graphemes.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/benches/graphemes.rs b/benches/graphemes.rs index 1641cfe..8a7a379 100644 --- a/benches/graphemes.rs +++ b/benches/graphemes.rs @@ -7,7 +7,7 @@ use unicode_segmentation::UnicodeSegmentation; fn graphemes(c: &mut Criterion, lang: &str, path: &str) { let text = fs::read_to_string(path).unwrap(); - c.bench_function(&format!("grapheme {}",lang), |bench| { + c.bench_function(&format!("graphemes_{}",lang), |bench| { bench.iter(|| { for g in UnicodeSegmentation::graphemes(black_box(&*text), true) { black_box(g);
pFad - Phonifier reborn

pFad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy