diff --git a/Cargo.toml b/Cargo.toml
index bf237cf..981394e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -28,4 +28,12 @@ bencher = "0.1"
 
 [[bench]]
 name = "graphemes"
+harness = false
+
+[[bench]]
+name = "unicode_words"
+harness = false
+
+[[bench]]
+name = "word_bounds"
 harness = false
\ No newline at end of file
diff --git a/benches/unicode_words.rs b/benches/unicode_words.rs
new file mode 100644
index 0000000..731e325
--- /dev/null
+++ b/benches/unicode_words.rs
@@ -0,0 +1,64 @@
+#[macro_use]
+extern crate bencher;
+extern crate unicode_segmentation;
+
+use bencher::Bencher;
+use unicode_segmentation::UnicodeSegmentation;
+use std::fs;
+
+fn unicode_words(bench: &mut Bencher, path: &str) {
+    let text = fs::read_to_string(path).unwrap();
+    bench.iter(|| {
+        for w in text.unicode_words() {
+            bencher::black_box(w);
+        }
+    });
+
+    bench.bytes = text.len() as u64;
+}
+
+fn unicode_words_arabic(bench: &mut Bencher) {
+    unicode_words(bench, "benches/texts/arabic.txt");
+}
+
+fn unicode_words_english(bench: &mut Bencher) {
+    unicode_words(bench, "benches/texts/english.txt");
+}
+
+fn unicode_words_hindi(bench: &mut Bencher) {
+    unicode_words(bench, "benches/texts/hindi.txt");
+}
+
+fn unicode_words_japanese(bench: &mut Bencher) {
+    unicode_words(bench, "benches/texts/japanese.txt");
+}
+
+fn unicode_words_korean(bench: &mut Bencher) {
+    unicode_words(bench, "benches/texts/korean.txt");
+}
+
+fn unicode_words_mandarin(bench: &mut Bencher) {
+    unicode_words(bench, "benches/texts/mandarin.txt");
+}
+
+fn unicode_words_russian(bench: &mut Bencher) {
+    unicode_words(bench, "benches/texts/russian.txt");
+}
+
+fn unicode_words_source_code(bench: &mut Bencher) {
+    unicode_words(bench, "benches/texts/source_code.txt");
+}
+
+benchmark_group!(
+    benches,
+    unicode_words_arabic,
+    unicode_words_english,
+    unicode_words_hindi,
+    unicode_words_japanese,
+    unicode_words_korean,
+    unicode_words_mandarin,
+    unicode_words_russian,
+    unicode_words_source_code,
+);
+
+benchmark_main!(benches);
diff --git a/benches/word_bounds.rs b/benches/word_bounds.rs
new file mode 100644
index 0000000..035f57e
--- /dev/null
+++ b/benches/word_bounds.rs
@@ -0,0 +1,64 @@
+#[macro_use]
+extern crate bencher;
+extern crate unicode_segmentation;
+
+use bencher::Bencher;
+use unicode_segmentation::UnicodeSegmentation;
+use std::fs;
+
+fn word_bounds(bench: &mut Bencher, path: &str) {
+    let text = fs::read_to_string(path).unwrap();
+    bench.iter(|| {
+        for w in text.split_word_bounds() {
+            bencher::black_box(w);
+        }
+    });
+
+    bench.bytes = text.len() as u64;
+}
+
+fn word_bounds_arabic(bench: &mut Bencher) {
+    word_bounds(bench, "benches/texts/arabic.txt");
+}
+
+fn word_bounds_english(bench: &mut Bencher) {
+    word_bounds(bench, "benches/texts/english.txt");
+}
+
+fn word_bounds_hindi(bench: &mut Bencher) {
+    word_bounds(bench, "benches/texts/hindi.txt");
+}
+
+fn word_bounds_japanese(bench: &mut Bencher) {
+    word_bounds(bench, "benches/texts/japanese.txt");
+}
+
+fn word_bounds_korean(bench: &mut Bencher) {
+    word_bounds(bench, "benches/texts/korean.txt");
+}
+
+fn word_bounds_mandarin(bench: &mut Bencher) {
+    word_bounds(bench, "benches/texts/mandarin.txt");
+}
+
+fn word_bounds_russian(bench: &mut Bencher) {
+    word_bounds(bench, "benches/texts/russian.txt");
+}
+
+fn word_bounds_source_code(bench: &mut Bencher) {
+    word_bounds(bench, "benches/texts/source_code.txt");
+}
+
+benchmark_group!(
+    benches,
+    word_bounds_arabic,
+    word_bounds_english,
+    word_bounds_hindi,
+    word_bounds_japanese,
+    word_bounds_korean,
+    word_bounds_mandarin,
+    word_bounds_russian,
+    word_bounds_source_code,
+);
+
+benchmark_main!(benches);
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.

Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy