diff --git a/Cargo.toml b/Cargo.toml index bf237cf..981394e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -28,4 +28,12 @@ bencher = "0.1" [[bench]] name = "graphemes" +harness = false + +[[bench]] +name = "unicode_words" +harness = false + +[[bench]] +name = "word_bounds" harness = false \ No newline at end of file diff --git a/benches/unicode_words.rs b/benches/unicode_words.rs new file mode 100644 index 0000000..731e325 --- /dev/null +++ b/benches/unicode_words.rs @@ -0,0 +1,64 @@ +#[macro_use] +extern crate bencher; +extern crate unicode_segmentation; + +use bencher::Bencher; +use unicode_segmentation::UnicodeSegmentation; +use std::fs; + +fn unicode_words(bench: &mut Bencher, path: &str) { + let text = fs::read_to_string(path).unwrap(); + bench.iter(|| { + for w in text.unicode_words() { + bencher::black_box(w); + } + }); + + bench.bytes = text.len() as u64; +} + +fn unicode_words_arabic(bench: &mut Bencher) { + unicode_words(bench, "benches/texts/arabic.txt"); +} + +fn unicode_words_english(bench: &mut Bencher) { + unicode_words(bench, "benches/texts/english.txt"); +} + +fn unicode_words_hindi(bench: &mut Bencher) { + unicode_words(bench, "benches/texts/hindi.txt"); +} + +fn unicode_words_japanese(bench: &mut Bencher) { + unicode_words(bench, "benches/texts/japanese.txt"); +} + +fn unicode_words_korean(bench: &mut Bencher) { + unicode_words(bench, "benches/texts/korean.txt"); +} + +fn unicode_words_mandarin(bench: &mut Bencher) { + unicode_words(bench, "benches/texts/mandarin.txt"); +} + +fn unicode_words_russian(bench: &mut Bencher) { + unicode_words(bench, "benches/texts/russian.txt"); +} + +fn unicode_words_source_code(bench: &mut Bencher) { + unicode_words(bench, "benches/texts/source_code.txt"); +} + +benchmark_group!( + benches, + unicode_words_arabic, + unicode_words_english, + unicode_words_hindi, + unicode_words_japanese, + unicode_words_korean, + unicode_words_mandarin, + unicode_words_russian, + unicode_words_source_code, +); + +benchmark_main!(benches); diff --git a/benches/word_bounds.rs b/benches/word_bounds.rs new file mode 100644 index 0000000..035f57e --- /dev/null +++ b/benches/word_bounds.rs @@ -0,0 +1,64 @@ +#[macro_use] +extern crate bencher; +extern crate unicode_segmentation; + +use bencher::Bencher; +use unicode_segmentation::UnicodeSegmentation; +use std::fs; + +fn word_bounds(bench: &mut Bencher, path: &str) { + let text = fs::read_to_string(path).unwrap(); + bench.iter(|| { + for w in text.split_word_bounds() { + bencher::black_box(w); + } + }); + + bench.bytes = text.len() as u64; +} + +fn word_bounds_arabic(bench: &mut Bencher) { + word_bounds(bench, "benches/texts/arabic.txt"); +} + +fn word_bounds_english(bench: &mut Bencher) { + word_bounds(bench, "benches/texts/english.txt"); +} + +fn word_bounds_hindi(bench: &mut Bencher) { + word_bounds(bench, "benches/texts/hindi.txt"); +} + +fn word_bounds_japanese(bench: &mut Bencher) { + word_bounds(bench, "benches/texts/japanese.txt"); +} + +fn word_bounds_korean(bench: &mut Bencher) { + word_bounds(bench, "benches/texts/korean.txt"); +} + +fn word_bounds_mandarin(bench: &mut Bencher) { + word_bounds(bench, "benches/texts/mandarin.txt"); +} + +fn word_bounds_russian(bench: &mut Bencher) { + word_bounds(bench, "benches/texts/russian.txt"); +} + +fn word_bounds_source_code(bench: &mut Bencher) { + word_bounds(bench, "benches/texts/source_code.txt"); +} + +benchmark_group!( + benches, + word_bounds_arabic, + word_bounds_english, + word_bounds_hindi, + word_bounds_japanese, + word_bounds_korean, + word_bounds_mandarin, + word_bounds_russian, + word_bounds_source_code, +); + +benchmark_main!(benches);
Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.
Alternative Proxies: