Skip to content

Commit 58d73ac

Browse files
authored
Merge pull request #97 from timClicks/add-word-benches
Benchmark other methods mentioned in README
2 parents 12fc8d9 + 52dba3b commit 58d73ac

File tree

3 files changed

+136
-0
lines changed

3 files changed

+136
-0
lines changed

Cargo.toml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,12 @@ criterion = "0.3"
2828

2929
[[bench]]
3030
name = "graphemes"
31+
harness = false
32+
33+
[[bench]]
34+
name = "unicode_words"
35+
harness = false
36+
37+
[[bench]]
38+
name = "word_bounds"
3139
harness = false

benches/unicode_words.rs

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#[macro_use]
2+
extern crate bencher;
3+
extern crate unicode_segmentation;
4+
5+
use bencher::Bencher;
6+
use unicode_segmentation::UnicodeSegmentation;
7+
use std::fs;
8+
9+
fn unicode_words(bench: &mut Bencher, path: &str) {
10+
let text = fs::read_to_string(path).unwrap();
11+
bench.iter(|| {
12+
for w in text.unicode_words() {
13+
bencher::black_box(w);
14+
}
15+
});
16+
17+
bench.bytes = text.len() as u64;
18+
}
19+
20+
/// Runs the `unicode_words` benchmark against `benches/texts/arabic.txt`.
fn unicode_words_arabic(bench: &mut Bencher) {
    let sample = "benches/texts/arabic.txt";
    unicode_words(bench, sample);
}
23+
24+
/// Runs the `unicode_words` benchmark against `benches/texts/english.txt`.
fn unicode_words_english(bench: &mut Bencher) {
    let sample = "benches/texts/english.txt";
    unicode_words(bench, sample);
}
27+
28+
/// Runs the `unicode_words` benchmark against `benches/texts/hindi.txt`.
fn unicode_words_hindi(bench: &mut Bencher) {
    let sample = "benches/texts/hindi.txt";
    unicode_words(bench, sample);
}
31+
32+
/// Runs the `unicode_words` benchmark against `benches/texts/japanese.txt`.
fn unicode_words_japanese(bench: &mut Bencher) {
    let sample = "benches/texts/japanese.txt";
    unicode_words(bench, sample);
}
35+
36+
/// Runs the `unicode_words` benchmark against `benches/texts/korean.txt`.
fn unicode_words_korean(bench: &mut Bencher) {
    let sample = "benches/texts/korean.txt";
    unicode_words(bench, sample);
}
39+
40+
/// Runs the `unicode_words` benchmark against `benches/texts/mandarin.txt`.
fn unicode_words_mandarin(bench: &mut Bencher) {
    let sample = "benches/texts/mandarin.txt";
    unicode_words(bench, sample);
}
43+
44+
/// Runs the `unicode_words` benchmark against `benches/texts/russian.txt`.
fn unicode_words_russian(bench: &mut Bencher) {
    let sample = "benches/texts/russian.txt";
    unicode_words(bench, sample);
}
47+
48+
/// Runs the `unicode_words` benchmark against `benches/texts/source_code.txt`.
fn unicode_words_source_code(bench: &mut Bencher) {
    let sample = "benches/texts/source_code.txt";
    unicode_words(bench, sample);
}
51+
52+
// Collects every `unicode_words_*` benchmark function under the group name
// `benches`, which is the name handed to `benchmark_main!`.
benchmark_group!(
    benches,
    unicode_words_arabic,
    unicode_words_english,
    unicode_words_hindi,
    unicode_words_japanese,
    unicode_words_korean,
    unicode_words_mandarin,
    unicode_words_russian,
    unicode_words_source_code,
);
63+
64+
// Entry point for this bench target: hands the `benches` group to bencher's
// runner macro (the target is declared with `harness = false` in Cargo.toml).
benchmark_main!(benches);

benches/word_bounds.rs

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
#[macro_use]
2+
extern crate bencher;
3+
extern crate unicode_segmentation;
4+
5+
use bencher::Bencher;
6+
use unicode_segmentation::UnicodeSegmentation;
7+
use std::fs;
8+
9+
fn word_bounds(bench: &mut Bencher, path: &str) {
10+
let text = fs::read_to_string(path).unwrap();
11+
bench.iter(|| {
12+
for w in text.split_word_bounds() {
13+
bencher::black_box(w);
14+
}
15+
});
16+
17+
bench.bytes = text.len() as u64;
18+
}
19+
20+
/// Runs the `word_bounds` benchmark against `benches/texts/arabic.txt`.
fn word_bounds_arabic(bench: &mut Bencher) {
    let sample = "benches/texts/arabic.txt";
    word_bounds(bench, sample);
}
23+
24+
/// Runs the `word_bounds` benchmark against `benches/texts/english.txt`.
fn word_bounds_english(bench: &mut Bencher) {
    let sample = "benches/texts/english.txt";
    word_bounds(bench, sample);
}
27+
28+
/// Runs the `word_bounds` benchmark against `benches/texts/hindi.txt`.
fn word_bounds_hindi(bench: &mut Bencher) {
    let sample = "benches/texts/hindi.txt";
    word_bounds(bench, sample);
}
31+
32+
/// Runs the `word_bounds` benchmark against `benches/texts/japanese.txt`.
fn word_bounds_japanese(bench: &mut Bencher) {
    let sample = "benches/texts/japanese.txt";
    word_bounds(bench, sample);
}
35+
36+
/// Runs the `word_bounds` benchmark against `benches/texts/korean.txt`.
fn word_bounds_korean(bench: &mut Bencher) {
    let sample = "benches/texts/korean.txt";
    word_bounds(bench, sample);
}
39+
40+
/// Runs the `word_bounds` benchmark against `benches/texts/mandarin.txt`.
fn word_bounds_mandarin(bench: &mut Bencher) {
    let sample = "benches/texts/mandarin.txt";
    word_bounds(bench, sample);
}
43+
44+
/// Runs the `word_bounds` benchmark against `benches/texts/russian.txt`.
fn word_bounds_russian(bench: &mut Bencher) {
    let sample = "benches/texts/russian.txt";
    word_bounds(bench, sample);
}
47+
48+
/// Runs the `word_bounds` benchmark against `benches/texts/source_code.txt`.
fn word_bounds_source_code(bench: &mut Bencher) {
    let sample = "benches/texts/source_code.txt";
    word_bounds(bench, sample);
}
51+
52+
// Collects every `word_bounds_*` benchmark function under the group name
// `benches`, which is the name handed to `benchmark_main!`.
benchmark_group!(
    benches,
    word_bounds_arabic,
    word_bounds_english,
    word_bounds_hindi,
    word_bounds_japanese,
    word_bounds_korean,
    word_bounds_mandarin,
    word_bounds_russian,
    word_bounds_source_code,
);
63+
64+
// Entry point for this bench target: hands the `benches` group to bencher's
// runner macro (the target is declared with `harness = false` in Cargo.toml).
benchmark_main!(benches);

0 commit comments

Comments
 (0)
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy