Skip to content

Commit bf8599b

Browse files
authored
Merge pull request #104 from unicode-rs/fmt
Run rustfmt
2 parents: 122cd59 + deaf747; commit: bf8599b

File tree

10 files changed, with 5381 additions (+) and 2093 deletions (-).

.github/workflows/rust.yml

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,5 +20,13 @@ jobs:
2020
run: cargo build --verbose
2121
- name: Run tests
2222
run: cargo test --verbose
23+
fmt:
24+
25+
runs-on: ubuntu-latest
26+
27+
steps:
28+
- uses: actions/checkout@v2
29+
- name: Rustfmt
30+
run: cargo fmt --check
2331
- name: Verify regenerated files
24-
run: ./scripts/unicode.py && diff tables.rs src/tables.rs
32+
run: ./scripts/unicode.py && diff tables.rs src/tables.rs

benches/graphemes.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ use unicode_segmentation::UnicodeSegmentation;
77
fn graphemes(c: &mut Criterion, lang: &str, path: &str) {
88
let text = fs::read_to_string(path).unwrap();
99

10-
c.bench_function(&format!("graphemes_{}",lang), |bench| {
10+
c.bench_function(&format!("graphemes_{}", lang), |bench| {
1111
bench.iter(|| {
1212
for g in UnicodeSegmentation::graphemes(black_box(&*text), true) {
1313
black_box(g);
@@ -17,35 +17,35 @@ fn graphemes(c: &mut Criterion, lang: &str, path: &str) {
1717
}
1818

1919
fn graphemes_arabic(c: &mut Criterion) {
20-
graphemes(c, "arabic" ,"benches/texts/arabic.txt");
20+
graphemes(c, "arabic", "benches/texts/arabic.txt");
2121
}
2222

2323
fn graphemes_english(c: &mut Criterion) {
24-
graphemes(c, "english" ,"benches/texts/english.txt");
24+
graphemes(c, "english", "benches/texts/english.txt");
2525
}
2626

2727
fn graphemes_hindi(c: &mut Criterion) {
28-
graphemes(c, "hindi" ,"benches/texts/hindi.txt");
28+
graphemes(c, "hindi", "benches/texts/hindi.txt");
2929
}
3030

3131
fn graphemes_japanese(c: &mut Criterion) {
32-
graphemes(c, "japanese" ,"benches/texts/japanese.txt");
32+
graphemes(c, "japanese", "benches/texts/japanese.txt");
3333
}
3434

3535
fn graphemes_korean(c: &mut Criterion) {
36-
graphemes(c, "korean" ,"benches/texts/korean.txt");
36+
graphemes(c, "korean", "benches/texts/korean.txt");
3737
}
3838

3939
fn graphemes_mandarin(c: &mut Criterion) {
40-
graphemes(c, "mandarin" ,"benches/texts/mandarin.txt");
40+
graphemes(c, "mandarin", "benches/texts/mandarin.txt");
4141
}
4242

4343
fn graphemes_russian(c: &mut Criterion) {
44-
graphemes(c, "russian" ,"benches/texts/russian.txt");
44+
graphemes(c, "russian", "benches/texts/russian.txt");
4545
}
4646

4747
fn graphemes_source_code(c: &mut Criterion) {
48-
graphemes(c, "source_code","benches/texts/source_code.txt");
48+
graphemes(c, "source_code", "benches/texts/source_code.txt");
4949
}
5050

5151
criterion_group!(

benches/unicode_words.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ extern crate bencher;
33
extern crate unicode_segmentation;
44

55
use bencher::Bencher;
6-
use unicode_segmentation::UnicodeSegmentation;
76
use std::fs;
7+
use unicode_segmentation::UnicodeSegmentation;
88

99
fn unicode_words(bench: &mut Bencher, path: &str) {
1010
let text = fs::read_to_string(path).unwrap();

benches/word_bounds.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@ extern crate bencher;
33
extern crate unicode_segmentation;
44

55
use bencher::Bencher;
6-
use unicode_segmentation::UnicodeSegmentation;
76
use std::fs;
7+
use unicode_segmentation::UnicodeSegmentation;
88

99
fn word_bounds(bench: &mut Bencher, path: &str) {
1010
let text = fs::read_to_string(path).unwrap();

src/grapheme.rs

Lines changed: 92 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,9 @@ impl<'a> Iterator for GraphemeIndices<'a> {
4949

5050
#[inline]
5151
fn next(&mut self) -> Option<(usize, &'a str)> {
52-
self.iter.next().map(|s| (s.as_ptr() as usize - self.start_offset, s))
52+
self.iter
53+
.next()
54+
.map(|s| (s.as_ptr() as usize - self.start_offset, s))
5355
}
5456

5557
#[inline]
@@ -61,7 +63,9 @@ impl<'a> Iterator for GraphemeIndices<'a> {
6163
impl<'a> DoubleEndedIterator for GraphemeIndices<'a> {
6264
#[inline]
6365
fn next_back(&mut self) -> Option<(usize, &'a str)> {
64-
self.iter.next_back().map(|s| (s.as_ptr() as usize - self.start_offset, s))
66+
self.iter
67+
.next_back()
68+
.map(|s| (s.as_ptr() as usize - self.start_offset, s))
6569
}
6670
}
6771

@@ -126,7 +130,11 @@ impl<'a> DoubleEndedIterator for Graphemes<'a> {
126130
if end == self.cursor.cur_cursor() {
127131
return None;
128132
}
129-
let prev = self.cursor_back.prev_boundary(self.string, 0).unwrap().unwrap();
133+
let prev = self
134+
.cursor_back
135+
.prev_boundary(self.string, 0)
136+
.unwrap()
137+
.unwrap();
130138
Some(&self.string[prev..end])
131139
}
132140
}
@@ -143,7 +151,10 @@ pub fn new_graphemes<'b>(s: &'b str, is_extended: bool) -> Graphemes<'b> {
143151

144152
#[inline]
145153
pub fn new_grapheme_indices<'b>(s: &'b str, is_extended: bool) -> GraphemeIndices<'b> {
146-
GraphemeIndices { start_offset: s.as_ptr() as usize, iter: new_graphemes(s, is_extended) }
154+
GraphemeIndices {
155+
start_offset: s.as_ptr() as usize,
156+
iter: new_graphemes(s, is_extended),
157+
}
147158
}
148159

149160
// maybe unify with PairResult?
@@ -215,7 +226,7 @@ pub enum GraphemeIncomplete {
215226
/// current chunk, so the chunk after that is requested. This will only be
216227
/// returned if the chunk ends before the `len` parameter provided on
217228
/// creation of the cursor.
218-
NextChunk, // requesting chunk following the one given
229+
NextChunk, // requesting chunk following the one given
219230

220231
/// An error returned when the chunk given does not contain the cursor position.
221232
InvalidOffset,
@@ -224,42 +235,42 @@ pub enum GraphemeIncomplete {
224235
// An enum describing the result from lookup of a pair of categories.
225236
#[derive(PartialEq, Eq)]
226237
enum PairResult {
227-
NotBreak, // definitely not a break
228-
Break, // definitely a break
229-
Extended, // a break iff not in extended mode
230-
Regional, // a break if preceded by an even number of RIS
231-
Emoji, // a break if preceded by emoji base and (Extend)*
238+
NotBreak, // definitely not a break
239+
Break, // definitely a break
240+
Extended, // a break iff not in extended mode
241+
Regional, // a break if preceded by an even number of RIS
242+
Emoji, // a break if preceded by emoji base and (Extend)*
232243
}
233244

234245
#[inline]
235246
fn check_pair(before: GraphemeCat, after: GraphemeCat) -> PairResult {
236-
use crate::tables::grapheme::GraphemeCat::*;
237247
use self::PairResult::*;
248+
use crate::tables::grapheme::GraphemeCat::*;
238249
match (before, after) {
239-
(GC_CR, GC_LF) => NotBreak, // GB3
240-
(GC_Control, _) => Break, // GB4
241-
(GC_CR, _) => Break, // GB4
242-
(GC_LF, _) => Break, // GB4
243-
(_, GC_Control) => Break, // GB5
244-
(_, GC_CR) => Break, // GB5
245-
(_, GC_LF) => Break, // GB5
246-
(GC_L, GC_L) => NotBreak, // GB6
247-
(GC_L, GC_V) => NotBreak, // GB6
248-
(GC_L, GC_LV) => NotBreak, // GB6
249-
(GC_L, GC_LVT) => NotBreak, // GB6
250-
(GC_LV, GC_V) => NotBreak, // GB7
251-
(GC_LV, GC_T) => NotBreak, // GB7
252-
(GC_V, GC_V) => NotBreak, // GB7
253-
(GC_V, GC_T) => NotBreak, // GB7
254-
(GC_LVT, GC_T) => NotBreak, // GB8
255-
(GC_T, GC_T) => NotBreak, // GB8
256-
(_, GC_Extend) => NotBreak, // GB9
257-
(_, GC_ZWJ) => NotBreak, // GB9
258-
(_, GC_SpacingMark) => Extended, // GB9a
259-
(GC_Prepend, _) => Extended, // GB9b
260-
(GC_ZWJ, GC_Extended_Pictographic) => Emoji, // GB11
261-
(GC_Regional_Indicator, GC_Regional_Indicator) => Regional, // GB12, GB13
262-
(_, _) => Break, // GB999
250+
(GC_CR, GC_LF) => NotBreak, // GB3
251+
(GC_Control, _) => Break, // GB4
252+
(GC_CR, _) => Break, // GB4
253+
(GC_LF, _) => Break, // GB4
254+
(_, GC_Control) => Break, // GB5
255+
(_, GC_CR) => Break, // GB5
256+
(_, GC_LF) => Break, // GB5
257+
(GC_L, GC_L) => NotBreak, // GB6
258+
(GC_L, GC_V) => NotBreak, // GB6
259+
(GC_L, GC_LV) => NotBreak, // GB6
260+
(GC_L, GC_LVT) => NotBreak, // GB6
261+
(GC_LV, GC_V) => NotBreak, // GB7
262+
(GC_LV, GC_T) => NotBreak, // GB7
263+
(GC_V, GC_V) => NotBreak, // GB7
264+
(GC_V, GC_T) => NotBreak, // GB7
265+
(GC_LVT, GC_T) => NotBreak, // GB8
266+
(GC_T, GC_T) => NotBreak, // GB8
267+
(_, GC_Extend) => NotBreak, // GB9
268+
(_, GC_ZWJ) => NotBreak, // GB9
269+
(_, GC_SpacingMark) => Extended, // GB9a
270+
(GC_Prepend, _) => Extended, // GB9b
271+
(GC_ZWJ, GC_Extended_Pictographic) => Emoji, // GB11
272+
(GC_Regional_Indicator, GC_Regional_Indicator) => Regional, // GB12, GB13
273+
(_, _) => Break, // GB999
263274
}
264275
}
265276

@@ -397,17 +408,19 @@ impl GraphemeCursor {
397408
if self.is_extended && chunk_start + chunk.len() == self.offset {
398409
let ch = chunk.chars().rev().next().unwrap();
399410
if self.grapheme_category(ch) == gr::GC_Prepend {
400-
self.decide(false); // GB9b
411+
self.decide(false); // GB9b
401412
return;
402413
}
403414
}
404415
match self.state {
405416
GraphemeState::Regional => self.handle_regional(chunk, chunk_start),
406417
GraphemeState::Emoji => self.handle_emoji(chunk, chunk_start),
407-
_ => if self.cat_before.is_none() && self.offset == chunk.len() + chunk_start {
408-
let ch = chunk.chars().rev().next().unwrap();
409-
self.cat_before = Some(self.grapheme_category(ch));
410-
},
418+
_ => {
419+
if self.cat_before.is_none() && self.offset == chunk.len() + chunk_start {
420+
let ch = chunk.chars().rev().next().unwrap();
421+
self.cat_before = Some(self.grapheme_category(ch));
422+
}
423+
}
411424
}
412425
}
413426

@@ -515,17 +528,21 @@ impl GraphemeCursor {
515528
/// cursor.set_cursor(12);
516529
/// assert_eq!(cursor.is_boundary(flags, 0), Ok(false));
517530
/// ```
518-
pub fn is_boundary(&mut self, chunk: &str, chunk_start: usize) -> Result<bool, GraphemeIncomplete> {
531+
pub fn is_boundary(
532+
&mut self,
533+
chunk: &str,
534+
chunk_start: usize,
535+
) -> Result<bool, GraphemeIncomplete> {
519536
use crate::tables::grapheme as gr;
520537
if self.state == GraphemeState::Break {
521-
return Ok(true)
538+
return Ok(true);
522539
}
523540
if self.state == GraphemeState::NotBreak {
524-
return Ok(false)
541+
return Ok(false);
525542
}
526543
if self.offset < chunk_start || self.offset >= chunk_start + chunk.len() {
527544
if self.offset > chunk_start + chunk.len() || self.cat_after.is_none() {
528-
return Err(GraphemeIncomplete::InvalidOffset)
545+
return Err(GraphemeIncomplete::InvalidOffset);
529546
}
530547
}
531548
if let Some(pre_context_offset) = self.pre_context_offset {
@@ -606,7 +623,11 @@ impl GraphemeCursor {
606623
/// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(Some(4)));
607624
/// assert_eq!(cursor.next_boundary(&s[2..4], 2), Ok(None));
608625
/// ```
609-
pub fn next_boundary(&mut self, chunk: &str, chunk_start: usize) -> Result<Option<usize>, GraphemeIncomplete> {
626+
pub fn next_boundary(
627+
&mut self,
628+
chunk: &str,
629+
chunk_start: usize,
630+
) -> Result<Option<usize>, GraphemeIncomplete> {
610631
if self.offset == self.len {
611632
return Ok(None);
612633
}
@@ -681,7 +702,11 @@ impl GraphemeCursor {
681702
/// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(Some(0)));
682703
/// assert_eq!(cursor.prev_boundary(&s[0..2], 0), Ok(None));
683704
/// ```
684-
pub fn prev_boundary(&mut self, chunk: &str, chunk_start: usize) -> Result<Option<usize>, GraphemeIncomplete> {
705+
pub fn prev_boundary(
706+
&mut self,
707+
chunk: &str,
708+
chunk_start: usize,
709+
) -> Result<Option<usize>, GraphemeIncomplete> {
685710
if self.offset == 0 {
686711
return Ok(None);
687712
}
@@ -702,7 +727,11 @@ impl GraphemeCursor {
702727
self.cat_after = self.cat_before.take();
703728
self.state = GraphemeState::Unknown;
704729
if let Some(ris_count) = self.ris_count {
705-
self.ris_count = if ris_count > 0 { Some(ris_count - 1) } else { None };
730+
self.ris_count = if ris_count > 0 {
731+
Some(ris_count - 1)
732+
} else {
733+
None
734+
};
706735
}
707736
if let Some(prev_ch) = iter.next() {
708737
ch = prev_ch;
@@ -729,7 +758,10 @@ impl GraphemeCursor {
729758
fn test_grapheme_cursor_ris_precontext() {
730759
let s = "\u{1f1fa}\u{1f1f8}\u{1f1fa}\u{1f1f8}\u{1f1fa}\u{1f1f8}";
731760
let mut c = GraphemeCursor::new(8, s.len(), true);
732-
assert_eq!(c.is_boundary(&s[4..], 4), Err(GraphemeIncomplete::PreContext(4)));
761+
assert_eq!(
762+
c.is_boundary(&s[4..], 4),
763+
Err(GraphemeIncomplete::PreContext(4))
764+
);
733765
c.provide_context(&s[..4], 0);
734766
assert_eq!(c.is_boundary(&s[4..], 4), Ok(true));
735767
}
@@ -738,7 +770,10 @@ fn test_grapheme_cursor_ris_precontext() {
738770
fn test_grapheme_cursor_chunk_start_require_precontext() {
739771
let s = "\r\n";
740772
let mut c = GraphemeCursor::new(1, s.len(), true);
741-
assert_eq!(c.is_boundary(&s[1..], 1), Err(GraphemeIncomplete::PreContext(1)));
773+
assert_eq!(
774+
c.is_boundary(&s[1..], 1),
775+
Err(GraphemeIncomplete::PreContext(1))
776+
);
742777
c.provide_context(&s[..1], 0);
743778
assert_eq!(c.is_boundary(&s[1..], 1), Ok(false));
744779
}
@@ -747,14 +782,20 @@ fn test_grapheme_cursor_chunk_start_require_precontext() {
747782
fn test_grapheme_cursor_prev_boundary() {
748783
let s = "abcd";
749784
let mut c = GraphemeCursor::new(3, s.len(), true);
750-
assert_eq!(c.prev_boundary(&s[2..], 2), Err(GraphemeIncomplete::PrevChunk));
785+
assert_eq!(
786+
c.prev_boundary(&s[2..], 2),
787+
Err(GraphemeIncomplete::PrevChunk)
788+
);
751789
assert_eq!(c.prev_boundary(&s[..2], 0), Ok(Some(2)));
752790
}
753791

754792
#[test]
755793
fn test_grapheme_cursor_prev_boundary_chunk_start() {
756794
let s = "abcd";
757795
let mut c = GraphemeCursor::new(2, s.len(), true);
758-
assert_eq!(c.prev_boundary(&s[2..], 2), Err(GraphemeIncomplete::PrevChunk));
796+
assert_eq!(
797+
c.prev_boundary(&s[2..], 2),
798+
Err(GraphemeIncomplete::PrevChunk)
799+
);
759800
assert_eq!(c.prev_boundary(&s[..2], 0), Ok(Some(1)));
760801
}

src/lib.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -50,9 +50,10 @@
5050
//! ```
5151
5252
#![deny(missing_docs, unsafe_code)]
53-
#![doc(html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
54-
html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png")]
55-
53+
#![doc(
54+
html_logo_url = "https://unicode-rs.github.io/unicode-rs_sm.png",
55+
html_favicon_url = "https://unicode-rs.github.io/unicode-rs_sm.png"
56+
)]
5657
#![no_std]
5758

5859
#[cfg(test)]
@@ -63,16 +64,17 @@ extern crate std;
6364
#[macro_use]
6465
extern crate quickcheck;
6566

66-
pub use grapheme::{Graphemes, GraphemeIndices};
6767
pub use grapheme::{GraphemeCursor, GraphemeIncomplete};
68+
pub use grapheme::{GraphemeIndices, Graphemes};
69+
pub use sentence::{USentenceBoundIndices, USentenceBounds, UnicodeSentences};
6870
pub use tables::UNICODE_VERSION;
69-
pub use word::{UWordBounds, UWordBoundIndices, UnicodeWords, UnicodeWordIndices};
70-
pub use sentence::{USentenceBounds, USentenceBoundIndices, UnicodeSentences};
71+
pub use word::{UWordBoundIndices, UWordBounds, UnicodeWordIndices, UnicodeWords};
7172

7273
mod grapheme;
74+
#[rustfmt::skip]
7375
mod tables;
74-
mod word;
7576
mod sentence;
77+
mod word;
7678

7779
#[cfg(test)]
7880
mod test;

Comments (0): this commit has no comments.
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy