From 0d589e1d08c06d5de044b77f5b2ec36654a71283 Mon Sep 17 00:00:00 2001 From: Patrick Norton Date: Sun, 25 Jul 2021 07:42:12 -0400 Subject: [PATCH] Made AsciiStr::split generic over Pattern --- src/ascii_char.rs | 10 ++++++ src/ascii_str.rs | 87 ++++++++++++++++++++++++++++++----------------- 2 files changed, 66 insertions(+), 31 deletions(-) diff --git a/src/ascii_char.rs b/src/ascii_char.rs index f726df6..843f576 100644 --- a/src/ascii_char.rs +++ b/src/ascii_char.rs @@ -3,6 +3,7 @@ use core::mem; use core::{char, fmt}; #[cfg(feature = "std")] use std::error::Error; +use std::str::pattern::{Pattern, CharSearcher}; #[allow(non_camel_case_types)] /// An ASCII character. It wraps a `u8`, with the highest bit always zero. @@ -938,6 +939,15 @@ impl ToAsciiChar for u16 { } } +impl<'a> Pattern<'a> for AsciiChar { + type Searcher = CharSearcher<'a>; + + fn into_searcher(self, haystack: &'a str) -> Self::Searcher { + self.as_char().into_searcher(haystack) + } +} + + #[cfg(test)] mod tests { use super::{AsciiChar, ToAsciiChar, ToAsciiCharError}; diff --git a/src/ascii_str.rs b/src/ascii_str.rs index e33b28f..99c9c24 100644 --- a/src/ascii_str.rs +++ b/src/ascii_str.rs @@ -14,6 +14,7 @@ use std::ffi::CStr; use ascii_char::AsciiChar; #[cfg(feature = "alloc")] use ascii_string::AsciiString; +use std::str::pattern::{Pattern, ReverseSearcher, Searcher}; /// [`AsciiStr`] represents a byte or string slice that only contains ASCII characters. /// @@ -186,11 +187,13 @@ impl AsciiStr { /// assert_eq!(words, ["apple", "banana", "lemon"]); /// ``` #[must_use] - pub fn split(&self, on: AsciiChar) -> impl DoubleEndedIterator { + pub fn split(&self, on: T) -> impl DoubleEndedIterator { Split { - on, + matcher: on.into_searcher(self.as_str()), ended: false, - chars: self.chars(), + start: 0, + end: self.len(), + allow_trailing_empty: false } } @@ -653,47 +656,69 @@ impl<'a> DoubleEndedIterator for CharsRef<'a> { /// An iterator over parts of an `AsciiStr` separated by an `AsciiChar`. /// /// This type is created by [`AsciiChar::split()`](struct.AsciiChar.html#method.split). -#[derive(Clone, Debug)] -struct Split<'a> { - on: AsciiChar, +struct Split<'a, T> where T: Pattern<'a> { + // Safety invariant: matcher.haystack() must be valid ASCII + matcher: T::Searcher, ended: bool, - chars: Chars<'a>, + start: usize, + end: usize, + allow_trailing_empty: bool, } -impl<'a> Iterator for Split<'a> { +impl<'a, T> Iterator for Split<'a, T> where T: Pattern<'a> { type Item = &'a AsciiStr; - fn next(&mut self) -> Option<&'a AsciiStr> { + fn next(&mut self) -> Option { if !self.ended { - let start: &AsciiStr = self.chars.as_str(); - let split_on = self.on; - - if let Some(at) = self.chars.position(|ch| ch == split_on) { - // SAFETY: `at` is guaranteed to be in bounds, as `position` returns `Ok(0..len)`. - Some(unsafe { start.as_slice().get_unchecked(..at).into() }) - } else { - self.ended = true; - Some(start) + let haystack = self.matcher.haystack(); + match self.matcher.next_match() { + // SAFETY: `Searcher` guarantees that `a` and `b` lie on unicode boundaries. + Some((a, b)) => unsafe { + let elt = haystack.get_unchecked(self.start..a); + self.start = b; + // SAFETY: Value given to matcher is guaranteed ASCII, so any slice of it is + // ASCII as well + Some(AsciiStr::from_ascii_unchecked(elt.as_bytes())) + }, + None => self.get_end(), } } else { None } } } -impl<'a> DoubleEndedIterator for Split<'a> { - fn next_back(&mut self) -> Option<&'a AsciiStr> { - if !self.ended { - let start: &AsciiStr = self.chars.as_str(); - let split_on = self.on; +impl<'a, T> DoubleEndedIterator for Split<'a, T> where T: Pattern<'a>, T::Searcher: ReverseSearcher<'a>, { + fn next_back(&mut self) -> Option { + if self.ended { + return None; + } - if let Some(at) = self.chars.rposition(|ch| ch == split_on) { - // SAFETY: `at` is guaranteed to be in bounds, as `rposition` returns `Ok(0..len)`, and slices `1..`, `2..`, etc... until `len..` inclusive, are valid. - Some(unsafe { start.as_slice().get_unchecked(at + 1..).into() }) - } else { - self.ended = true; - Some(start) + if !self.allow_trailing_empty { + self.allow_trailing_empty = true; + match self.next_back() { + Some(elt) if !elt.is_empty() => return Some(elt), + _ => { + if self.ended { + return None; + } + } } - } else { - None + } + + let haystack = self.matcher.haystack(); + match self.matcher.next_match_back() { + // SAFETY: `haystack` is always ASCII, so all indices are valid + Some((a, b)) => unsafe { + let elt = haystack.get_unchecked(b..self.end); + self.end = a; + // SAFETY: Value given to matcher is guaranteed ASCII, so any slice of it is + // ASCII as well + Some(AsciiStr::from_ascii_unchecked(elt.as_bytes())) + }, + // SAFETY: `haystack` is always ASCII, so all indices are valid + None => unsafe { + self.ended = true; + Some(haystack.get_unchecked(self.start..self.end)) + }, } } } pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy