1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
// Copyright 2012-2015 The Rust Project Developers. See the COPYRIGHT
// file at the top-level directory of this distribution and at
// http://rust-lang.org/COPYRIGHT.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::fmt::{self, Write};

#[derive(Clone)]
enum DecompositionType {
    Canonical,
    Compatible
}

/// External iterator for a string decomposition's characters.
#[derive(Clone)]
pub struct Decompositions<I> {
    kind: DecompositionType,
    iter: I,
    done: bool,

    // This buffer stores pairs of (canonical combining class, character),
    // pushed onto the end in text order.
    //
    // It's split into two contiguous regions by the `ready` offset.  The first
    // `ready` pairs are sorted and ready to emit on demand.  The "pending"
    // suffix afterwards still needs more characters for us to be able to sort
    // in canonical order and is not safe to emit.
    buffer: Vec<(u8, char)>,
    ready: usize,
}

#[inline]
pub fn new_canonical<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
    Decompositions {
        kind: self::DecompositionType::Canonical,
        iter: iter,
        done: false,
        buffer: Vec::new(),
        ready: 0,
    }
}

#[inline]
pub fn new_compatible<I: Iterator<Item=char>>(iter: I) -> Decompositions<I> {
    Decompositions {
        kind: self::DecompositionType::Compatible,
        iter: iter,
        done: false,
        buffer: Vec::new(),
        ready: 0,
    }
}

impl<I> Decompositions<I> {
    #[inline]
    fn push_back(&mut self, ch: char) {
        let class = super::char::canonical_combining_class(ch);
        if class == 0 {
            self.sort_pending();
        }
        self.buffer.push((class, ch));
    }

    #[inline]
    fn sort_pending(&mut self) {
        if self.ready == 0 && self.buffer.is_empty() {
            return;
        }

        // NB: `sort_by_key` is stable, so it will preserve the original text's
        // order within a combining class.
        self.buffer[self.ready..].sort_by_key(|k| k.0);
        self.ready = self.buffer.len();
    }

    #[inline]
    fn pop_front(&mut self) -> Option<char> {
        if self.ready == 0 {
            None
        } else {
            self.ready -= 1;
            Some(self.buffer.remove(0).1)
        }
    }
}

impl<I: Iterator<Item=char>> Iterator for Decompositions<I> {
    type Item = char;

    #[inline]
    fn next(&mut self) -> Option<char> {
        while self.ready == 0 && !self.done {
            match (self.iter.next(), &self.kind) {
                (Some(ch), &DecompositionType::Canonical) => {
                    super::char::decompose_canonical(ch, |d| self.push_back(d));
                },
                (Some(ch), &DecompositionType::Compatible) => {
                    super::char::decompose_compatible(ch, |d| self.push_back(d));
                },
                (None, _) => {
                    self.sort_pending();
                    self.done = true;
                },
            }
        }
        self.pop_front()
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        let (lower, _) = self.iter.size_hint();
        (lower, None)
    }
}

impl<I: Iterator<Item=char> + Clone> fmt::Display for Decompositions<I> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        for c in self.clone() {
            f.write_char(c)?;
        }
        Ok(())
    }
}
pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy