1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
use Utf8Char;
extern crate std;
use std::{fmt,mem,u32,u64};
use std::ops::Not;
use std::io::{Read, Error as ioError};
/// Iterator over the UTF-8 bytes of a single code point, packed into a `u32`.
///
/// The lowest byte of the wrapped integer is the next byte to be yielded.
/// Slots that hold no byte are filled with `0xff`, a value that never occurs
/// in valid UTF-8 and therefore works as the end marker.
#[derive(Clone, Copy)]
pub struct Utf8Iterator(u32);
impl From<Utf8Char> for Utf8Iterator {
    /// Builds an iterator over the bytes of `uc`.
    ///
    /// The four bytes of `uc` are loaded so that its first in-memory byte ends
    /// up in the lowest bits, and every byte beyond `uc.len()` is forced to
    /// `0xff` so that iteration stops there.
    fn from(uc: Utf8Char) -> Self {
        // SAFETY: `transmute` only compiles when `Utf8Char` is exactly as large
        // as `u32`. NOTE(review): this presumably relies on `Utf8Char` being a
        // plain four-byte buffer with the encoded bytes in order; confirm
        // against its definition.
        let raw: u32 = unsafe { mem::transmute(uc) };
        // Interpret the in-memory bytes as little-endian on every platform.
        let payload = u32::from_le(raw);
        let used_bits = uc.len() as u64 * 8;
        // Shifting in 64 bits keeps the shift valid when all four bytes are
        // used (a shift by 32); the truncation to `u32` then leaves no filler.
        let filler = (u64::MAX << used_bits) as u32;
        Utf8Iterator(filler | payload)
    }
}
impl From<char> for Utf8Iterator {
    /// Converts `c` to a `Utf8Char` and then turns that into an iterator
    /// through the `From<Utf8Char>` conversion.
    fn from(c: char) -> Self {
        let encoded = Utf8Char::from(c);
        encoded.into()
    }
}
impl Iterator for Utf8Iterator {
    type Item = u8;

    /// Yields the lowest stored byte, or `None` once that byte is the `0xff`
    /// end marker.
    fn next(&mut self) -> Option<u8> {
        match self.0 as u8 {
            0xff => None,
            byte => {
                // Drop the yielded byte and refill the top slot with the
                // marker so the iterator ends after the last real byte.
                self.0 = (self.0 >> 8) | 0xff_00_00_00;
                Some(byte)
            }
        }
    }

    /// Reports the exact number of bytes left as both lower and upper bound.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = self.len();
        (remaining, Some(remaining))
    }
}
impl ExactSizeIterator for Utf8Iterator {
fn len(&self) -> usize {
let unused_bytes = self.0.not().leading_zeros() / 8;
4 - unused_bytes as usize
}
}
impl Read for Utf8Iterator {
    /// Copies as many remaining bytes as fit into `buf` and returns how many
    /// were written. This never returns an error; `Ok(0)` means either the
    /// iterator is exhausted or `buf` is empty.
    fn read(&mut self, buf: &mut [u8]) -> Result<usize, ioError> {
        let mut copied = 0;
        // The buffer is the first operand of `zip`, so it is polled first: once
        // it is full, no further byte is pulled from the iterator and lost.
        for (slot, byte) in buf.iter_mut().zip(&mut *self) {
            *slot = byte;
            copied += 1;
        }
        Ok(copied)
    }
}
impl fmt::Debug for Utf8Iterator {
fn fmt(&self, fmtr: &mut fmt::Formatter) -> fmt::Result {
let content: Vec<u8> = self.collect();
write!(fmtr, "bytes left: {:?}, content: {:x}", content, self.0)
}
}