77 lines
2.4 KiB
Rust
77 lines
2.4 KiB
Rust
use std::io::{self, Read};
|
|
|
|
/// Reader adapter that turns a stream of single-byte characters into UTF-8.
///
/// Every byte pulled from the wrapped reader is interpreted as the Unicode
/// code point with the same numeric value (U+0000..=U+00FF, i.e. ISO-8859-1 /
/// Latin-1) and re-emitted as its UTF-8 encoding through [`std::io::Read`].
///
/// The wrapped reader is consumed one byte at a time via [`std::io::Bytes`].
#[derive(Debug)]
pub struct Latin12Utf8Reader<X> {
    /// Byte-by-byte iterator over the wrapped reader.
    inner: std::io::Bytes<X>,
    /// Encoded UTF-8 bytes that did not fit into the caller's buffer during a
    /// previous `read` call; they are handed out first on the next call.
    buf: Vec<u8>,
}
|
|
|
|
impl<X: Read> Latin12Utf8Reader<X> {
|
|
pub fn new(x: X) -> Self {
|
|
Self {
|
|
inner: x.bytes(),
|
|
buf: Default::default(),
|
|
}
|
|
}
|
|
|
|
fn move_to_dest(&mut self, buf: &mut [u8]) -> usize {
|
|
let ret = std::cmp::min(buf.len(), self.buf.len());
|
|
debug_assert!(ret > 0);
|
|
buf[..ret].copy_from_slice(&self.buf[..ret]);
|
|
let _ = self.buf.drain(..ret);
|
|
ret
|
|
}
|
|
}
|
|
|
|
impl<X: Read> Read for Latin12Utf8Reader<X> {
|
|
fn read(&mut self, mut buf: &mut [u8]) -> io::Result<usize> {
|
|
if buf.is_empty() {
|
|
return Ok(0);
|
|
}
|
|
let mut ret = 0;
|
|
if !self.buf.is_empty() {
|
|
let ret2 = self.move_to_dest(buf);
|
|
buf = &mut buf[ret2..];
|
|
ret = ret2;
|
|
}
|
|
while !buf.is_empty() {
|
|
if let Some(y) = self.inner.next() {
|
|
match y {
|
|
Ok(x) => {
|
|
let x = if x <= 0x7f {
|
|
x as char
|
|
} else {
|
|
std::char::from_u32(x as u32).ok_or_else(|| {
|
|
std::io::Error::new(
|
|
std::io::ErrorKind::InvalidData,
|
|
format!("got invalid input character: {:#02x}", x),
|
|
)
|
|
})?
|
|
};
|
|
let xl = x.len_utf8();
|
|
if buf.len() >= xl {
|
|
x.encode_utf8(buf);
|
|
buf = &mut buf[xl..];
|
|
} else {
|
|
let mut tmp = [0; 4];
|
|
x.encode_utf8(&mut tmp[..]);
|
|
self.buf.extend(tmp.iter().take(xl).copied());
|
|
ret += self.move_to_dest(buf);
|
|
break;
|
|
}
|
|
}
|
|
Err(x) if ret == 0 => {
|
|
return Err(x);
|
|
}
|
|
Err(_) => {
|
|
// ignore error
|
|
break;
|
|
}
|
|
}
|
|
} else {
|
|
break;
|
|
}
|
|
}
|
|
Ok(ret)
|
|
}
|
|
}
|