shared/utf8.rs
/// Splits `bytes` into its maximal valid UTF-8 runs, discarding invalid bytes.
///
/// The returned iterator yields each non-empty stretch of well-formed UTF-8
/// found in `bytes`, in order, borrowing directly from the input (no
/// allocation). Invalid sequences, including a truncated multi-byte sequence
/// at the very end of the input, act as separators and are skipped entirely.
///
/// Empty chunks are never yielded: input that is empty or consists solely of
/// invalid bytes produces an empty iterator.
pub fn utf8_chunks(mut bytes: &[u8]) -> impl Iterator<Item = &str> {
    core::iter::from_fn(move || {
        // Loop inside the closure so that leading invalid sequences are
        // skipped here rather than surfacing to the caller as empty chunks.
        while !bytes.is_empty() {
            match core::str::from_utf8(bytes) {
                Ok(s) => {
                    // The whole remaining slice is valid; nothing is left afterwards.
                    bytes = &[];
                    return Some(s);
                }
                Err(e) => {
                    let (head, rest) = bytes.split_at(e.valid_up_to());

                    // `error_len()` is `None` when the input ends in the middle of a
                    // multi-byte sequence; in that case everything left is invalid.
                    let skip = e.error_len().unwrap_or(rest.len());
                    bytes = &rest[skip..];

                    if !head.is_empty() {
                        // SAFETY: `head` is exactly the first `valid_up_to()` bytes of
                        // the slice just checked, which `Utf8Error` guarantees to be
                        // valid UTF-8.
                        return Some(unsafe { core::str::from_utf8_unchecked(head) });
                    }
                    // No valid prefix: keep scanning past the invalid sequence.
                }
            }
        }
        None
    })
}