shared/
utf8.rs

/// Splits `bytes` into its maximal runs of valid UTF-8, skipping anything
/// that is not valid UTF-8.
///
/// The returned iterator yields each valid run as a `&str` borrowed from
/// `bytes`, in order of appearance. Malformed sequences between (or around)
/// the runs are dropped silently; they never show up as empty strings, so
/// every yielded chunk is non-empty. An incomplete multi-byte sequence at
/// the very end of the input is dropped as well.
///
/// Once the input is exhausted the iterator keeps returning `None`.
///
/// # Examples
///
/// ```ignore
/// let chunks: Vec<&str> = utf8_chunks(b"\xffabc\xffdef").collect();
/// assert_eq!(chunks, ["abc", "def"]);
/// ```
pub fn utf8_chunks(mut bytes: &[u8]) -> impl Iterator<Item = &str> {
    core::iter::from_fn(move || {
        // Loop (rather than yielding a placeholder) so that runs of invalid
        // bytes are skipped in a single `next()` call.
        while !bytes.is_empty() {
            match core::str::from_utf8(bytes) {
                Ok(text) => {
                    // Everything that is left is valid: yield it and finish.
                    bytes = &[];
                    return Some(text);
                }
                Err(err) => {
                    let (valid, rest) = bytes.split_at(err.valid_up_to());

                    // `error_len() == None` means the input ended in the
                    // middle of a multi-byte sequence, so all of `rest`
                    // belongs to that truncated sequence and is discarded.
                    let skip = err.error_len().unwrap_or(rest.len());
                    bytes = &rest[skip..];

                    if !valid.is_empty() {
                        // SAFETY: `Utf8Error::valid_up_to` guarantees that the
                        // bytes before that index are valid UTF-8, and `valid`
                        // is exactly that prefix.
                        return Some(unsafe { core::str::from_utf8_unchecked(valid) });
                    }
                    // No valid prefix: keep scanning past the bad bytes.
                }
            }
        }

        None
    })
}