1use super::char::EscapeDebugExtArgs;
2use super::from_utf8_unchecked;
3use super::validations::utf8_char_width;
4use crate::fmt;
5use crate::fmt::{Formatter, Write};
6use crate::iter::FusedIterator;
7
8impl [u8] {
9 #[stable(feature = "utf8_chunks", since = "1.79.0")]
45 pub fn utf8_chunks(&self) -> Utf8Chunks<'_> {
46 Utf8Chunks { source: self }
47 }
48}
49
50#[stable(feature = "utf8_chunks", since = "1.79.0")]
71#[derive(#[automatically_derived]
#[stable(feature = "utf8_chunks", since = "1.79.0")]
impl<'a> crate::clone::Clone for Utf8Chunk<'a> {
#[inline]
fn clone(&self) -> Utf8Chunk<'a> {
Utf8Chunk {
valid: crate::clone::Clone::clone(&self.valid),
invalid: crate::clone::Clone::clone(&self.invalid),
}
}
}Clone, #[automatically_derived]
#[stable(feature = "utf8_chunks", since = "1.79.0")]
impl<'a> crate::fmt::Debug for Utf8Chunk<'a> {
#[inline]
fn fmt(&self, f: &mut crate::fmt::Formatter) -> crate::fmt::Result {
crate::fmt::Formatter::debug_struct_field2_finish(f, "Utf8Chunk",
"valid", &self.valid, "invalid", &&self.invalid)
}
}Debug, #[automatically_derived]
#[stable(feature = "utf8_chunks", since = "1.79.0")]
impl<'a> crate::cmp::PartialEq for Utf8Chunk<'a> {
#[inline]
fn eq(&self, other: &Utf8Chunk<'a>) -> bool {
self.valid == other.valid && self.invalid == other.invalid
}
}PartialEq, #[automatically_derived]
#[stable(feature = "utf8_chunks", since = "1.79.0")]
impl<'a> crate::cmp::Eq for Utf8Chunk<'a> {
#[inline]
#[doc(hidden)]
#[coverage(off)]
fn assert_fields_are_eq(&self) {
let _: crate::cmp::AssertParamIsEq<&'a str>;
let _: crate::cmp::AssertParamIsEq<&'a [u8]>;
}
}Eq)]
72pub struct Utf8Chunk<'a> {
73 valid: &'a str,
74 invalid: &'a [u8],
75}
76
77impl<'a> Utf8Chunk<'a> {
78 #[must_use]
83 #[stable(feature = "utf8_chunks", since = "1.79.0")]
84 pub fn valid(&self) -> &'a str {
85 self.valid
86 }
87
88 #[must_use]
103 #[stable(feature = "utf8_chunks", since = "1.79.0")]
104 pub fn invalid(&self) -> &'a [u8] {
105 self.invalid
106 }
107}
108
109#[must_use]
110#[unstable(feature = "str_internals", issue = "none")]
111pub struct Debug<'a>(&'a [u8]);
112
113#[unstable(feature = "str_internals", issue = "none")]
114impl fmt::Debug for Debug<'_> {
115 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
116 f.write_char('"')?;
117
118 for chunk in self.0.utf8_chunks() {
119 {
122 let valid = chunk.valid();
123 let mut from = 0;
124 for (i, c) in valid.char_indices() {
125 let esc = c.escape_debug_ext(EscapeDebugExtArgs {
126 escape_grapheme_extended: true,
127 escape_single_quote: false,
128 escape_double_quote: true,
129 });
130 if esc.len() != 1 {
132 f.write_str(&valid[from..i])?;
133 for c in esc {
134 f.write_char(c)?;
135 }
136 from = i + c.len_utf8();
137 }
138 }
139 f.write_str(&valid[from..])?;
140 }
141
142 for &b in chunk.invalid() {
144 f.write_fmt(format_args!("\\x{0:02X}", b))write!(f, "\\x{:02X}", b)?;
145 }
146 }
147
148 f.write_char('"')
149 }
150}
151
152#[must_use = "iterators are lazy and do nothing unless consumed"]
184#[stable(feature = "utf8_chunks", since = "1.79.0")]
185#[derive(#[automatically_derived]
#[stable(feature = "utf8_chunks", since = "1.79.0")]
impl<'a> crate::clone::Clone for Utf8Chunks<'a> {
#[inline]
fn clone(&self) -> Utf8Chunks<'a> {
Utf8Chunks { source: crate::clone::Clone::clone(&self.source) }
}
}Clone)]
186pub struct Utf8Chunks<'a> {
187 source: &'a [u8],
188}
189
190impl<'a> Utf8Chunks<'a> {
191 #[doc(hidden)]
192 #[unstable(feature = "str_internals", issue = "none")]
193 pub fn debug(&self) -> Debug<'_> {
194 Debug(self.source)
195 }
196}
197
198#[stable(feature = "utf8_chunks", since = "1.79.0")]
199impl<'a> Iterator for Utf8Chunks<'a> {
200 type Item = Utf8Chunk<'a>;
201
202 fn next(&mut self) -> Option<Utf8Chunk<'a>> {
203 if self.source.is_empty() {
204 return None;
205 }
206
207 const TAG_CONT_U8: u8 = 128;
208 fn safe_get(xs: &[u8], i: usize) -> u8 {
209 *xs.get(i).unwrap_or(&0)
210 }
211
212 let mut i = 0;
213 let mut valid_up_to = 0;
214 while let Some(byte) = self.source.get(i).copied() {
215 i += 1;
216
217 if byte < 128 {
218 } else {
222 let w = utf8_char_width(byte);
223
224 match w {
225 2 => {
226 if safe_get(self.source, i) & 192 != TAG_CONT_U8 {
227 break;
228 }
229 i += 1;
230 }
231 3 => {
232 match (byte, safe_get(self.source, i)) {
233 (0xE0, 0xA0..=0xBF) => (),
234 (0xE1..=0xEC, 0x80..=0xBF) => (),
235 (0xED, 0x80..=0x9F) => (),
236 (0xEE..=0xEF, 0x80..=0xBF) => (),
237 _ => break,
238 }
239 i += 1;
240 if safe_get(self.source, i) & 192 != TAG_CONT_U8 {
241 break;
242 }
243 i += 1;
244 }
245 4 => {
246 match (byte, safe_get(self.source, i)) {
247 (0xF0, 0x90..=0xBF) => (),
248 (0xF1..=0xF3, 0x80..=0xBF) => (),
249 (0xF4, 0x80..=0x8F) => (),
250 _ => break,
251 }
252 i += 1;
253 if safe_get(self.source, i) & 192 != TAG_CONT_U8 {
254 break;
255 }
256 i += 1;
257 if safe_get(self.source, i) & 192 != TAG_CONT_U8 {
258 break;
259 }
260 i += 1;
261 }
262 _ => break,
263 }
264 }
265
266 valid_up_to = i;
267 }
268
269 let (inspected, remaining) = unsafe { self.source.split_at_unchecked(i) };
278 self.source = remaining;
279
280 let (valid, invalid) = unsafe { inspected.split_at_unchecked(valid_up_to) };
283
284 Some(Utf8Chunk {
285 valid: unsafe { from_utf8_unchecked(valid) },
287 invalid,
288 })
289 }
290}
291
292#[stable(feature = "utf8_chunks", since = "1.79.0")]
293impl FusedIterator for Utf8Chunks<'_> {}
294
295#[stable(feature = "utf8_chunks", since = "1.79.0")]
296impl fmt::Debug for Utf8Chunks<'_> {
297 fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
298 f.debug_struct("Utf8Chunks").field("source", &self.debug()).finish()
299 }
300}