//! Implementation of [the WTF-8 encoding](https://simonsapin.github.io/wtf-8/).
//!
//! This library uses Rust’s type system to maintain
//! [well-formedness](https://simonsapin.github.io/wtf-8/#well-formed),
//! like the `String` and `&str` types do for UTF-8.
//!
//! Since [WTF-8 must not be used
//! for interchange](https://simonsapin.github.io/wtf-8/#intended-audience),
//! this library deliberately does not provide access to the underlying bytes
//! of WTF-8 strings,
//! nor can it decode WTF-8 from arbitrary bytes.
//! WTF-8 strings can be obtained from UTF-8, UTF-16, or code points.
13#![unstable(
14 feature = "wtf8_internals",
15 issue = "none",
16 reason = "this is internal code for representing OsStr on some platforms and not a public API"
17)]
18// rustdoc bug: doc(hidden) on the module won't stop types in the module from showing up in trait
19// implementations, so, we'll have to add more doc(hidden)s anyway
20#![doc(hidden)]
2122use crate::char::{EscapeDebugExtArgs, encode_utf16_raw};
23use crate::clone::CloneToUninit;
24use crate::fmt::{self, Write};
25use crate::hash::{Hash, Hasher};
26use crate::iter::FusedIterator;
27use crate::num::niche_types::CodePointInner;
28use crate::str::next_code_point;
29use crate::{ops, slice, str};
3031/// A Unicode code point: from U+0000 to U+10FFFF.
32///
33/// Compares with the `char` type,
34/// which represents a Unicode scalar value:
35/// a code point that is not a surrogate (U+D800 to U+DFFF).
36#[derive(#[automatically_derived]
impl crate::cmp::Eq for CodePoint {
#[inline]
#[doc(hidden)]
#[coverage(off)]
fn assert_fields_are_eq(&self) {
let _: crate::cmp::AssertParamIsEq<CodePointInner>;
}
}Eq, #[automatically_derived]
impl crate::cmp::PartialEq for CodePoint {
#[inline]
fn eq(&self, other: &CodePoint) -> bool { self.0 == other.0 }
}PartialEq, #[automatically_derived]
impl crate::cmp::Ord for CodePoint {
#[inline]
fn cmp(&self, other: &CodePoint) -> crate::cmp::Ordering {
crate::cmp::Ord::cmp(&self.0, &other.0)
}
}Ord, #[automatically_derived]
impl crate::cmp::PartialOrd for CodePoint {
#[inline]
fn partial_cmp(&self, other: &CodePoint)
-> crate::option::Option<crate::cmp::Ordering> {
crate::cmp::PartialOrd::partial_cmp(&self.0, &other.0)
}
}PartialOrd, #[automatically_derived]
impl crate::clone::Clone for CodePoint {
#[inline]
fn clone(&self) -> CodePoint {
let _: crate::clone::AssertParamIsClone<CodePointInner>;
*self
}
}Clone, #[automatically_derived]
impl crate::marker::Copy for CodePoint { }Copy)]
37#[doc(hidden)]
38pub struct CodePoint(CodePointInner);
3940/// Format the code point as `U+` followed by four to six hexadecimal digits.
41/// Example: `U+1F4A9`
42impl fmt::Debugfor CodePoint {
43#[inline]
44fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
45formatter.write_fmt(format_args!("U+{0:04X}", self.0.as_inner()))write!(formatter, "U+{:04X}", self.0.as_inner())46 }
47}
4849impl CodePoint {
50/// Unsafely creates a new `CodePoint` without checking the value.
51 ///
52 /// Only use when `value` is known to be less than or equal to 0x10FFFF.
53#[inline]
54pub unsafe fn from_u32_unchecked(value: u32) -> CodePoint {
55// SAFETY: Guaranteed by caller.
56CodePoint(unsafe { CodePointInner::new_unchecked(value) })
57 }
5859/// Creates a new `CodePoint` if the value is a valid code point.
60 ///
61 /// Returns `None` if `value` is above 0x10FFFF.
62#[inline]
63pub fn from_u32(value: u32) -> Option<CodePoint> {
64Some(CodePoint(CodePointInner::new(value)?))
65 }
6667/// Creates a new `CodePoint` from a `char`.
68 ///
69 /// Since all Unicode scalar values are code points, this always succeeds.
70#[inline]
71pub fn from_char(value: char) -> CodePoint {
72// SAFETY: All char are valid for this type.
73unsafe { CodePoint::from_u32_unchecked(valueas u32) }
74 }
7576/// Returns the numeric value of the code point.
77#[inline]
78pub fn to_u32(&self) -> u32 {
79self.0.as_inner()
80 }
8182/// Returns the numeric value of the code point if it is a leading surrogate.
83#[inline]
84pub fn to_lead_surrogate(&self) -> Option<u16> {
85match self.to_u32() {
86 lead @ 0xD800..=0xDBFF => Some(leadas u16),
87_ => None,
88 }
89 }
9091/// Returns the numeric value of the code point if it is a trailing surrogate.
92#[inline]
93pub fn to_trail_surrogate(&self) -> Option<u16> {
94match self.to_u32() {
95 trail @ 0xDC00..=0xDFFF => Some(trailas u16),
96_ => None,
97 }
98 }
99100/// Optionally returns a Unicode scalar value for the code point.
101 ///
102 /// Returns `None` if the code point is a surrogate (from U+D800 to U+DFFF).
103#[inline]
104pub fn to_char(&self) -> Option<char> {
105match self.to_u32() {
1060xD800..=0xDFFF => None,
107// SAFETY: We explicitly check that the char is valid.
108valid => Some(unsafe { char::from_u32_unchecked(valid) }),
109 }
110 }
111112/// Returns a Unicode scalar value for the code point.
113 ///
114 /// Returns `'\u{FFFD}'` (the replacement character “�”)
115 /// if the code point is a surrogate (from U+D800 to U+DFFF).
116#[inline]
117pub fn to_char_lossy(&self) -> char {
118self.to_char().unwrap_or(char::REPLACEMENT_CHARACTER)
119 }
120}
121122/// A borrowed slice of well-formed WTF-8 data.
123///
124/// Similar to `&str`, but can additionally contain surrogate code points
125/// if they’re not in a surrogate pair.
126#[derive(#[automatically_derived]
impl crate::cmp::Eq for Wtf8 {
#[inline]
#[doc(hidden)]
#[coverage(off)]
fn assert_fields_are_eq(&self) {
let _: crate::cmp::AssertParamIsEq<[u8]>;
}
}Eq, #[automatically_derived]
impl crate::cmp::Ord for Wtf8 {
#[inline]
fn cmp(&self, other: &Wtf8) -> crate::cmp::Ordering {
crate::cmp::Ord::cmp(&self.bytes, &other.bytes)
}
}Ord, #[automatically_derived]
impl crate::cmp::PartialEq for Wtf8 {
#[inline]
fn eq(&self, other: &Wtf8) -> bool { self.bytes == other.bytes }
}PartialEq, #[automatically_derived]
impl crate::cmp::PartialOrd for Wtf8 {
#[inline]
fn partial_cmp(&self, other: &Wtf8)
-> crate::option::Option<crate::cmp::Ordering> {
crate::cmp::PartialOrd::partial_cmp(&self.bytes, &other.bytes)
}
}PartialOrd)]
127#[repr(transparent)]
128#[rustc_has_incoherent_inherent_impls]
129#[doc(hidden)]
130pub struct Wtf8 {
131 bytes: [u8],
132}
133134impl AsRef<[u8]> for Wtf8 {
135#[inline]
136fn as_ref(&self) -> &[u8] {
137&self.bytes
138 }
139}
140141/// Formats the string in double quotes, with characters escaped according to
142/// [`char::escape_debug`] and unpaired surrogates represented as `\u{xxxx}`,
143/// where each `x` is a hexadecimal digit.
144impl fmt::Debugfor Wtf8 {
145fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
146fn write_str_escaped(f: &mut fmt::Formatter<'_>, s: &str) -> fmt::Result {
147use crate::fmt::Writeas _;
148for c in s.chars().flat_map(|c| {
149 c.escape_debug_ext(EscapeDebugExtArgs {
150 escape_grapheme_extended: true,
151 escape_single_quote: false,
152 escape_double_quote: true,
153 })
154 }) {
155 f.write_char(c)?
156}
157Ok(())
158 }
159160formatter.write_char('"')?;
161let mut pos = 0;
162while let Some((surrogate_pos, surrogate)) = self.next_surrogate(pos) {
163// SAFETY: next_surrogate provides an index for a range of valid UTF-8 bytes.
164write_str_escaped(formatter, unsafe {
165 str::from_utf8_unchecked(&self.bytes[pos..surrogate_pos])
166 })?;
167formatter.write_fmt(format_args!("\\u{{{0:x}}}", surrogate))write!(formatter, "\\u{{{:x}}}", surrogate)?;
168 pos = surrogate_pos + 3;
169 }
170171// SAFETY: after next_surrogate returns None, the remainder is valid UTF-8.
172write_str_escaped(formatter, unsafe { str::from_utf8_unchecked(&self.bytes[pos..]) })?;
173formatter.write_char('"')
174 }
175}
176177/// Formats the string with unpaired surrogates substituted with the replacement
178/// character, U+FFFD.
179impl fmt::Displayfor Wtf8 {
180fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> fmt::Result {
181let wtf8_bytes = &self.bytes;
182let mut pos = 0;
183loop {
184match self.next_surrogate(pos) {
185Some((surrogate_pos, _)) => {
186// SAFETY: next_surrogate provides an index for a range of valid UTF-8 bytes.
187formatter.write_str(unsafe {
188 str::from_utf8_unchecked(&wtf8_bytes[pos..surrogate_pos])
189 })?;
190formatter.write_char(char::REPLACEMENT_CHARACTER)?;
191pos = surrogate_pos + 3;
192 }
193None => {
194// SAFETY: after next_surrogate returns None, the remainder is valid UTF-8.
195let s = unsafe { str::from_utf8_unchecked(&wtf8_bytes[pos..]) };
196if pos == 0 { return s.fmt(formatter) } else { return formatter.write_str(s) }
197 }
198 }
199 }
200 }
201}
202203impl Wtf8 {
204/// Creates a WTF-8 slice from a UTF-8 `&str` slice.
205#[inline]
206pub fn from_str(value: &str) -> &Wtf8 {
207// SAFETY: Since WTF-8 is a superset of UTF-8, this always is valid.
208unsafe { Wtf8::from_bytes_unchecked(value.as_bytes()) }
209 }
210211/// Creates a WTF-8 slice from a WTF-8 byte slice.
212 ///
213 /// Since the byte slice is not checked for valid WTF-8, this functions is
214 /// marked unsafe.
215#[inline]
216pub unsafe fn from_bytes_unchecked(value: &[u8]) -> &Wtf8 {
217// SAFETY: start with &[u8], end with fancy &[u8]
218unsafe { &*(valueas *const [u8] as *const Wtf8) }
219 }
220221/// Creates a mutable WTF-8 slice from a mutable WTF-8 byte slice.
222 ///
223 /// Since the byte slice is not checked for valid WTF-8, this functions is
224 /// marked unsafe.
225#[inline]
226pub unsafe fn from_mut_bytes_unchecked(value: &mut [u8]) -> &mut Wtf8 {
227// SAFETY: start with &mut [u8], end with fancy &mut [u8]
228unsafe { &mut *(valueas *mut [u8] as *mut Wtf8) }
229 }
230231/// Returns the length, in WTF-8 bytes.
232#[inline]
233pub fn len(&self) -> usize {
234self.bytes.len()
235 }
236237#[inline]
238pub fn is_empty(&self) -> bool {
239self.bytes.is_empty()
240 }
241242/// Returns the code point at `position` if it is in the ASCII range,
243 /// or `b'\xFF'` otherwise.
244 ///
245 /// # Panics
246 ///
247 /// Panics if `position` is beyond the end of the string.
248#[inline]
249pub fn ascii_byte_at(&self, position: usize) -> u8 {
250match self.bytes[position] {
251 ascii_byte @ 0x00..=0x7F => ascii_byte,
252_ => 0xFF,
253 }
254 }
255256/// Returns an iterator for the string’s code points.
257#[inline]
258pub fn code_points(&self) -> Wtf8CodePoints<'_> {
259Wtf8CodePoints { bytes: self.bytes.iter() }
260 }
261262/// Access raw bytes of WTF-8 data
263#[inline]
264pub fn as_bytes(&self) -> &[u8] {
265&self.bytes
266 }
267268/// Tries to convert the string to UTF-8 and return a `&str` slice.
269 ///
270 /// Returns `None` if the string contains surrogates.
271 ///
272 /// This does not copy the data.
273#[inline]
274pub fn as_str(&self) -> Result<&str, str::Utf8Error> {
275 str::from_utf8(&self.bytes)
276 }
277278/// Converts the WTF-8 string to potentially ill-formed UTF-16
279 /// and return an iterator of 16-bit code units.
280 ///
281 /// This is lossless:
282 /// calling `Wtf8Buf::from_ill_formed_utf16` on the resulting code units
283 /// would always return the original WTF-8 string.
284#[inline]
285pub fn encode_wide(&self) -> EncodeWide<'_> {
286EncodeWide { code_points: self.code_points(), extra: 0 }
287 }
288289#[inline]
290pub fn next_surrogate(&self, mut pos: usize) -> Option<(usize, u16)> {
291let mut iter = self.bytes[pos..].iter();
292loop {
293let b = *iter.next()?;
294if b < 0x80 {
295pos += 1;
296 } else if b < 0xE0 {
297iter.next();
298pos += 2;
299 } else if b == 0xED {
300match (iter.next(), iter.next()) {
301 (Some(&b2), Some(&b3)) if b2 >= 0xA0 => {
302return Some((pos, decode_surrogate(b2, b3)));
303 }
304_ => pos += 3,
305 }
306 } else if b < 0xF0 {
307iter.next();
308iter.next();
309pos += 3;
310 } else {
311iter.next();
312iter.next();
313iter.next();
314pos += 4;
315 }
316 }
317 }
318319#[inline]
320pub fn final_lead_surrogate(&self) -> Option<u16> {
321match self.bytes {
322 [.., 0xED, b2 @ 0xA0..=0xAF, b3] => Some(decode_surrogate(b2, b3)),
323_ => None,
324 }
325 }
326327#[inline]
328pub fn initial_trail_surrogate(&self) -> Option<u16> {
329match self.bytes {
330 [0xED, b2 @ 0xB0..=0xBF, b3, ..] => Some(decode_surrogate(b2, b3)),
331_ => None,
332 }
333 }
334335#[inline]
336pub fn make_ascii_lowercase(&mut self) {
337self.bytes.make_ascii_lowercase()
338 }
339340#[inline]
341pub fn make_ascii_uppercase(&mut self) {
342self.bytes.make_ascii_uppercase()
343 }
344345#[inline]
346pub fn is_ascii(&self) -> bool {
347self.bytes.is_ascii()
348 }
349350#[inline]
351pub fn eq_ignore_ascii_case(&self, other: &Self) -> bool {
352self.bytes.eq_ignore_ascii_case(&other.bytes)
353 }
354}
355356/// Returns a slice of the given string for the byte range \[`begin`..`end`).
357///
358/// # Panics
359///
360/// Panics when `begin` and `end` do not point to code point boundaries,
361/// or point beyond the end of the string.
362impl ops::Index<ops::Range<usize>> for Wtf8 {
363type Output = Wtf8;
364365#[inline]
366fn index(&self, range: ops::Range<usize>) -> &Wtf8 {
367if range.start <= range.end
368 && self.is_code_point_boundary(range.start)
369 && self.is_code_point_boundary(range.end)
370 {
371// SAFETY: is_code_point_boundary checks that the index is valid
372unsafe { slice_unchecked(self, range.start, range.end) }
373 } else {
374slice_error_fail(self, range.start, range.end)
375 }
376 }
377}
378379/// Returns a slice of the given string from byte `begin` to its end.
380///
381/// # Panics
382///
383/// Panics when `begin` is not at a code point boundary,
384/// or is beyond the end of the string.
385impl ops::Index<ops::RangeFrom<usize>> for Wtf8 {
386type Output = Wtf8;
387388#[inline]
389fn index(&self, range: ops::RangeFrom<usize>) -> &Wtf8 {
390if self.is_code_point_boundary(range.start) {
391// SAFETY: is_code_point_boundary checks that the index is valid
392unsafe { slice_unchecked(self, range.start, self.len()) }
393 } else {
394slice_error_fail(self, range.start, self.len())
395 }
396 }
397}
398399/// Returns a slice of the given string from its beginning to byte `end`.
400///
401/// # Panics
402///
403/// Panics when `end` is not at a code point boundary,
404/// or is beyond the end of the string.
405impl ops::Index<ops::RangeTo<usize>> for Wtf8 {
406type Output = Wtf8;
407408#[inline]
409fn index(&self, range: ops::RangeTo<usize>) -> &Wtf8 {
410if self.is_code_point_boundary(range.end) {
411// SAFETY: is_code_point_boundary checks that the index is valid
412unsafe { slice_unchecked(self, 0, range.end) }
413 } else {
414slice_error_fail(self, 0, range.end)
415 }
416 }
417}
418419impl ops::Index<ops::RangeFull> for Wtf8 {
420type Output = Wtf8;
421422#[inline]
423fn index(&self, _range: ops::RangeFull) -> &Wtf8 {
424self425 }
426}
/// Decodes the 16-bit surrogate value from the second and third bytes of a
/// three-byte sequence.
///
/// The first byte is assumed to be 0xED and is therefore not passed in.
/// Only the low six bits of each argument contribute to the result.
#[inline]
fn decode_surrogate(second_byte: u8, third_byte: u8) -> u16 {
    // Each continuation byte carries six payload bits; the second byte's bits
    // sit above the third byte's.
    let high_bits = ((second_byte & 0x3F) as u16) << 6;
    let low_bits = (third_byte & 0x3F) as u16;
    0xD800 | high_bits | low_bits
}
433434impl Wtf8 {
435/// Copied from str::is_char_boundary
436#[inline]
437pub fn is_code_point_boundary(&self, index: usize) -> bool {
438if index == 0 {
439return true;
440 }
441match self.bytes.get(index) {
442None => index == self.len(),
443Some(&b) => (bas i8) >= -0x40,
444 }
445 }
446447/// Verify that `index` is at the edge of either a valid UTF-8 codepoint
448 /// (i.e. a codepoint that's not a surrogate) or of the whole string.
449 ///
450 /// These are the cases currently permitted by `OsStr::self_encoded_bytes`.
451 /// Splitting between surrogates is valid as far as WTF-8 is concerned, but
452 /// we do not permit it in the public API because WTF-8 is considered an
453 /// implementation detail.
454#[track_caller]
455 #[inline]
456pub fn check_utf8_boundary(&self, index: usize) {
457if index == 0 {
458return;
459 }
460match self.bytes.get(index) {
461Some(0xED) => (), // Might be a surrogate
462Some(&b) if (bas i8) >= -0x40 => return,
463Some(_) => {
crate::panicking::panic_fmt(format_args!("byte index {0} is not a codepoint boundary",
index));
}panic!("byte index {index} is not a codepoint boundary"),
464Noneif index == self.len() => return,
465None => {
crate::panicking::panic_fmt(format_args!("byte index {0} is out of bounds",
index));
}panic!("byte index {index} is out of bounds"),
466 }
467if self.bytes[index + 1] >= 0xA0 {
468// There's a surrogate after index. Now check before index.
469if index >= 3 && self.bytes[index - 3] == 0xED && self.bytes[index - 2] >= 0xA0 {
470{
crate::panicking::panic_fmt(format_args!("byte index {0} lies between surrogate codepoints",
index));
};panic!("byte index {index} lies between surrogate codepoints");
471 }
472 }
473 }
474}
475476/// Copied from core::str::raw::slice_unchecked
477#[inline]
478unsafe fn slice_unchecked(s: &Wtf8, begin: usize, end: usize) -> &Wtf8 {
479// SAFETY: memory layout of a &[u8] and &Wtf8 are the same
480unsafe {
481let len = end - begin;
482let start = s.as_bytes().as_ptr().add(begin);
483Wtf8::from_bytes_unchecked(slice::from_raw_parts(start, len))
484 }
485}
486487#[inline(never)]
488fn slice_error_fail(s: &Wtf8, begin: usize, end: usize) -> ! {
489let len = s.len();
490if begin > len {
491{
crate::panicking::panic_fmt(format_args!("start byte index {0} is out of bounds for string of length {1}",
begin, len));
};panic!("start byte index {begin} is out of bounds for string of length {len}");
492 }
493if end > len {
494{
crate::panicking::panic_fmt(format_args!("end byte index {0} is out of bounds for string of length {1}",
end, len));
};panic!("end byte index {end} is out of bounds for string of length {len}");
495 }
496if begin > end {
497{
crate::panicking::panic_fmt(format_args!("byte range starts at {0} but ends at {1}",
begin, end));
};panic!("byte range starts at {begin} but ends at {end}");
498 }
499if !s.is_code_point_boundary(begin) {
500{
crate::panicking::panic_fmt(format_args!("byte index {0} is not a code point boundary",
begin));
};panic!("byte index {begin} is not a code point boundary");
501 }
502{
crate::panicking::panic_fmt(format_args!("byte index {0} is not a code point boundary",
end));
};panic!("byte index {end} is not a code point boundary");
503}
/// Iterator for the code points of a WTF-8 string.
///
/// Created with the method `.code_points()`.
#[derive(Clone)]
#[doc(hidden)]
pub struct Wtf8CodePoints<'a> {
    bytes: slice::Iter<'a, u8>,
}
513514impl Iteratorfor Wtf8CodePoints<'_> {
515type Item = CodePoint;
516517#[inline]
518fn next(&mut self) -> Option<CodePoint> {
519// SAFETY: `self.bytes` has been created from a WTF-8 string
520unsafe { next_code_point(&mut self.bytes).map(|c| CodePoint::from_u32_unchecked(c)) }
521 }
522523#[inline]
524fn size_hint(&self) -> (usize, Option<usize>) {
525let len = self.bytes.len();
526 (len.saturating_add(3) / 4, Some(len))
527 }
528}
529530impl fmt::Debugfor Wtf8CodePoints<'_> {
531fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
532f.debug_tuple("Wtf8CodePoints")
533// SAFETY: We always leave the string in a valid state after each iteration.
534.field(&unsafe { Wtf8::from_bytes_unchecked(self.bytes.as_slice()) })
535 .finish()
536 }
537}
538539/// Generates a wide character sequence for potentially ill-formed UTF-16.
540#[stable(feature = "rust1", since = "1.0.0")]
541#[derive(#[automatically_derived]
#[stable(feature = "rust1", since = "1.0.0")]
impl<'a> crate::clone::Clone for EncodeWide<'a> {
#[inline]
fn clone(&self) -> EncodeWide<'a> {
EncodeWide {
code_points: crate::clone::Clone::clone(&self.code_points),
extra: crate::clone::Clone::clone(&self.extra),
}
}
}Clone)]
542#[doc(hidden)]
543pub struct EncodeWide<'a> {
544 code_points: Wtf8CodePoints<'a>,
545 extra: u16,
546}
547548// Copied from libunicode/u_str.rs
549#[stable(feature = "rust1", since = "1.0.0")]
550impl Iteratorfor EncodeWide<'_> {
551type Item = u16;
552553#[inline]
554fn next(&mut self) -> Option<u16> {
555if self.extra != 0 {
556let tmp = self.extra;
557self.extra = 0;
558return Some(tmp);
559 }
560561let mut buf = [0; char::MAX_LEN_UTF16];
562self.code_points.next().map(|code_point| {
563let n = encode_utf16_raw(code_point.to_u32(), &mut buf).len();
564if n == 2 {
565self.extra = buf[1];
566 }
567buf[0]
568 })
569 }
570571#[inline]
572fn size_hint(&self) -> (usize, Option<usize>) {
573let (low, high) = self.code_points.size_hint();
574let ext = (self.extra != 0) as usize;
575// every code point gets either one u16 or two u16,
576 // so this iterator is between 1 or 2 times as
577 // long as the underlying iterator.
578(low + ext, high.and_then(|n| n.checked_mul(2)).and_then(|n| n.checked_add(ext)))
579 }
580}
581582#[stable(feature = "encode_wide_fused_iterator", since = "1.62.0")]
583impl FusedIteratorfor EncodeWide<'_> {}
584585#[stable(feature = "encode_wide_debug", since = "1.92.0")]
586impl fmt::Debugfor EncodeWide<'_> {
587fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
588struct CodeUnit(u16);
589impl fmt::Debugfor CodeUnit {
590fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
591// This output attempts to balance readability with precision.
592 // Render characters which take only one WTF-16 code unit using
593 // `char` syntax and everything else as code units with hex
594 // integer syntax (including paired and unpaired surrogate
595 // halves). Since Rust has no `char`-like type for WTF-16, this
596 // isn't perfect, so if this output isn't suitable, it is open
597 // to being changed (see #140153).
598match char::from_u32(self.0 as u32) {
599Some(c) => f.write_fmt(format_args!("{0:?}", c))write!(f, "{c:?}"),
600None => f.write_fmt(format_args!("0x{0:04X}", self.0))write!(f, "0x{:04X}", self.0),
601 }
602 }
603 }
604605f.write_fmt(format_args!("EncodeWide("))write!(f, "EncodeWide(")?;
606f.debug_list().entries(self.clone().map(CodeUnit)).finish()?;
607f.write_fmt(format_args!(")"))write!(f, ")")?;
608Ok(())
609 }
610}
611612impl Hashfor CodePoint {
613#[inline]
614fn hash<H: Hasher>(&self, state: &mut H) {
615self.0.hash(state)
616 }
617}
618619impl Hashfor Wtf8 {
620#[inline]
621fn hash<H: Hasher>(&self, state: &mut H) {
622state.write(&self.bytes);
6230xfeu8.hash(state)
624 }
625}
626627#[unstable(feature = "clone_to_uninit", issue = "126799")]
628unsafe impl CloneToUninitfor Wtf8 {
629#[inline]
630 #[cfg_attr(debug_assertions, track_caller)]
631unsafe fn clone_to_uninit(&self, dst: *mut u8) {
632// SAFETY: we're just a transparent wrapper around [u8]
633unsafe { self.bytes.clone_to_uninit(dst) }
634 }
635}