// core/char/convert.rs

//! Character conversions.
2
3use crate::char::TryFromCharError;
4use crate::error::Error;
5use crate::fmt;
6use crate::mem::transmute;
7use crate::str::FromStr;
8use crate::ub_checks::assert_unsafe_precondition;
9
10/// Converts a `u32` to a `char`. See [`char::from_u32`].
11#[must_use]
12#[inline]
13pub(super) const fn from_u32(i: u32) -> Option<char> {
14    // FIXME(const-hack): once Result::ok is const fn, use it here
15    match char_try_from_u32(i) {
16        Ok(c) => Some(c),
17        Err(_) => None,
18    }
19}
20
21/// Converts a `u32` to a `char`, ignoring validity. See [`char::from_u32_unchecked`].
22#[inline]
23#[must_use]
24#[allow(unnecessary_transmutes)]
25#[track_caller]
26pub(super) const unsafe fn from_u32_unchecked(i: u32) -> char {
27    // SAFETY: the caller must guarantee that `i` is a valid char value.
28    unsafe {
29        assert_unsafe_precondition!(
30            check_language_ub,
31            "invalid value for `char`",
32            (i: u32 = i) => char_try_from_u32(i).is_ok()
33        );
34        transmute(i)
35    }
36}
37
38#[stable(feature = "char_convert", since = "1.13.0")]
39#[rustc_const_unstable(feature = "const_try", issue = "74935")]
40impl const From<char> for u32 {
41    /// Converts a [`char`] into a [`u32`].
42    ///
43    /// # Examples
44    ///
45    /// ```
46    /// let c = 'c';
47    /// let u = u32::from(c);
48    /// assert!(4 == size_of_val(&u))
49    /// ```
50    #[inline]
51    fn from(c: char) -> Self {
52        c as u32
53    }
54}
55
56#[stable(feature = "more_char_conversions", since = "1.51.0")]
57#[rustc_const_unstable(feature = "const_try", issue = "74935")]
58impl const From<char> for u64 {
59    /// Converts a [`char`] into a [`u64`].
60    ///
61    /// # Examples
62    ///
63    /// ```
64    /// let c = '👤';
65    /// let u = u64::from(c);
66    /// assert!(8 == size_of_val(&u))
67    /// ```
68    #[inline]
69    fn from(c: char) -> Self {
70        // The char is casted to the value of the code point, then zero-extended to 64 bit.
71        // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
72        c as u64
73    }
74}
75
76#[stable(feature = "more_char_conversions", since = "1.51.0")]
77#[rustc_const_unstable(feature = "const_try", issue = "74935")]
78impl const From<char> for u128 {
79    /// Converts a [`char`] into a [`u128`].
80    ///
81    /// # Examples
82    ///
83    /// ```
84    /// let c = '⚙';
85    /// let u = u128::from(c);
86    /// assert!(16 == size_of_val(&u))
87    /// ```
88    #[inline]
89    fn from(c: char) -> Self {
90        // The char is casted to the value of the code point, then zero-extended to 128 bit.
91        // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
92        c as u128
93    }
94}
95
96/// Maps a `char` with code point in U+0000..=U+00FF to a byte in 0x00..=0xFF with same value,
97/// failing if the code point is greater than U+00FF.
98///
99/// See [`impl From<u8> for char`](char#impl-From<u8>-for-char) for details on the encoding.
100#[stable(feature = "u8_from_char", since = "1.59.0")]
101impl TryFrom<char> for u8 {
102    type Error = TryFromCharError;
103
104    /// Tries to convert a [`char`] into a [`u8`].
105    ///
106    /// # Examples
107    ///
108    /// ```
109    /// let a = 'ÿ'; // U+00FF
110    /// let b = 'Ā'; // U+0100
111    /// assert_eq!(u8::try_from(a), Ok(0xFF_u8));
112    /// assert!(u8::try_from(b).is_err());
113    /// ```
114    #[inline]
115    fn try_from(c: char) -> Result<u8, Self::Error> {
116        u8::try_from(u32::from(c)).map_err(|_| TryFromCharError(()))
117    }
118}
119
120/// Maps a `char` with code point in U+0000..=U+FFFF to a `u16` in 0x0000..=0xFFFF with same value,
121/// failing if the code point is greater than U+FFFF.
122///
123/// This corresponds to the UCS-2 encoding, as specified in ISO/IEC 10646:2003.
124#[stable(feature = "u16_from_char", since = "1.74.0")]
125impl TryFrom<char> for u16 {
126    type Error = TryFromCharError;
127
128    /// Tries to convert a [`char`] into a [`u16`].
129    ///
130    /// # Examples
131    ///
132    /// ```
133    /// let trans_rights = '⚧'; // U+26A7
134    /// let ninjas = '🥷'; // U+1F977
135    /// assert_eq!(u16::try_from(trans_rights), Ok(0x26A7_u16));
136    /// assert!(u16::try_from(ninjas).is_err());
137    /// ```
138    #[inline]
139    fn try_from(c: char) -> Result<u16, Self::Error> {
140        u16::try_from(u32::from(c)).map_err(|_| TryFromCharError(()))
141    }
142}
143
144/// Maps a byte in 0x00..=0xFF to a `char` whose code point has the same value, in U+0000..=U+00FF.
145///
146/// Unicode is designed such that this effectively decodes bytes
147/// with the character encoding that IANA calls ISO-8859-1.
148/// This encoding is compatible with ASCII.
149///
150/// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen),
151/// which leaves some "blanks", byte values that are not assigned to any character.
152/// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes.
153///
154/// Note that this is *also* different from Windows-1252 a.k.a. code page 1252,
155/// which is a superset ISO/IEC 8859-1 that assigns some (not all!) blanks
156/// to punctuation and various Latin characters.
157///
158/// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/)
159/// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases
160/// for a superset of Windows-1252 that fills the remaining blanks with corresponding
161/// C0 and C1 control codes.
162#[stable(feature = "char_convert", since = "1.13.0")]
163#[rustc_const_unstable(feature = "const_try", issue = "74935")]
164impl const From<u8> for char {
165    /// Converts a [`u8`] into a [`char`].
166    ///
167    /// # Examples
168    ///
169    /// ```
170    /// let u = 32 as u8;
171    /// let c = char::from(u);
172    /// assert!(4 == size_of_val(&c))
173    /// ```
174    #[inline]
175    fn from(i: u8) -> Self {
176        i as char
177    }
178}
179
180/// An error which can be returned when parsing a char.
181///
182/// This `struct` is created when using the [`char::from_str`] method.
183#[stable(feature = "char_from_str", since = "1.20.0")]
184#[derive(Clone, Debug, PartialEq, Eq)]
185pub struct ParseCharError {
186    kind: CharErrorKind,
187}
188
/// The reason a string could not be parsed as a single `char`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum CharErrorKind {
    /// The input string contained no characters.
    EmptyString,
    /// The input string contained more than one character.
    TooManyChars,
}
194
195#[stable(feature = "char_from_str", since = "1.20.0")]
196impl Error for ParseCharError {
197    #[allow(deprecated)]
198    fn description(&self) -> &str {
199        match self.kind {
200            CharErrorKind::EmptyString => "cannot parse char from empty string",
201            CharErrorKind::TooManyChars => "too many characters in string",
202        }
203    }
204}
205
206#[stable(feature = "char_from_str", since = "1.20.0")]
207impl fmt::Display for ParseCharError {
208    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
209        #[allow(deprecated)]
210        self.description().fmt(f)
211    }
212}
213
214#[stable(feature = "char_from_str", since = "1.20.0")]
215impl FromStr for char {
216    type Err = ParseCharError;
217
218    #[inline]
219    fn from_str(s: &str) -> Result<Self, Self::Err> {
220        let mut chars = s.chars();
221        match (chars.next(), chars.next()) {
222            (None, _) => Err(ParseCharError { kind: CharErrorKind::EmptyString }),
223            (Some(c), None) => Ok(c),
224            _ => Err(ParseCharError { kind: CharErrorKind::TooManyChars }),
225        }
226    }
227}
228
229#[inline]
230#[allow(unnecessary_transmutes)]
231const fn char_try_from_u32(i: u32) -> Result<char, CharTryFromError> {
232    // This is an optimized version of the check
233    // (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF),
234    // which can also be written as
235    // i >= 0x110000 || (i >= 0xD800 && i < 0xE000).
236    //
237    // The XOR with 0xD800 permutes the ranges such that 0xD800..0xE000 is
238    // mapped to 0x0000..0x0800, while keeping all the high bits outside 0xFFFF the same.
239    // In particular, numbers >= 0x110000 stay in this range.
240    //
241    // Subtracting 0x800 causes 0x0000..0x0800 to wrap, meaning that a single
242    // unsigned comparison against 0x110000 - 0x800 will detect both the wrapped
243    // surrogate range as well as the numbers originally larger than 0x110000.
244    //
245    if (i ^ 0xD800).wrapping_sub(0x800) >= 0x110000 - 0x800 {
246        Err(CharTryFromError(()))
247    } else {
248        // SAFETY: checked that it's a legal unicode value
249        Ok(unsafe { transmute(i) })
250    }
251}
252
253#[stable(feature = "try_from", since = "1.34.0")]
254#[rustc_const_unstable(feature = "const_try", issue = "74935")]
255impl const TryFrom<u32> for char {
256    type Error = CharTryFromError;
257
258    #[inline]
259    fn try_from(i: u32) -> Result<Self, Self::Error> {
260        char_try_from_u32(i)
261    }
262}
263
264/// The error type returned when a conversion from [`prim@u32`] to [`prim@char`] fails.
265///
266/// This `struct` is created by the [`char::try_from<u32>`](char#impl-TryFrom<u32>-for-char) method.
267/// See its documentation for more.
268#[stable(feature = "try_from", since = "1.34.0")]
269#[derive(Copy, Clone, Debug, PartialEq, Eq)]
270pub struct CharTryFromError(());
271
272#[stable(feature = "try_from", since = "1.34.0")]
273impl fmt::Display for CharTryFromError {
274    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
275        "converted integer out of range for `char`".fmt(f)
276    }
277}
278
279/// Converts a digit in the given radix to a `char`. See [`char::from_digit`].
280#[inline]
281#[must_use]
282pub(super) const fn from_digit(num: u32, radix: u32) -> Option<char> {
283    if radix > 36 {
284        panic!("from_digit: radix is too high (maximum 36)");
285    }
286    if num < radix {
287        let num = num as u8;
288        if num < 10 { Some((b'0' + num) as char) } else { Some((b'a' + num - 10) as char) }
289    } else {
290        None
291    }
292}