//! Character conversions.

use crate::char::TryFromCharError;
use crate::error::Error;
use crate::fmt;
use crate::mem::transmute;
use crate::str::FromStr;
use crate::ub_checks::assert_unsafe_precondition;
910/// Converts a `u32` to a `char`. See [`char::from_u32`].
11#[must_use]
12#[inline]
13pub(super) const fn from_u32(i: u32) -> Option<char> {
14// FIXME(const-hack): once Result::ok is const fn, use it here
15match char_try_from_u32(i) {
16Ok(c) => Some(c),
17Err(_) => None,
18 }
19}
2021/// Converts a `u32` to a `char`, ignoring validity. See [`char::from_u32_unchecked`].
22#[inline]
23#[must_use]
24#[allow(unnecessary_transmutes)]
25#[track_caller]
26pub(super) const unsafe fn from_u32_unchecked(i: u32) -> char {
27// SAFETY: the caller must guarantee that `i` is a valid char value.
28unsafe {
29{
#[rustc_no_mir_inline]
#[inline]
#[rustc_nounwind]
#[track_caller]
const fn precondition_check(i: u32) {
if !char_try_from_u32(i).is_ok() {
let msg =
"unsafe precondition(s) violated: invalid value for `char`\n\nThis indicates a bug in the program. This Undefined Behavior check is optional, and cannot be relied on for safety.";
::core::panicking::panic_nounwind_fmt(::core::fmt::Arguments::from_str(msg),
false);
}
}
if ::core::ub_checks::check_language_ub() { precondition_check(i); }
};assert_unsafe_precondition!(
30 check_language_ub,
31"invalid value for `char`",
32 (i: u32 = i) => char_try_from_u32(i).is_ok()
33 );
34transmute(i)
35 }
36}
3738#[stable(feature = "char_convert", since = "1.13.0")]
39#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
40impl const From<char> for u32 {
41/// Converts a [`char`] into a [`u32`].
42 ///
43 /// # Examples
44 ///
45 /// ```
46 /// let c = 'c';
47 /// let u = u32::from(c);
48 ///
49 /// assert!(4 == size_of_val(&u))
50 /// ```
51#[inline]
52fn from(c: char) -> Self {
53cas u3254 }
55}
5657#[stable(feature = "more_char_conversions", since = "1.51.0")]
58#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
59impl const From<char> for u64 {
60/// Converts a [`char`] into a [`u64`].
61 ///
62 /// # Examples
63 ///
64 /// ```
65 /// let c = '👤';
66 /// let u = u64::from(c);
67 ///
68 /// assert!(8 == size_of_val(&u))
69 /// ```
70#[inline]
71fn from(c: char) -> Self {
72// The char is casted to the value of the code point, then zero-extended to 64 bit.
73 // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
74cas u6475 }
76}
7778#[stable(feature = "more_char_conversions", since = "1.51.0")]
79#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
80impl const From<char> for u128 {
81/// Converts a [`char`] into a [`u128`].
82 ///
83 /// # Examples
84 ///
85 /// ```
86 /// let c = '⚙';
87 /// let u = u128::from(c);
88 ///
89 /// assert!(16 == size_of_val(&u))
90 /// ```
91#[inline]
92fn from(c: char) -> Self {
93// The char is casted to the value of the code point, then zero-extended to 128 bit.
94 // See [https://doc.rust-lang.org/reference/expressions/operator-expr.html#semantics]
95cas u12896 }
97}
9899/// Maps a `char` with a code point from U+0000 to U+00FF (inclusive) to a byte in `0x00..=0xFF` with
100/// the same value, failing if the code point is greater than U+00FF.
101///
102/// See [`impl From<u8> for char`](char#impl-From<u8>-for-char) for details on the encoding.
103#[stable(feature = "u8_from_char", since = "1.59.0")]
104#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
105impl const TryFrom<char> for u8 {
106type Error = TryFromCharError;
107108/// Tries to convert a [`char`] into a [`u8`].
109 ///
110 /// # Examples
111 ///
112 /// ```
113 /// let a = 'ÿ'; // U+00FF
114 /// let b = 'Ā'; // U+0100
115 ///
116 /// assert_eq!(u8::try_from(a), Ok(0xFF_u8));
117 /// assert!(u8::try_from(b).is_err());
118 /// ```
119#[inline]
120fn try_from(c: char) -> Result<u8, Self::Error> {
121// FIXME(const-hack): this should use map_err instead
122match u8::try_from(u32::from(c)) {
123Ok(b) => Ok(b),
124Err(_) => Err(TryFromCharError(())),
125 }
126 }
127}
128129/// Maps a `char` with a code point from U+0000 to U+FFFF (inclusive) to a `u16` in `0x0000..=0xFFFF`
130/// with the same value, failing if the code point is greater than U+FFFF.
131///
132/// This corresponds to the UCS-2 encoding, as specified in ISO/IEC 10646:2003.
133#[stable(feature = "u16_from_char", since = "1.74.0")]
134#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
135impl const TryFrom<char> for u16 {
136type Error = TryFromCharError;
137138/// Tries to convert a [`char`] into a [`u16`].
139 ///
140 /// # Examples
141 ///
142 /// ```
143 /// let trans_rights = '⚧'; // U+26A7
144 /// let ninjas = '🥷'; // U+1F977
145 ///
146 /// assert_eq!(u16::try_from(trans_rights), Ok(0x26A7_u16));
147 /// assert!(u16::try_from(ninjas).is_err());
148 /// ```
149#[inline]
150fn try_from(c: char) -> Result<u16, Self::Error> {
151// FIXME(const-hack): this should use map_err instead
152match u16::try_from(u32::from(c)) {
153Ok(x) => Ok(x),
154Err(_) => Err(TryFromCharError(())),
155 }
156 }
157}
158159/// Maps a `char` with a code point from U+0000 to U+10FFFF (inclusive) to a `usize` in
160/// `0x0000..=0x10FFFF` with the same value, failing if the final value is unrepresentable by
161/// `usize`.
162///
163/// Generally speaking, this conversion can be seen as obtaining the character's corresponding
164/// UTF-32 code point to the extent representable by pointer addresses.
165#[stable(feature = "usize_try_from_char", since = "1.94.0")]
166#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
167impl const TryFrom<char> for usize {
168type Error = TryFromCharError;
169170/// Tries to convert a [`char`] into a [`usize`].
171 ///
172 /// # Examples
173 ///
174 /// ```
175 /// let a = '\u{FFFF}'; // Always succeeds.
176 /// let b = '\u{10FFFF}'; // Conditionally succeeds.
177 ///
178 /// assert_eq!(usize::try_from(a), Ok(0xFFFF));
179 ///
180 /// if size_of::<usize>() >= size_of::<u32>() {
181 /// assert_eq!(usize::try_from(b), Ok(0x10FFFF));
182 /// } else {
183 /// assert!(matches!(usize::try_from(b), Err(_)));
184 /// }
185 /// ```
186#[inline]
187fn try_from(c: char) -> Result<usize, Self::Error> {
188// FIXME(const-hack): this should use map_err instead
189match usize::try_from(u32::from(c)) {
190Ok(x) => Ok(x),
191Err(_) => Err(TryFromCharError(())),
192 }
193 }
194}
195196/// Maps a byte in `0x00..=0xFF` to a `char` whose code point has the same value from U+0000 to U+00FF
197/// (inclusive).
198///
199/// Unicode is designed such that this effectively decodes bytes
200/// with the character encoding that IANA calls ISO-8859-1.
201/// This encoding is compatible with ASCII.
202///
203/// Note that this is different from ISO/IEC 8859-1 a.k.a. ISO 8859-1 (with one less hyphen),
204/// which leaves some "blanks", byte values that are not assigned to any character.
205/// ISO-8859-1 (the IANA one) assigns them to the C0 and C1 control codes.
206///
207/// Note that this is *also* different from Windows-1252 a.k.a. code page 1252,
208/// which is a superset ISO/IEC 8859-1 that assigns some (not all!) blanks
209/// to punctuation and various Latin characters.
210///
211/// To confuse things further, [on the Web](https://encoding.spec.whatwg.org/)
212/// `ascii`, `iso-8859-1`, and `windows-1252` are all aliases
213/// for a superset of Windows-1252 that fills the remaining blanks with corresponding
214/// C0 and C1 control codes.
215#[stable(feature = "char_convert", since = "1.13.0")]
216#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
217impl const From<u8> for char {
218/// Converts a [`u8`] into a [`char`].
219 ///
220 /// # Examples
221 ///
222 /// ```
223 /// let u = 32 as u8;
224 /// let c = char::from(u);
225 ///
226 /// assert!(4 == size_of_val(&c))
227 /// ```
228#[inline]
229fn from(i: u8) -> Self {
230ias char231 }
232}
233234/// An error which can be returned when parsing a char.
235///
236/// This `struct` is created when using the [`char::from_str`] method.
237#[stable(feature = "char_from_str", since = "1.20.0")]
238#[derive(#[automatically_derived]
#[stable(feature = "char_from_str", since = "1.20.0")]
impl crate::clone::Clone for ParseCharError {
#[inline]
fn clone(&self) -> ParseCharError {
ParseCharError { kind: crate::clone::Clone::clone(&self.kind) }
}
}Clone, #[automatically_derived]
#[stable(feature = "char_from_str", since = "1.20.0")]
impl crate::fmt::Debug for ParseCharError {
#[inline]
fn fmt(&self, f: &mut crate::fmt::Formatter) -> crate::fmt::Result {
crate::fmt::Formatter::debug_struct_field1_finish(f, "ParseCharError",
"kind", &&self.kind)
}
}Debug, #[automatically_derived]
#[stable(feature = "char_from_str", since = "1.20.0")]
impl crate::cmp::PartialEq for ParseCharError {
#[inline]
fn eq(&self, other: &ParseCharError) -> bool { self.kind == other.kind }
}PartialEq, #[automatically_derived]
#[stable(feature = "char_from_str", since = "1.20.0")]
impl crate::cmp::Eq for ParseCharError {
#[inline]
#[doc(hidden)]
#[coverage(off)]
fn assert_fields_are_eq(&self) {
let _: crate::cmp::AssertParamIsEq<CharErrorKind>;
}
}Eq)]
239pub struct ParseCharError {
240 kind: CharErrorKind,
241}
/// The reason a string could not be parsed as a single `char`.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum CharErrorKind {
    /// The input string contained no characters.
    EmptyString,
    /// The input string contained more than one character.
    TooManyChars,
}
248249#[stable(feature = "char_from_str", since = "1.20.0")]
250impl Errorfor ParseCharError {}
251252#[stable(feature = "char_from_str", since = "1.20.0")]
253impl fmt::Displayfor ParseCharError {
254fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
255match self.kind {
256 CharErrorKind::EmptyString => "cannot parse char from empty string",
257 CharErrorKind::TooManyChars => "too many characters in string",
258 }
259 .fmt(f)
260 }
261}
262263#[stable(feature = "char_from_str", since = "1.20.0")]
264impl FromStrfor char {
265type Err = ParseCharError;
266267#[inline]
268fn from_str(s: &str) -> Result<Self, Self::Err> {
269let mut chars = s.chars();
270match (chars.next(), chars.next()) {
271 (None, _) => Err(ParseCharError { kind: CharErrorKind::EmptyString }),
272 (Some(c), None) => Ok(c),
273_ => Err(ParseCharError { kind: CharErrorKind::TooManyChars }),
274 }
275 }
276}
277278#[inline]
279#[allow(unnecessary_transmutes)]
280const fn char_try_from_u32(i: u32) -> Result<char, CharTryFromError> {
281// This is an optimized version of the check
282 // (i > MAX as u32) || (i >= 0xD800 && i <= 0xDFFF),
283 // which can also be written as
284 // i >= 0x110000 || (i >= 0xD800 && i < 0xE000).
285 //
286 // The XOR with 0xD800 permutes the ranges such that 0xD800..0xE000 is
287 // mapped to 0x0000..0x0800, while keeping all the high bits outside 0xFFFF the same.
288 // In particular, numbers >= 0x110000 stay in this range.
289 //
290 // Subtracting 0x800 causes 0x0000..0x0800 to wrap, meaning that a single
291 // unsigned comparison against 0x110000 - 0x800 will detect both the wrapped
292 // surrogate range as well as the numbers originally larger than 0x110000.
293if (i ^ 0xD800).wrapping_sub(0x800) >= 0x110000 - 0x800 {
294Err(CharTryFromError(()))
295 } else {
296// SAFETY: checked that it's a legal unicode value
297Ok(unsafe { transmute(i) })
298 }
299}
300301#[stable(feature = "try_from", since = "1.34.0")]
302#[rustc_const_unstable(feature = "const_convert", issue = "143773")]
303impl const TryFrom<u32> for char {
304type Error = CharTryFromError;
305306#[inline]
307fn try_from(i: u32) -> Result<Self, Self::Error> {
308char_try_from_u32(i)
309 }
310}
311312/// The error type returned when a conversion from [`prim@u32`] to [`prim@char`] fails.
313///
314/// This `struct` is created by the [`char::try_from<u32>`](char#impl-TryFrom<u32>-for-char) method.
315/// See its documentation for more.
316#[stable(feature = "try_from", since = "1.34.0")]
317#[derive(#[automatically_derived]
#[stable(feature = "try_from", since = "1.34.0")]
impl crate::marker::Copy for CharTryFromError { }Copy, #[automatically_derived]
#[stable(feature = "try_from", since = "1.34.0")]
impl crate::clone::Clone for CharTryFromError {
#[inline]
fn clone(&self) -> CharTryFromError {
let _: crate::clone::AssertParamIsClone<()>;
*self
}
}Clone, #[automatically_derived]
#[stable(feature = "try_from", since = "1.34.0")]
impl crate::fmt::Debug for CharTryFromError {
#[inline]
fn fmt(&self, f: &mut crate::fmt::Formatter) -> crate::fmt::Result {
crate::fmt::Formatter::debug_tuple_field1_finish(f,
"CharTryFromError", &&self.0)
}
}Debug, #[automatically_derived]
#[stable(feature = "try_from", since = "1.34.0")]
impl crate::cmp::PartialEq for CharTryFromError {
#[inline]
fn eq(&self, other: &CharTryFromError) -> bool { self.0 == other.0 }
}PartialEq, #[automatically_derived]
#[stable(feature = "try_from", since = "1.34.0")]
impl crate::cmp::Eq for CharTryFromError {
#[inline]
#[doc(hidden)]
#[coverage(off)]
fn assert_fields_are_eq(&self) { let _: crate::cmp::AssertParamIsEq<()>; }
}Eq)]
318pub struct CharTryFromError(());
319320#[stable(feature = "try_from", since = "1.34.0")]
321impl fmt::Displayfor CharTryFromError {
322fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
323"converted integer out of range for `char`".fmt(f)
324 }
325}
326327/// Converts a digit in the given radix to a `char`. See [`char::from_digit`].
328#[inline]
329#[must_use]
330pub(super) const fn from_digit(num: u32, radix: u32) -> Option<char> {
331if radix > 36 {
332{
crate::panicking::panic_fmt(format_args!("from_digit: radix is too high (maximum 36)"));
};panic!("from_digit: radix is too high (maximum 36)");
333 }
334if num < radix {
335let num = numas u8;
336if num < 10 { Some((b'0' + num) as char) } else { Some((b'a' + num - 10) as char) }
337 } else {
338None339 }
340}