Skip to main content

core/num/imp/dec2flt/
parse.rs

1//! Functions to parse floating-point numbers.
2
3use dec2flt::common::{ByteSlice, is_8digits};
4use dec2flt::decimal::Decimal;
5
6use crate::num::imp::{Float, dec2flt};
7
8const MIN_19DIGIT_INT: u64 = 100_0000_0000_0000_0000;
9
10/// Parse 8 digits, loaded as bytes in little-endian order.
11///
12/// This uses the trick where every digit is in [0x030, 0x39],
13/// and therefore can be parsed in 3 multiplications, much
14/// faster than the normal 8.
15///
16/// This is based off the algorithm described in "Fast numeric string to
17/// int", available here: <https://johnnylee-sde.github.io/Fast-numeric-string-to-int/>.
18fn parse_8digits(mut v: u64) -> u64 {
19    const MASK: u64 = 0x0000_00FF_0000_00FF;
20    const MUL1: u64 = 0x000F_4240_0000_0064;
21    const MUL2: u64 = 0x0000_2710_0000_0001;
22    v -= 0x3030_3030_3030_3030;
23    v = (v * 10) + (v >> 8); // will not overflow, fits in 63 bits
24    let v1 = (v & MASK).wrapping_mul(MUL1);
25    let v2 = ((v >> 16) & MASK).wrapping_mul(MUL2);
26    ((v1.wrapping_add(v2) >> 32) as u32) as u64
27}
28
29/// Parse digits until a non-digit character is found.
30fn try_parse_digits(mut s: &[u8], mut x: u64) -> (&[u8], u64) {
31    // may cause overflows, to be handled later
32
33    while s.len() >= 8 {
34        let num = s.read_u64();
35        if is_8digits(num) {
36            x = x.wrapping_mul(1_0000_0000).wrapping_add(parse_8digits(num));
37            s = &s[8..];
38        } else {
39            break;
40        }
41    }
42
43    s = s.parse_digits(|digit| {
44        x = x.wrapping_mul(10).wrapping_add(digit as _);
45    });
46
47    (s, x)
48}
49
50/// Parse up to 19 digits (the max that can be stored in a 64-bit integer).
51fn try_parse_19digits(s_ref: &mut &[u8], x: &mut u64) {
52    let mut s = *s_ref;
53
54    while *x < MIN_19DIGIT_INT {
55        if let Some((c, s_next)) = s.split_first() {
56            let digit = c.wrapping_sub(b'0');
57
58            if digit < 10 {
59                *x = (*x * 10) + digit as u64; // no overflows here
60                s = s_next;
61            } else {
62                break;
63            }
64        } else {
65            break;
66        }
67    }
68
69    *s_ref = s;
70}
71
72/// Parse the scientific notation component of a float.
73fn parse_scientific(s_ref: &mut &[u8]) -> Option<i64> {
74    let mut exponent = 0i64;
75    let mut negative = false;
76
77    let mut s = *s_ref;
78
79    if let Some((&c, s_next)) = s.split_first() {
80        negative = c == b'-';
81        if c == b'-' || c == b'+' {
82            s = s_next;
83        }
84    }
85
86    if #[allow(non_exhaustive_omitted_patterns)] match s.first() {
    Some(&x) if x.is_ascii_digit() => true,
    _ => false,
}matches!(s.first(), Some(&x) if x.is_ascii_digit()) {
87        *s_ref = s.parse_digits(|digit| {
88            // no overflows here, saturate well before overflow
89            if exponent < 0x10000 {
90                exponent = 10 * exponent + digit as i64;
91            }
92        });
93        if negative { Some(-exponent) } else { Some(exponent) }
94    } else {
95        *s_ref = s;
96        None
97    }
98}
99
100/// Parse a partial, non-special floating point number.
101///
102/// This creates a representation of the float as the
103/// significant digits and the decimal exponent.
104fn parse_partial_number(mut s: &[u8]) -> Option<(Decimal, usize)> {
105    if true {
    if !!s.is_empty() {
        crate::panicking::panic("assertion failed: !s.is_empty()")
    };
};debug_assert!(!s.is_empty());
106
107    // parse initial digits before dot
108    let mut mantissa = 0_u64;
109    let start = s;
110    let tmp = try_parse_digits(s, mantissa);
111    s = tmp.0;
112    mantissa = tmp.1;
113    let mut n_digits = s.offset_from(start);
114
115    // handle dot with the following digits
116    let mut n_after_dot = 0;
117    let mut exponent = 0_i64;
118    let int_end = s;
119
120    if let Some((&b'.', s_next)) = s.split_first() {
121        s = s_next;
122        let before = s;
123        let tmp = try_parse_digits(s, mantissa);
124        s = tmp.0;
125        mantissa = tmp.1;
126        n_after_dot = s.offset_from(before);
127        exponent = -n_after_dot as i64;
128    }
129
130    n_digits += n_after_dot;
131    if n_digits == 0 {
132        return None;
133    }
134
135    // handle scientific format
136    let mut exp_number = 0_i64;
137    if let Some((&c, s_next)) = s.split_first() {
138        if c == b'e' || c == b'E' {
139            s = s_next;
140            // If None, we have no trailing digits after exponent, or an invalid float.
141            exp_number = parse_scientific(&mut s)?;
142            exponent += exp_number;
143        }
144    }
145
146    let len = s.offset_from(start) as _;
147
148    // handle uncommon case with many digits
149    if n_digits <= 19 {
150        return Some((Decimal { exponent, mantissa, negative: false, many_digits: false }, len));
151    }
152
153    n_digits -= 19;
154    let mut many_digits = false;
155    let mut p = start;
156    while let Some((&c, p_next)) = p.split_first() {
157        if c == b'.' || c == b'0' {
158            n_digits -= c.saturating_sub(b'0' - 1) as isize;
159            p = p_next;
160        } else {
161            break;
162        }
163    }
164    if n_digits > 0 {
165        // at this point we have more than 19 significant digits, let's try again
166        many_digits = true;
167        mantissa = 0;
168        let mut s = start;
169        try_parse_19digits(&mut s, &mut mantissa);
170        exponent = if mantissa >= MIN_19DIGIT_INT {
171            // big int
172            int_end.offset_from(s)
173        } else {
174            s = &s[1..];
175            let before = s;
176            try_parse_19digits(&mut s, &mut mantissa);
177            -s.offset_from(before)
178        } as i64;
179        // add back the explicit part
180        exponent += exp_number;
181    }
182
183    Some((Decimal { exponent, mantissa, negative: false, many_digits }, len))
184}
185
186/// Try to parse a non-special floating point number,
187/// as well as two slices with integer and fractional parts
188/// and the parsed exponent.
189pub fn parse_number(s: &[u8]) -> Option<Decimal> {
190    if let Some((float, rest)) = parse_partial_number(s) {
191        if rest == s.len() {
192            return Some(float);
193        }
194    }
195    None
196}
197
198/// Try to parse a special, non-finite float.
199pub(crate) fn parse_inf_nan<F: Float>(s: &[u8], negative: bool) -> Option<F> {
200    // Since a valid string has at most the length 8, we can load
201    // all relevant characters into a u64 and work from there.
202    // This also generates much better code.
203
204    let mut register;
205    let len: usize;
206
207    // All valid strings are either of length 8 or 3.
208    if s.len() == 8 {
209        register = s.read_u64();
210        len = 8;
211    } else if s.len() == 3 {
212        let a = s[0] as u64;
213        let b = s[1] as u64;
214        let c = s[2] as u64;
215        register = (c << 16) | (b << 8) | a;
216        len = 3;
217    } else {
218        return None;
219    }
220
221    // Clear out the bits which turn ASCII uppercase characters into
222    // lowercase characters. The resulting string is all uppercase.
223    // What happens to other characters is irrelevant.
224    register &= 0xDFDFDFDFDFDFDFDF;
225
226    // u64 values corresponding to relevant cases
227    const INF_3: u64 = 0x464E49; // "INF"
228    const INF_8: u64 = 0x5954494E49464E49; // "INFINITY"
229    const NAN: u64 = 0x4E414E; // "NAN"
230
231    // Match register value to constant to parse string.
232    // Also match on the string length to catch edge cases
233    // like "inf\0\0\0\0\0".
234    let float = match (register, len) {
235        (INF_3, 3) => F::INFINITY,
236        (INF_8, 8) => F::INFINITY,
237        (NAN, 3) => F::NAN,
238        _ => return None,
239    };
240
241    if negative { Some(-float) } else { Some(float) }
242}