core/num/imp/dec2flt/
decimal_seq.rs

1//! Arbitrary-precision decimal type used by fallback algorithms.
2//!
3//! This is only used if the fast-path (native floats) and
4//! the Eisel-Lemire algorithm are unable to unambiguously
5//! determine the float.
6//!
7//! The technique used is "Simple Decimal Conversion", developed
8//! by Nigel Tao and Ken Thompson. A detailed description of the
9//! algorithm can be found in "ParseNumberF64 by Simple Decimal Conversion",
10//! available online: <https://nigeltao.github.io/blog/2020/parse-number-f64-simple.html>.
11
12use dec2flt::common::{ByteSlice, is_8digits};
13
14use crate::num::imp::dec2flt;
15
16/// A decimal floating-point number, represented as a sequence of decimal digits.
17#[derive(#[automatically_derived]
impl crate::clone::Clone for DecimalSeq {
    #[inline]
    fn clone(&self) -> DecimalSeq {
        DecimalSeq {
            num_digits: crate::clone::Clone::clone(&self.num_digits),
            decimal_point: crate::clone::Clone::clone(&self.decimal_point),
            truncated: crate::clone::Clone::clone(&self.truncated),
            digits: crate::clone::Clone::clone(&self.digits),
        }
    }
}Clone, #[automatically_derived]
impl crate::fmt::Debug for DecimalSeq {
    #[inline]
    fn fmt(&self, f: &mut crate::fmt::Formatter) -> crate::fmt::Result {
        crate::fmt::Formatter::debug_struct_field4_finish(f, "DecimalSeq",
            "num_digits", &self.num_digits, "decimal_point",
            &self.decimal_point, "truncated", &self.truncated, "digits",
            &&self.digits)
    }
}Debug, #[automatically_derived]
impl crate::cmp::PartialEq for DecimalSeq {
    #[inline]
    fn eq(&self, other: &DecimalSeq) -> bool {
        self.decimal_point == other.decimal_point &&
                    self.truncated == other.truncated &&
                self.num_digits == other.num_digits &&
            self.digits == other.digits
    }
}PartialEq)]
18pub struct DecimalSeq {
19    /// The number of significant digits in the decimal.
20    pub num_digits: usize,
21    /// The offset of the decimal point in the significant digits.
22    pub decimal_point: i32,
23    /// If the number of significant digits stored in the decimal is truncated.
24    pub truncated: bool,
25    /// Buffer of the raw digits, in the range [0, 9].
26    pub digits: [u8; Self::MAX_DIGITS],
27}
28
29impl Default for DecimalSeq {
30    fn default() -> Self {
31        Self { num_digits: 0, decimal_point: 0, truncated: false, digits: [0; Self::MAX_DIGITS] }
32    }
33}
34
35impl DecimalSeq {
36    /// The maximum number of digits required to unambiguously round up to a 64-bit float.
37    ///
38    /// For an IEEE 754 binary64 float, this required 767 digits. So we store the max digits + 1.
39    ///
40    /// We can exactly represent a float in radix `b` from radix 2 if
41    /// `b` is divisible by 2. This function calculates the exact number of
42    /// digits required to exactly represent that float.
43    ///
44    /// According to the "Handbook of Floating Point Arithmetic",
45    /// for IEEE754, with `emin` being the min exponent, `p2` being the
46    /// precision, and `b` being the radix, the number of digits follows as:
47    ///
48    /// `−emin + p2 + ⌊(emin + 1) log(2, b) − log(1 − 2^(−p2), b)⌋`
49    ///
50    /// For f32, this follows as:
51    ///     emin = -126
52    ///     p2 = 24
53    ///
54    /// For f64, this follows as:
55    ///     emin = -1022
56    ///     p2 = 53
57    ///
58    /// In Python:
59    ///     `-emin + p2 + math.floor((emin+ 1)*math.log(2, b)-math.log(1-2**(-p2), b))`
60    pub const MAX_DIGITS: usize = 768;
61
62    /// The max decimal digits that can be exactly represented in a 64-bit integer.
63    pub(super) const MAX_DIGITS_WITHOUT_OVERFLOW: usize = 19;
64    pub(super) const DECIMAL_POINT_RANGE: i32 = 2047;
65
66    /// Append a digit to the buffer if it fits.
67    // FIXME(tgross35): it may be better for this to return an option
68    // FIXME(tgross35): incrementing the digit counter even if we don't push anything
69    // seems incorrect.
70    pub(super) fn try_add_digit(&mut self, digit: u8) {
71        if self.num_digits < Self::MAX_DIGITS {
72            self.digits[self.num_digits] = digit;
73        }
74        self.num_digits += 1;
75    }
76
77    /// Trim trailing zeros from the buffer.
78    // FIXME(tgross35): this could be `.rev().position()` if perf is okay
79    pub fn trim(&mut self) {
80        // All of the following calls to `DecimalSeq::trim` can't panic because:
81        //
82        //  1. `parse_decimal` sets `num_digits` to a max of `DecimalSeq::MAX_DIGITS`.
83        //  2. `right_shift` sets `num_digits` to `write_index`, which is bounded by `num_digits`.
84        //  3. `left_shift` `num_digits` to a max of `DecimalSeq::MAX_DIGITS`.
85        //
86        // Trim is only called in `right_shift` and `left_shift`.
87        if true {
    if !(self.num_digits <= Self::MAX_DIGITS) {
        crate::panicking::panic("assertion failed: self.num_digits <= Self::MAX_DIGITS")
    };
};debug_assert!(self.num_digits <= Self::MAX_DIGITS);
88        while self.num_digits != 0 && self.digits[self.num_digits - 1] == 0 {
89            self.num_digits -= 1;
90        }
91    }
92
93    pub(super) fn round(&self) -> u64 {
94        if self.num_digits == 0 || self.decimal_point < 0 {
95            return 0;
96        } else if self.decimal_point >= Self::MAX_DIGITS_WITHOUT_OVERFLOW as i32 {
97            return 0xFFFF_FFFF_FFFF_FFFF_u64;
98        }
99
100        let dp = self.decimal_point as usize;
101        let mut n = 0_u64;
102
103        for i in 0..dp {
104            n *= 10;
105            if i < self.num_digits {
106                n += self.digits[i] as u64;
107            }
108        }
109
110        let mut round_up = false;
111
112        if dp < self.num_digits {
113            round_up = self.digits[dp] >= 5;
114            if self.digits[dp] == 5 && dp + 1 == self.num_digits {
115                round_up = self.truncated || ((dp != 0) && (1 & self.digits[dp - 1] != 0))
116            }
117        }
118
119        if round_up {
120            n += 1;
121        }
122        n
123    }
124
125    /// Computes decimal * 2^shift.
126    pub(super) fn left_shift(&mut self, shift: usize) {
127        if self.num_digits == 0 {
128            return;
129        }
130        let num_new_digits = number_of_digits_decimal_left_shift(self, shift);
131        let mut read_index = self.num_digits;
132        let mut write_index = self.num_digits + num_new_digits;
133        let mut n = 0_u64;
134
135        while read_index != 0 {
136            read_index -= 1;
137            write_index -= 1;
138            n += (self.digits[read_index] as u64) << shift;
139            let quotient = n / 10;
140            let remainder = n - (10 * quotient);
141            if write_index < Self::MAX_DIGITS {
142                self.digits[write_index] = remainder as u8;
143            } else if remainder > 0 {
144                self.truncated = true;
145            }
146            n = quotient;
147        }
148
149        while n > 0 {
150            write_index -= 1;
151            let quotient = n / 10;
152            let remainder = n - (10 * quotient);
153            if write_index < Self::MAX_DIGITS {
154                self.digits[write_index] = remainder as u8;
155            } else if remainder > 0 {
156                self.truncated = true;
157            }
158            n = quotient;
159        }
160
161        self.num_digits += num_new_digits;
162
163        if self.num_digits > Self::MAX_DIGITS {
164            self.num_digits = Self::MAX_DIGITS;
165        }
166
167        self.decimal_point += num_new_digits as i32;
168        self.trim();
169    }
170
171    /// Computes decimal * 2^-shift.
172    pub(super) fn right_shift(&mut self, shift: usize) {
173        let mut read_index = 0;
174        let mut write_index = 0;
175        let mut n = 0_u64;
176        while (n >> shift) == 0 {
177            if read_index < self.num_digits {
178                n = (10 * n) + self.digits[read_index] as u64;
179                read_index += 1;
180            } else if n == 0 {
181                return;
182            } else {
183                while (n >> shift) == 0 {
184                    n *= 10;
185                    read_index += 1;
186                }
187                break;
188            }
189        }
190        self.decimal_point -= read_index as i32 - 1;
191        if self.decimal_point < -Self::DECIMAL_POINT_RANGE {
192            // `self = Self::Default()`, but without the overhead of clearing `digits`.
193            self.num_digits = 0;
194            self.decimal_point = 0;
195            self.truncated = false;
196            return;
197        }
198        let mask = (1_u64 << shift) - 1;
199        while read_index < self.num_digits {
200            let new_digit = (n >> shift) as u8;
201            n = (10 * (n & mask)) + self.digits[read_index] as u64;
202            read_index += 1;
203            self.digits[write_index] = new_digit;
204            write_index += 1;
205        }
206        while n > 0 {
207            let new_digit = (n >> shift) as u8;
208            n = 10 * (n & mask);
209            if write_index < Self::MAX_DIGITS {
210                self.digits[write_index] = new_digit;
211                write_index += 1;
212            } else if new_digit > 0 {
213                self.truncated = true;
214            }
215        }
216        self.num_digits = write_index;
217        self.trim();
218    }
219}
220
221/// Parse a big integer representation of the float as a decimal.
222pub fn parse_decimal_seq(mut s: &[u8]) -> DecimalSeq {
223    let mut d = DecimalSeq::default();
224    let start = s;
225
226    while let Some((&b'0', s_next)) = s.split_first() {
227        s = s_next;
228    }
229
230    s = s.parse_digits(|digit| d.try_add_digit(digit));
231
232    if let Some((b'.', s_next)) = s.split_first() {
233        s = s_next;
234        let first = s;
235        // Skip leading zeros.
236        if d.num_digits == 0 {
237            while let Some((&b'0', s_next)) = s.split_first() {
238                s = s_next;
239            }
240        }
241        while s.len() >= 8 && d.num_digits + 8 < DecimalSeq::MAX_DIGITS {
242            let v = s.read_u64();
243            if !is_8digits(v) {
244                break;
245            }
246            d.digits[d.num_digits..].write_u64(v - 0x3030_3030_3030_3030);
247            d.num_digits += 8;
248            s = &s[8..];
249        }
250        s = s.parse_digits(|digit| d.try_add_digit(digit));
251        d.decimal_point = s.len() as i32 - first.len() as i32;
252    }
253
254    if d.num_digits != 0 {
255        // Ignore the trailing zeros if there are any
256        let mut n_trailing_zeros = 0;
257        for &c in start[..(start.len() - s.len())].iter().rev() {
258            if c == b'0' {
259                n_trailing_zeros += 1;
260            } else if c != b'.' {
261                break;
262            }
263        }
264        d.decimal_point += n_trailing_zeros as i32;
265        d.num_digits -= n_trailing_zeros;
266        d.decimal_point += d.num_digits as i32;
267        if d.num_digits > DecimalSeq::MAX_DIGITS {
268            d.truncated = true;
269            d.num_digits = DecimalSeq::MAX_DIGITS;
270        }
271    }
272
273    if let Some((&ch, s_next)) = s.split_first() {
274        if ch == b'e' || ch == b'E' {
275            s = s_next;
276            let mut neg_exp = false;
277            if let Some((&ch, s_next)) = s.split_first() {
278                neg_exp = ch == b'-';
279                if ch == b'-' || ch == b'+' {
280                    s = s_next;
281                }
282            }
283            let mut exp_num = 0_i32;
284
285            s.parse_digits(|digit| {
286                if exp_num < 0x10000 {
287                    exp_num = 10 * exp_num + digit as i32;
288                }
289            });
290
291            d.decimal_point += if neg_exp { -exp_num } else { exp_num };
292        }
293    }
294
295    for i in d.num_digits..DecimalSeq::MAX_DIGITS_WITHOUT_OVERFLOW {
296        d.digits[i] = 0;
297    }
298
299    d
300}
301
302fn number_of_digits_decimal_left_shift(d: &DecimalSeq, mut shift: usize) -> usize {
303    #[rustfmt::skip]
304    const TABLE: [u16; 65] = [
305        0x0000, 0x0800, 0x0801, 0x0803, 0x1006, 0x1009, 0x100D, 0x1812, 0x1817, 0x181D, 0x2024,
306        0x202B, 0x2033, 0x203C, 0x2846, 0x2850, 0x285B, 0x3067, 0x3073, 0x3080, 0x388E, 0x389C,
307        0x38AB, 0x38BB, 0x40CC, 0x40DD, 0x40EF, 0x4902, 0x4915, 0x4929, 0x513E, 0x5153, 0x5169,
308        0x5180, 0x5998, 0x59B0, 0x59C9, 0x61E3, 0x61FD, 0x6218, 0x6A34, 0x6A50, 0x6A6D, 0x6A8B,
309        0x72AA, 0x72C9, 0x72E9, 0x7B0A, 0x7B2B, 0x7B4D, 0x8370, 0x8393, 0x83B7, 0x83DC, 0x8C02,
310        0x8C28, 0x8C4F, 0x9477, 0x949F, 0x94C8, 0x9CF2, 0x051C, 0x051C, 0x051C, 0x051C,
311    ];
312    #[rustfmt::skip]
313    const TABLE_POW5: [u8; 0x051C] = [
314        5, 2, 5, 1, 2, 5, 6, 2, 5, 3, 1, 2, 5, 1, 5, 6, 2, 5, 7, 8, 1, 2, 5, 3, 9, 0, 6, 2, 5, 1,
315        9, 5, 3, 1, 2, 5, 9, 7, 6, 5, 6, 2, 5, 4, 8, 8, 2, 8, 1, 2, 5, 2, 4, 4, 1, 4, 0, 6, 2, 5,
316        1, 2, 2, 0, 7, 0, 3, 1, 2, 5, 6, 1, 0, 3, 5, 1, 5, 6, 2, 5, 3, 0, 5, 1, 7, 5, 7, 8, 1, 2,
317        5, 1, 5, 2, 5, 8, 7, 8, 9, 0, 6, 2, 5, 7, 6, 2, 9, 3, 9, 4, 5, 3, 1, 2, 5, 3, 8, 1, 4, 6,
318        9, 7, 2, 6, 5, 6, 2, 5, 1, 9, 0, 7, 3, 4, 8, 6, 3, 2, 8, 1, 2, 5, 9, 5, 3, 6, 7, 4, 3, 1,
319        6, 4, 0, 6, 2, 5, 4, 7, 6, 8, 3, 7, 1, 5, 8, 2, 0, 3, 1, 2, 5, 2, 3, 8, 4, 1, 8, 5, 7, 9,
320        1, 0, 1, 5, 6, 2, 5, 1, 1, 9, 2, 0, 9, 2, 8, 9, 5, 5, 0, 7, 8, 1, 2, 5, 5, 9, 6, 0, 4, 6,
321        4, 4, 7, 7, 5, 3, 9, 0, 6, 2, 5, 2, 9, 8, 0, 2, 3, 2, 2, 3, 8, 7, 6, 9, 5, 3, 1, 2, 5, 1,
322        4, 9, 0, 1, 1, 6, 1, 1, 9, 3, 8, 4, 7, 6, 5, 6, 2, 5, 7, 4, 5, 0, 5, 8, 0, 5, 9, 6, 9, 2,
323        3, 8, 2, 8, 1, 2, 5, 3, 7, 2, 5, 2, 9, 0, 2, 9, 8, 4, 6, 1, 9, 1, 4, 0, 6, 2, 5, 1, 8, 6,
324        2, 6, 4, 5, 1, 4, 9, 2, 3, 0, 9, 5, 7, 0, 3, 1, 2, 5, 9, 3, 1, 3, 2, 2, 5, 7, 4, 6, 1, 5,
325        4, 7, 8, 5, 1, 5, 6, 2, 5, 4, 6, 5, 6, 6, 1, 2, 8, 7, 3, 0, 7, 7, 3, 9, 2, 5, 7, 8, 1, 2,
326        5, 2, 3, 2, 8, 3, 0, 6, 4, 3, 6, 5, 3, 8, 6, 9, 6, 2, 8, 9, 0, 6, 2, 5, 1, 1, 6, 4, 1, 5,
327        3, 2, 1, 8, 2, 6, 9, 3, 4, 8, 1, 4, 4, 5, 3, 1, 2, 5, 5, 8, 2, 0, 7, 6, 6, 0, 9, 1, 3, 4,
328        6, 7, 4, 0, 7, 2, 2, 6, 5, 6, 2, 5, 2, 9, 1, 0, 3, 8, 3, 0, 4, 5, 6, 7, 3, 3, 7, 0, 3, 6,
329        1, 3, 2, 8, 1, 2, 5, 1, 4, 5, 5, 1, 9, 1, 5, 2, 2, 8, 3, 6, 6, 8, 5, 1, 8, 0, 6, 6, 4, 0,
330        6, 2, 5, 7, 2, 7, 5, 9, 5, 7, 6, 1, 4, 1, 8, 3, 4, 2, 5, 9, 0, 3, 3, 2, 0, 3, 1, 2, 5, 3,
331        6, 3, 7, 9, 7, 8, 8, 0, 7, 0, 9, 1, 7, 1, 2, 9, 5, 1, 6, 6, 0, 1, 5, 6, 2, 5, 1, 8, 1, 8,
332        9, 8, 9, 4, 0, 3, 5, 4, 5, 8, 5, 6, 4, 7, 5, 8, 3, 0, 0, 7, 8, 1, 2, 5, 9, 0, 9, 4, 9, 4,
333        7, 0, 1, 7, 7, 2, 9, 2, 8, 2, 3, 7, 9, 1, 5, 0, 3, 9, 0, 6, 2, 5, 4, 5, 4, 7, 4, 7, 3, 5,
334        0, 8, 8, 6, 4, 6, 4, 1, 1, 8, 9, 5, 7, 5, 1, 9, 5, 3, 1, 2, 5, 2, 2, 7, 3, 7, 3, 6, 7, 5,
335        4, 4, 3, 2, 3, 2, 0, 5, 9, 4, 7, 8, 7, 5, 9, 7, 6, 5, 6, 2, 5, 1, 1, 3, 6, 8, 6, 8, 3, 7,
336        7, 2, 1, 6, 1, 6, 0, 2, 9, 7, 3, 9, 3, 7, 9, 8, 8, 2, 8, 1, 2, 5, 5, 6, 8, 4, 3, 4, 1, 8,
337        8, 6, 0, 8, 0, 8, 0, 1, 4, 8, 6, 9, 6, 8, 9, 9, 4, 1, 4, 0, 6, 2, 5, 2, 8, 4, 2, 1, 7, 0,
338        9, 4, 3, 0, 4, 0, 4, 0, 0, 7, 4, 3, 4, 8, 4, 4, 9, 7, 0, 7, 0, 3, 1, 2, 5, 1, 4, 2, 1, 0,
339        8, 5, 4, 7, 1, 5, 2, 0, 2, 0, 0, 3, 7, 1, 7, 4, 2, 2, 4, 8, 5, 3, 5, 1, 5, 6, 2, 5, 7, 1,
340        0, 5, 4, 2, 7, 3, 5, 7, 6, 0, 1, 0, 0, 1, 8, 5, 8, 7, 1, 1, 2, 4, 2, 6, 7, 5, 7, 8, 1, 2,
341        5, 3, 5, 5, 2, 7, 1, 3, 6, 7, 8, 8, 0, 0, 5, 0, 0, 9, 2, 9, 3, 5, 5, 6, 2, 1, 3, 3, 7, 8,
342        9, 0, 6, 2, 5, 1, 7, 7, 6, 3, 5, 6, 8, 3, 9, 4, 0, 0, 2, 5, 0, 4, 6, 4, 6, 7, 7, 8, 1, 0,
343        6, 6, 8, 9, 4, 5, 3, 1, 2, 5, 8, 8, 8, 1, 7, 8, 4, 1, 9, 7, 0, 0, 1, 2, 5, 2, 3, 2, 3, 3,
344        8, 9, 0, 5, 3, 3, 4, 4, 7, 2, 6, 5, 6, 2, 5, 4, 4, 4, 0, 8, 9, 2, 0, 9, 8, 5, 0, 0, 6, 2,
345        6, 1, 6, 1, 6, 9, 4, 5, 2, 6, 6, 7, 2, 3, 6, 3, 2, 8, 1, 2, 5, 2, 2, 2, 0, 4, 4, 6, 0, 4,
346        9, 2, 5, 0, 3, 1, 3, 0, 8, 0, 8, 4, 7, 2, 6, 3, 3, 3, 6, 1, 8, 1, 6, 4, 0, 6, 2, 5, 1, 1,
347        1, 0, 2, 2, 3, 0, 2, 4, 6, 2, 5, 1, 5, 6, 5, 4, 0, 4, 2, 3, 6, 3, 1, 6, 6, 8, 0, 9, 0, 8,
348        2, 0, 3, 1, 2, 5, 5, 5, 5, 1, 1, 1, 5, 1, 2, 3, 1, 2, 5, 7, 8, 2, 7, 0, 2, 1, 1, 8, 1, 5,
349        8, 3, 4, 0, 4, 5, 4, 1, 0, 1, 5, 6, 2, 5, 2, 7, 7, 5, 5, 5, 7, 5, 6, 1, 5, 6, 2, 8, 9, 1,
350        3, 5, 1, 0, 5, 9, 0, 7, 9, 1, 7, 0, 2, 2, 7, 0, 5, 0, 7, 8, 1, 2, 5, 1, 3, 8, 7, 7, 7, 8,
351        7, 8, 0, 7, 8, 1, 4, 4, 5, 6, 7, 5, 5, 2, 9, 5, 3, 9, 5, 8, 5, 1, 1, 3, 5, 2, 5, 3, 9, 0,
352        6, 2, 5, 6, 9, 3, 8, 8, 9, 3, 9, 0, 3, 9, 0, 7, 2, 2, 8, 3, 7, 7, 6, 4, 7, 6, 9, 7, 9, 2,
353        5, 5, 6, 7, 6, 2, 6, 9, 5, 3, 1, 2, 5, 3, 4, 6, 9, 4, 4, 6, 9, 5, 1, 9, 5, 3, 6, 1, 4, 1,
354        8, 8, 8, 2, 3, 8, 4, 8, 9, 6, 2, 7, 8, 3, 8, 1, 3, 4, 7, 6, 5, 6, 2, 5, 1, 7, 3, 4, 7, 2,
355        3, 4, 7, 5, 9, 7, 6, 8, 0, 7, 0, 9, 4, 4, 1, 1, 9, 2, 4, 4, 8, 1, 3, 9, 1, 9, 0, 6, 7, 3,
356        8, 2, 8, 1, 2, 5, 8, 6, 7, 3, 6, 1, 7, 3, 7, 9, 8, 8, 4, 0, 3, 5, 4, 7, 2, 0, 5, 9, 6, 2,
357        2, 4, 0, 6, 9, 5, 9, 5, 3, 3, 6, 9, 1, 4, 0, 6, 2, 5,
358    ];
359
360    shift &= 63;
361    let x_a = TABLE[shift];
362    let x_b = TABLE[shift + 1];
363    let num_new_digits = (x_a >> 11) as _;
364    let pow5_a = (0x7FF & x_a) as usize;
365    let pow5_b = (0x7FF & x_b) as usize;
366    let pow5 = &TABLE_POW5[pow5_a..];
367
368    for (i, &p5) in pow5.iter().enumerate().take(pow5_b - pow5_a) {
369        if i >= d.num_digits {
370            return num_new_digits - 1;
371        } else if d.digits[i] == p5 {
372            continue;
373        } else if d.digits[i] < p5 {
374            return num_new_digits - 1;
375        } else {
376            return num_new_digits;
377        }
378    }
379
380    num_new_digits
381}
core/num/imp/dec2flt/decimal_seq.rs

core/num/imp/dec2flt/
decimal_seq.rs