Skip to main content

core/num/imp/flt2dec/strategy/
grisu.rs

1//! Rust adaptation of the Grisu3 algorithm described in "Printing Floating-Point Numbers Quickly
2//! and Accurately with Integers"[^1]. It uses about 1KB of precomputed table, and in turn, it's
3//! very quick for most inputs.
4//!
5//! [^1]: Florian Loitsch. 2010. Printing floating-point numbers quickly and
6//!   accurately with integers. SIGPLAN Not. 45, 6 (June 2010), 233-243.
7
8use flt2dec::{Decoded, MAX_SIG_DIGITS, round_up};
9
10use crate::mem::MaybeUninit;
11use crate::num::imp::diy_float::Fp;
12use crate::num::imp::flt2dec;
13
14// see the comments in `format_shortest_opt` for the rationale.
15#[doc(hidden)]
16pub const ALPHA: i16 = -60;
17#[doc(hidden)]
18pub const GAMMA: i16 = -32;
19
20/*
21# the following Python code generates this table:
22for i in xrange(-308, 333, 8):
23    if i >= 0: f = 10**i; e = 0
24    else: f = 2**(80-4*i) // 10**-i; e = 4 * i - 80
25    l = f.bit_length()
26    f = ((f << 64 >> (l-1)) + 1) >> 1; e += l - 64
27    print '    (%#018x, %5d, %4d),' % (f, e, i)
28*/
29
30#[doc(hidden)]
31pub static CACHED_POW10: [(u64, i16, i16); 81] = [
32    // (f, e, k)
33    (0xe61acf033d1a45df, -1087, -308),
34    (0xab70fe17c79ac6ca, -1060, -300),
35    (0xff77b1fcbebcdc4f, -1034, -292),
36    (0xbe5691ef416bd60c, -1007, -284),
37    (0x8dd01fad907ffc3c, -980, -276),
38    (0xd3515c2831559a83, -954, -268),
39    (0x9d71ac8fada6c9b5, -927, -260),
40    (0xea9c227723ee8bcb, -901, -252),
41    (0xaecc49914078536d, -874, -244),
42    (0x823c12795db6ce57, -847, -236),
43    (0xc21094364dfb5637, -821, -228),
44    (0x9096ea6f3848984f, -794, -220),
45    (0xd77485cb25823ac7, -768, -212),
46    (0xa086cfcd97bf97f4, -741, -204),
47    (0xef340a98172aace5, -715, -196),
48    (0xb23867fb2a35b28e, -688, -188),
49    (0x84c8d4dfd2c63f3b, -661, -180),
50    (0xc5dd44271ad3cdba, -635, -172),
51    (0x936b9fcebb25c996, -608, -164),
52    (0xdbac6c247d62a584, -582, -156),
53    (0xa3ab66580d5fdaf6, -555, -148),
54    (0xf3e2f893dec3f126, -529, -140),
55    (0xb5b5ada8aaff80b8, -502, -132),
56    (0x87625f056c7c4a8b, -475, -124),
57    (0xc9bcff6034c13053, -449, -116),
58    (0x964e858c91ba2655, -422, -108),
59    (0xdff9772470297ebd, -396, -100),
60    (0xa6dfbd9fb8e5b88f, -369, -92),
61    (0xf8a95fcf88747d94, -343, -84),
62    (0xb94470938fa89bcf, -316, -76),
63    (0x8a08f0f8bf0f156b, -289, -68),
64    (0xcdb02555653131b6, -263, -60),
65    (0x993fe2c6d07b7fac, -236, -52),
66    (0xe45c10c42a2b3b06, -210, -44),
67    (0xaa242499697392d3, -183, -36),
68    (0xfd87b5f28300ca0e, -157, -28),
69    (0xbce5086492111aeb, -130, -20),
70    (0x8cbccc096f5088cc, -103, -12),
71    (0xd1b71758e219652c, -77, -4),
72    (0x9c40000000000000, -50, 4),
73    (0xe8d4a51000000000, -24, 12),
74    (0xad78ebc5ac620000, 3, 20),
75    (0x813f3978f8940984, 30, 28),
76    (0xc097ce7bc90715b3, 56, 36),
77    (0x8f7e32ce7bea5c70, 83, 44),
78    (0xd5d238a4abe98068, 109, 52),
79    (0x9f4f2726179a2245, 136, 60),
80    (0xed63a231d4c4fb27, 162, 68),
81    (0xb0de65388cc8ada8, 189, 76),
82    (0x83c7088e1aab65db, 216, 84),
83    (0xc45d1df942711d9a, 242, 92),
84    (0x924d692ca61be758, 269, 100),
85    (0xda01ee641a708dea, 295, 108),
86    (0xa26da3999aef774a, 322, 116),
87    (0xf209787bb47d6b85, 348, 124),
88    (0xb454e4a179dd1877, 375, 132),
89    (0x865b86925b9bc5c2, 402, 140),
90    (0xc83553c5c8965d3d, 428, 148),
91    (0x952ab45cfa97a0b3, 455, 156),
92    (0xde469fbd99a05fe3, 481, 164),
93    (0xa59bc234db398c25, 508, 172),
94    (0xf6c69a72a3989f5c, 534, 180),
95    (0xb7dcbf5354e9bece, 561, 188),
96    (0x88fcf317f22241e2, 588, 196),
97    (0xcc20ce9bd35c78a5, 614, 204),
98    (0x98165af37b2153df, 641, 212),
99    (0xe2a0b5dc971f303a, 667, 220),
100    (0xa8d9d1535ce3b396, 694, 228),
101    (0xfb9b7cd9a4a7443c, 720, 236),
102    (0xbb764c4ca7a44410, 747, 244),
103    (0x8bab8eefb6409c1a, 774, 252),
104    (0xd01fef10a657842c, 800, 260),
105    (0x9b10a4e5e9913129, 827, 268),
106    (0xe7109bfba19c0c9d, 853, 276),
107    (0xac2820d9623bf429, 880, 284),
108    (0x80444b5e7aa7cf85, 907, 292),
109    (0xbf21e44003acdd2d, 933, 300),
110    (0x8e679c2f5e44ff8f, 960, 308),
111    (0xd433179d9c8cb841, 986, 316),
112    (0x9e19db92b4e31ba9, 1013, 324),
113    (0xeb96bf6ebadf77d9, 1039, 332),
114];
115
116#[doc(hidden)]
117pub const CACHED_POW10_FIRST_E: i16 = -1087;
118#[doc(hidden)]
119pub const CACHED_POW10_LAST_E: i16 = 1039;
120
121#[doc(hidden)]
122pub fn cached_power(alpha: i16, gamma: i16) -> (i16, Fp) {
123    let offset = CACHED_POW10_FIRST_E as i32;
124    let range = (CACHED_POW10.len() as i32) - 1;
125    let domain = (CACHED_POW10_LAST_E - CACHED_POW10_FIRST_E) as i32;
126    let idx = ((gamma as i32) - offset) * range / domain;
127    let (f, e, k) = CACHED_POW10[idx as usize];
128    if true {
    if !(alpha <= e && e <= gamma) {
        crate::panicking::panic("assertion failed: alpha <= e && e <= gamma")
    };
};debug_assert!(alpha <= e && e <= gamma);
129    (k, Fp { f, e })
130}
131
132/// Given `x > 0`, returns `(k, 10^k)` such that `10^k <= x < 10^(k+1)`.
133#[doc(hidden)]
134pub fn max_pow10_no_more_than(x: u32) -> (u8, u32) {
135    if true {
    if !(x > 0) { crate::panicking::panic("assertion failed: x > 0") };
};debug_assert!(x > 0);
136
137    const X9: u32 = 10_0000_0000;
138    const X8: u32 = 1_0000_0000;
139    const X7: u32 = 1000_0000;
140    const X6: u32 = 100_0000;
141    const X5: u32 = 10_0000;
142    const X4: u32 = 1_0000;
143    const X3: u32 = 1000;
144    const X2: u32 = 100;
145    const X1: u32 = 10;
146
147    if x < X4 {
148        if x < X2 {
149            if x < X1 { (0, 1) } else { (1, X1) }
150        } else {
151            if x < X3 { (2, X2) } else { (3, X3) }
152        }
153    } else {
154        if x < X6 {
155            if x < X5 { (4, X4) } else { (5, X5) }
156        } else if x < X8 {
157            if x < X7 { (6, X6) } else { (7, X7) }
158        } else {
159            if x < X9 { (8, X8) } else { (9, X9) }
160        }
161    }
162}
163
164/// The shortest mode implementation for Grisu.
165///
166/// It returns `None` when it would return an inexact representation otherwise.
167pub fn format_shortest_opt<'a>(
168    d: &Decoded,
169    buf: &'a mut [MaybeUninit<u8>],
170) -> Option<(/*digits*/ &'a [u8], /*exp*/ i16)> {
171    if !(d.mant > 0) { crate::panicking::panic("assertion failed: d.mant > 0") };assert!(d.mant > 0);
172    if !(d.minus > 0) {
    crate::panicking::panic("assertion failed: d.minus > 0")
};assert!(d.minus > 0);
173    if !(d.plus > 0) { crate::panicking::panic("assertion failed: d.plus > 0") };assert!(d.plus > 0);
174    if !d.mant.checked_add(d.plus).is_some() {
    crate::panicking::panic("assertion failed: d.mant.checked_add(d.plus).is_some()")
};assert!(d.mant.checked_add(d.plus).is_some());
175    if !d.mant.checked_sub(d.minus).is_some() {
    crate::panicking::panic("assertion failed: d.mant.checked_sub(d.minus).is_some()")
};assert!(d.mant.checked_sub(d.minus).is_some());
176    if !(buf.len() >= MAX_SIG_DIGITS) {
    crate::panicking::panic("assertion failed: buf.len() >= MAX_SIG_DIGITS")
};assert!(buf.len() >= MAX_SIG_DIGITS);
177    if !(d.mant + d.plus < (1 << 61)) {
    crate::panicking::panic("assertion failed: d.mant + d.plus < (1 << 61)")
};assert!(d.mant + d.plus < (1 << 61)); // we need at least three bits of additional precision
178
179    // start with the normalized values with the shared exponent
180    let plus = Fp { f: d.mant + d.plus, e: d.exp }.normalize();
181    let minus = Fp { f: d.mant - d.minus, e: d.exp }.normalize_to(plus.e);
182    let v = Fp { f: d.mant, e: d.exp }.normalize_to(plus.e);
183
184    // find any `cached = 10^minusk` such that `ALPHA <= minusk + plus.e + 64 <= GAMMA`.
185    // since `plus` is normalized, this means `2^(62 + ALPHA) <= plus * cached < 2^(64 + GAMMA)`;
186    // given our choices of `ALPHA` and `GAMMA`, this puts `plus * cached` into `[4, 2^32)`.
187    //
188    // it is obviously desirable to maximize `GAMMA - ALPHA`,
189    // so that we don't need many cached powers of 10, but there are some considerations:
190    //
191    // 1. we want to keep `floor(plus * cached)` within `u32` since it needs a costly division.
192    //    (this is not really avoidable, remainder is required for accuracy estimation.)
193    // 2. the remainder of `floor(plus * cached)` repeatedly gets multiplied by 10,
194    //    and it should not overflow.
195    //
196    // the first gives `64 + GAMMA <= 32`, while the second gives `10 * 2^-ALPHA <= 2^64`;
197    // -60 and -32 is the maximal range with this constraint, and V8 also uses them.
198    let (minusk, cached) = cached_power(ALPHA - plus.e - 64, GAMMA - plus.e - 64);
199
200    // scale fps. this gives the maximal error of 1 ulp (proved from Theorem 5.1).
201    let plus = plus.mul(cached);
202    let minus = minus.mul(cached);
203    let v = v.mul(cached);
204    if true {
    match (&plus.e, &minus.e) {
        (left_val, right_val) => {
            if !(*left_val == *right_val) {
                let kind = crate::panicking::AssertKind::Eq;
                crate::panicking::assert_failed(kind, &*left_val, &*right_val,
                    crate::option::Option::None);
            }
        }
    };
};debug_assert_eq!(plus.e, minus.e);
205    if true {
    match (&plus.e, &v.e) {
        (left_val, right_val) => {
            if !(*left_val == *right_val) {
                let kind = crate::panicking::AssertKind::Eq;
                crate::panicking::assert_failed(kind, &*left_val, &*right_val,
                    crate::option::Option::None);
            }
        }
    };
};debug_assert_eq!(plus.e, v.e);
206
207    //         +- actual range of minus
208    //   | <---|---------------------- unsafe region --------------------------> |
209    //   |     |                                                                 |
210    //   |  |<--->|  | <--------------- safe region ---------------> |           |
211    //   |  |     |  |                                               |           |
212    //   |1 ulp|1 ulp|                 |1 ulp|1 ulp|                 |1 ulp|1 ulp|
213    //   |<--->|<--->|                 |<--->|<--->|                 |<--->|<--->|
214    //   |-----|-----|-------...-------|-----|-----|-------...-------|-----|-----|
215    //   |   minus   |                 |     v     |                 |   plus    |
216    // minus1     minus0           v - 1 ulp   v + 1 ulp           plus0       plus1
217    //
218    // above `minus`, `v` and `plus` are *quantized* approximations (error < 1 ulp).
219    // as we don't know the error is positive or negative, we use two approximations spaced equally
220    // and have the maximal error of 2 ulps.
221    //
222    // the "unsafe region" is a liberal interval which we initially generate.
223    // the "safe region" is a conservative interval which we only accept.
224    // we start with the correct repr within the unsafe region, and try to find the closest repr
225    // to `v` which is also within the safe region. if we can't, we give up.
226    let plus1 = plus.f + 1;
227    //  let plus0 = plus.f - 1; // only for explanation
228    //  let minus0 = minus.f + 1; // only for explanation
229    let minus1 = minus.f - 1;
230    let e = -plus.e as usize; // shared exponent
231
232    // divide `plus1` into integral and fractional parts.
233    // integral parts are guaranteed to fit in u32, since cached power guarantees `plus < 2^32`
234    // and normalized `plus.f` is always less than `2^64 - 2^4` due to the precision requirement.
235    let plus1int = (plus1 >> e) as u32;
236    let plus1frac = plus1 & ((1 << e) - 1);
237
238    // calculate the largest `10^max_kappa` no more than `plus1` (thus `plus1 < 10^(max_kappa+1)`).
239    // this is an upper bound of `kappa` below.
240    let (max_kappa, max_ten_kappa) = max_pow10_no_more_than(plus1int);
241
242    let mut i = 0;
243    let exp = max_kappa as i16 - minusk + 1;
244
245    // Theorem 6.2: if `k` is the greatest integer s.t. `0 <= y mod 10^k <= y - x`,
246    //              then `V = floor(y / 10^k) * 10^k` is in `[x, y]` and one of the shortest
247    //              representations (with the minimal number of significant digits) in that range.
248    //
249    // find the digit length `kappa` between `(minus1, plus1)` as per Theorem 6.2.
250    // Theorem 6.2 can be adopted to exclude `x` by requiring `y mod 10^k < y - x` instead.
251    // (e.g., `x` = 32000, `y` = 32777; `kappa` = 2 since `y mod 10^3 = 777 < y - x = 777`.)
252    // the algorithm relies on the later verification phase to exclude `y`.
253    let delta1 = plus1 - minus1;
254    //  let delta1int = (delta1 >> e) as usize; // only for explanation
255    let delta1frac = delta1 & ((1 << e) - 1);
256
257    // render integral parts, while checking for the accuracy at each step.
258    let mut ten_kappa = max_ten_kappa; // 10^kappa
259    let mut remainder = plus1int; // digits yet to be rendered
260    loop {
261        // we always have at least one digit to render, as `plus1 >= 10^kappa`
262        // invariants:
263        // - `delta1int <= remainder < 10^(kappa+1)`
264        // - `plus1int = d[0..n-1] * 10^(kappa+1) + remainder`
265        //   (it follows that `remainder = plus1int % 10^(kappa+1)`)
266
267        // divide `remainder` by `10^kappa`. both are scaled by `2^-e`.
268        let q = remainder / ten_kappa;
269        let r = remainder % ten_kappa;
270        if true {
    if !(q < 10) { crate::panicking::panic("assertion failed: q < 10") };
};debug_assert!(q < 10);
271        buf[i] = MaybeUninit::new(b'0' + q as u8);
272        i += 1;
273
274        let plus1rem = ((r as u64) << e) + plus1frac; // == (plus1 % 10^kappa) * 2^e
275        if plus1rem < delta1 {
276            // `plus1 % 10^kappa < delta1 = plus1 - minus1`; we've found the correct `kappa`.
277            let ten_kappa = (ten_kappa as u64) << e; // scale 10^kappa back to the shared exponent
278            return round_and_weed(
279                // SAFETY: we initialized that memory above.
280                unsafe { buf[..i].assume_init_mut() },
281                exp,
282                plus1rem,
283                delta1,
284                plus1 - v.f,
285                ten_kappa,
286                1,
287            );
288        }
289
290        // break the loop when we have rendered all integral digits.
291        // the exact number of digits is `max_kappa + 1` as `plus1 < 10^(max_kappa+1)`.
292        if i > max_kappa as usize {
293            if true {
    match (&ten_kappa, &1) {
        (left_val, right_val) => {
            if !(*left_val == *right_val) {
                let kind = crate::panicking::AssertKind::Eq;
                crate::panicking::assert_failed(kind, &*left_val, &*right_val,
                    crate::option::Option::None);
            }
        }
    };
};debug_assert_eq!(ten_kappa, 1);
294            break;
295        }
296
297        // restore invariants
298        ten_kappa /= 10;
299        remainder = r;
300    }
301
302    // render fractional parts, while checking for the accuracy at each step.
303    // this time we rely on repeated multiplications, as division will lose the precision.
304    let mut remainder = plus1frac;
305    let mut threshold = delta1frac;
306    let mut ulp = 1;
307    loop {
308        // the next digit should be significant as we've tested that before breaking out
309        // invariants, where `m = max_kappa + 1` (# of digits in the integral part):
310        // - `remainder < 2^e`
311        // - `plus1frac * 10^(n-m) = d[m..n-1] * 2^e + remainder`
312
313        remainder *= 10; // won't overflow, `2^e * 10 < 2^64`
314        threshold *= 10;
315        ulp *= 10;
316
317        // divide `remainder` by `10^kappa`.
318        // both are scaled by `2^e / 10^kappa`, so the latter is implicit here.
319        let q = remainder >> e;
320        let r = remainder & ((1 << e) - 1);
321        if true {
    if !(q < 10) { crate::panicking::panic("assertion failed: q < 10") };
};debug_assert!(q < 10);
322        buf[i] = MaybeUninit::new(b'0' + q as u8);
323        i += 1;
324
325        if r < threshold {
326            let ten_kappa = 1 << e; // implicit divisor
327            return round_and_weed(
328                // SAFETY: we initialized that memory above.
329                unsafe { buf[..i].assume_init_mut() },
330                exp,
331                r,
332                threshold,
333                (plus1 - v.f) * ulp,
334                ten_kappa,
335                ulp,
336            );
337        }
338
339        // restore invariants
340        remainder = r;
341    }
342
343    // we've generated all significant digits of `plus1`, but not sure if it's the optimal one.
344    // for example, if `minus1` is 3.14153... and `plus1` is 3.14158..., there are 5 different
345    // shortest representation from 3.14154 to 3.14158 but we only have the greatest one.
346    // we have to successively decrease the last digit and check if this is the optimal repr.
347    // there are at most 9 candidates (..1 to ..9), so this is fairly quick. ("rounding" phase)
348    //
349    // the function checks if this "optimal" repr is actually within the ulp ranges,
350    // and also, it is possible that the "second-to-optimal" repr can actually be optimal
351    // due to the rounding error. in either cases this returns `None`. ("weeding" phase)
352    //
353    // all arguments here are scaled by the common (but implicit) value `k`, so that:
354    // - `remainder = (plus1 % 10^kappa) * k`
355    // - `threshold = (plus1 - minus1) * k` (and also, `remainder < threshold`)
356    // - `plus1v = (plus1 - v) * k` (and also, `threshold > plus1v` from prior invariants)
357    // - `ten_kappa = 10^kappa * k`
358    // - `ulp = 2^-e * k`
359    fn round_and_weed(
360        buf: &mut [u8],
361        exp: i16,
362        remainder: u64,
363        threshold: u64,
364        plus1v: u64,
365        ten_kappa: u64,
366        ulp: u64,
367    ) -> Option<(&[u8], i16)> {
368        if !!buf.is_empty() {
    crate::panicking::panic("assertion failed: !buf.is_empty()")
};assert!(!buf.is_empty());
369
370        // produce two approximations to `v` (actually `plus1 - v`) within 1.5 ulps.
371        // the resulting representation should be the closest representation to both.
372        //
373        // here `plus1 - v` is used since calculations are done with respect to `plus1`
374        // in order to avoid overflow/underflow (hence the seemingly swapped names).
375        let plus1v_down = plus1v + ulp; // plus1 - (v - 1 ulp)
376        let plus1v_up = plus1v - ulp; // plus1 - (v + 1 ulp)
377
378        // decrease the last digit and stop at the closest representation to `v + 1 ulp`.
379        let mut plus1w = remainder; // plus1w(n) = plus1 - w(n)
380        {
381            let last = buf.last_mut().unwrap();
382
383            // we work with the approximated digits `w(n)`, which is initially equal to `plus1 -
384            // plus1 % 10^kappa`. after running the loop body `n` times, `w(n) = plus1 -
385            // plus1 % 10^kappa - n * 10^kappa`. we set `plus1w(n) = plus1 - w(n) =
386            // plus1 % 10^kappa + n * 10^kappa` (thus `remainder = plus1w(0)`) to simplify checks.
387            // note that `plus1w(n)` is always increasing.
388            //
389            // we have three conditions to terminate. any of them will make the loop unable to
390            // proceed, but we then have at least one valid representation known to be closest to
391            // `v + 1 ulp` anyway. we will denote them as TC1 through TC3 for brevity.
392            //
393            // TC1: `w(n) <= v + 1 ulp`, i.e., this is the last repr that can be the closest one.
394            // this is equivalent to `plus1 - w(n) = plus1w(n) >= plus1 - (v + 1 ulp) = plus1v_up`.
395            // combined with TC2 (which checks if `w(n+1)` is valid), this prevents the possible
396            // overflow on the calculation of `plus1w(n)`.
397            //
398            // TC2: `w(n+1) < minus1`, i.e., the next repr definitely does not round to `v`.
399            // this is equivalent to `plus1 - w(n) + 10^kappa = plus1w(n) + 10^kappa >
400            // plus1 - minus1 = threshold`. the left hand side can overflow, but we know
401            // `threshold > plus1v`, so if TC1 is false, `threshold - plus1w(n) >
402            // threshold - (plus1v - 1 ulp) > 1 ulp` and we can safely test if
403            // `threshold - plus1w(n) < 10^kappa` instead.
404            //
405            // TC3: `abs(w(n) - (v + 1 ulp)) <= abs(w(n+1) - (v + 1 ulp))`, i.e., the next repr is
406            // no closer to `v + 1 ulp` than the current repr. given `z(n) = plus1v_up - plus1w(n)`,
407            // this becomes `abs(z(n)) <= abs(z(n+1))`. again assuming that TC1 is false, we have
408            // `z(n) > 0`. we have two cases to consider:
409            //
410            // - when `z(n+1) >= 0`: TC3 becomes `z(n) <= z(n+1)`. as `plus1w(n)` is increasing,
411            //   `z(n)` should be decreasing and this is clearly false.
412            // - when `z(n+1) < 0`:
413            //   - TC3a: the precondition is `plus1v_up < plus1w(n) + 10^kappa`. assuming TC2 is
414            //     false, `threshold >= plus1w(n) + 10^kappa` so it cannot overflow.
415            //   - TC3b: TC3 becomes `z(n) <= -z(n+1)`, i.e., `plus1v_up - plus1w(n) >=
416            //     plus1w(n+1) - plus1v_up = plus1w(n) + 10^kappa - plus1v_up`. the negated TC1
417            //     gives `plus1v_up > plus1w(n)`, so it cannot overflow or underflow when
418            //     combined with TC3a.
419            //
420            // consequently, we should stop when `TC1 || TC2 || (TC3a && TC3b)`. the following is
421            // equal to its inverse, `!TC1 && !TC2 && (!TC3a || !TC3b)`.
422            while plus1w < plus1v_up
423                && threshold - plus1w >= ten_kappa
424                && (plus1w + ten_kappa < plus1v_up
425                    || plus1v_up - plus1w >= plus1w + ten_kappa - plus1v_up)
426            {
427                *last -= 1;
428                if true {
    if !(*last > b'0') {
        crate::panicking::panic("assertion failed: *last > b\'0\'")
    };
};debug_assert!(*last > b'0'); // the shortest repr cannot end with `0`
429                plus1w += ten_kappa;
430            }
431        }
432
433        // check if this representation is also the closest representation to `v - 1 ulp`.
434        //
435        // this is simply same to the terminating conditions for `v + 1 ulp`, with all `plus1v_up`
436        // replaced by `plus1v_down` instead. overflow analysis equally holds.
437        if plus1w < plus1v_down
438            && threshold - plus1w >= ten_kappa
439            && (plus1w + ten_kappa < plus1v_down
440                || plus1v_down - plus1w >= plus1w + ten_kappa - plus1v_down)
441        {
442            return None;
443        }
444
445        // now we have the closest representation to `v` between `plus1` and `minus1`.
446        // this is too liberal, though, so we reject any `w(n)` not between `plus0` and `minus0`,
447        // i.e., `plus1 - plus1w(n) <= minus0` or `plus1 - plus1w(n) >= plus0`. we utilize the facts
448        // that `threshold = plus1 - minus1` and `plus1 - plus0 = minus0 - minus1 = 2 ulp`.
449        if 2 * ulp <= plus1w && plus1w <= threshold - 4 * ulp { Some((buf, exp)) } else { None }
450    }
451}
452
453/// The shortest mode implementation for Grisu with Dragon fallback.
454///
455/// This should be used for most cases.
456pub fn format_shortest<'a>(
457    d: &Decoded,
458    buf: &'a mut [MaybeUninit<u8>],
459) -> (/*digits*/ &'a [u8], /*exp*/ i16) {
460    use flt2dec::strategy::dragon::format_shortest as fallback;
461    // SAFETY: The borrow checker is not smart enough to let us use `buf`
462    // in the second branch, so we launder the lifetime here. But we only re-use
463    // `buf` if `format_shortest_opt` returned `None` so this is okay.
464    match format_shortest_opt(d, unsafe { &mut *(buf as *mut _) }) {
465        Some(ret) => ret,
466        None => fallback(d, buf),
467    }
468}
469
470/// The exact and fixed mode implementation for Grisu.
471///
472/// It returns `None` when it would return an inexact representation otherwise.
473pub fn format_exact_opt<'a>(
474    d: &Decoded,
475    buf: &'a mut [MaybeUninit<u8>],
476    limit: i16,
477) -> Option<(/*digits*/ &'a [u8], /*exp*/ i16)> {
478    if !(d.mant > 0) { crate::panicking::panic("assertion failed: d.mant > 0") };assert!(d.mant > 0);
479    if !(d.mant < (1 << 61)) {
    crate::panicking::panic("assertion failed: d.mant < (1 << 61)")
};assert!(d.mant < (1 << 61)); // we need at least three bits of additional precision
480    if !!buf.is_empty() {
    crate::panicking::panic("assertion failed: !buf.is_empty()")
};assert!(!buf.is_empty());
481
482    // normalize and scale `v`.
483    let v = Fp { f: d.mant, e: d.exp }.normalize();
484    let (minusk, cached) = cached_power(ALPHA - v.e - 64, GAMMA - v.e - 64);
485    let v = v.mul(cached);
486
487    // divide `v` into integral and fractional parts.
488    let e = -v.e as usize;
489    let vint = (v.f >> e) as u32;
490    let vfrac = v.f & ((1 << e) - 1);
491
492    let requested_digits = buf.len();
493
494    const POW10_UP_TO_9: [u32; 10] =
495        [1, 10, 100, 1000, 10_000, 100_000, 1_000_000, 10_000_000, 100_000_000, 1_000_000_000];
496
497    // We deviate from the original algorithm here and do some early checks to determine if we can satisfy requested_digits.
498    // If we determine that we can't, we exit early and avoid most of the heavy lifting that the algorithm otherwise does.
499    //
500    // When vfrac is zero, we can easily determine if vint can satisfy requested digits:
501    //      If requested_digits >= 11, vint is not able to exhaust the count by itself since 10^(11 -1) > u32 max value >= vint.
502    //      If vint < 10^(requested_digits - 1), vint cannot exhaust the count.
503    //      Otherwise, vint might be able to exhaust the count and we need to execute the rest of the code.
504    if (vfrac == 0) && ((requested_digits >= 11) || (vint < POW10_UP_TO_9[requested_digits - 1])) {
505        return None;
506    }
507
508    // both old `v` and new `v` (scaled by `10^-k`) has an error of < 1 ulp (Theorem 5.1).
509    // as we don't know the error is positive or negative, we use two approximations
510    // spaced equally and have the maximal error of 2 ulps (same to the shortest case).
511    //
512    // the goal is to find the exactly rounded series of digits that are common to
513    // both `v - 1 ulp` and `v + 1 ulp`, so that we are maximally confident.
514    // if this is not possible, we don't know which one is the correct output for `v`,
515    // so we give up and fall back.
516    //
517    // `err` is defined as `1 ulp * 2^e` here (same to the ulp in `vfrac`),
518    // and we will scale it whenever `v` gets scaled.
519    let mut err = 1;
520
521    // calculate the largest `10^max_kappa` no more than `v` (thus `v < 10^(max_kappa+1)`).
522    // this is an upper bound of `kappa` below.
523    let (max_kappa, max_ten_kappa) = max_pow10_no_more_than(vint);
524
525    let mut i = 0;
526    let exp = max_kappa as i16 - minusk + 1;
527
528    // if we are working with the last-digit limitation, we need to shorten the buffer
529    // before the actual rendering in order to avoid double rounding.
530    // note that we have to enlarge the buffer again when rounding up happens!
531    let len = if exp <= limit {
532        // oops, we cannot even produce *one* digit.
533        // this is possible when, say, we've got something like 9.5 and it's being rounded to 10.
534        //
535        // in principle we can immediately call `possibly_round` with an empty buffer,
536        // but scaling `max_ten_kappa << e` by 10 can result in overflow.
537        // thus we are being sloppy here and widen the error range by a factor of 10.
538        // this will increase the false negative rate, but only very, *very* slightly;
539        // it can only matter noticeably when the mantissa is bigger than 60 bits.
540        //
541        // SAFETY: `len=0`, so the obligation of having initialized this memory is trivial.
542        return unsafe {
543            possibly_round(buf, 0, exp, limit, v.f / 10, (max_ten_kappa as u64) << e, err << e)
544        };
545    } else if ((exp as i32 - limit as i32) as usize) < buf.len() {
546        (exp - limit) as usize
547    } else {
548        buf.len()
549    };
550    if true {
    if !(len > 0) { crate::panicking::panic("assertion failed: len > 0") };
};debug_assert!(len > 0);
551
552    // render integral parts.
553    // the error is entirely fractional, so we don't need to check it in this part.
554    let mut kappa = max_kappa as i16;
555    let mut ten_kappa = max_ten_kappa; // 10^kappa
556    let mut remainder = vint; // digits yet to be rendered
557    loop {
558        // we always have at least one digit to render
559        // invariants:
560        // - `remainder < 10^(kappa+1)`
561        // - `vint = d[0..n-1] * 10^(kappa+1) + remainder`
562        //   (it follows that `remainder = vint % 10^(kappa+1)`)
563
564        // divide `remainder` by `10^kappa`. both are scaled by `2^-e`.
565        let q = remainder / ten_kappa;
566        let r = remainder % ten_kappa;
567        if true {
    if !(q < 10) { crate::panicking::panic("assertion failed: q < 10") };
};debug_assert!(q < 10);
568        buf[i] = MaybeUninit::new(b'0' + q as u8);
569        i += 1;
570
571        // is the buffer full? run the rounding pass with the remainder.
572        if i == len {
573            let vrem = ((r as u64) << e) + vfrac; // == (v % 10^kappa) * 2^e
574            // SAFETY: we have initialized `len` many bytes.
575            return unsafe {
576                possibly_round(buf, len, exp, limit, vrem, (ten_kappa as u64) << e, err << e)
577            };
578        }
579
580        // break the loop when we have rendered all integral digits.
581        // the exact number of digits is `max_kappa + 1` as `plus1 < 10^(max_kappa+1)`.
582        if i > max_kappa as usize {
583            if true {
    match (&ten_kappa, &1) {
        (left_val, right_val) => {
            if !(*left_val == *right_val) {
                let kind = crate::panicking::AssertKind::Eq;
                crate::panicking::assert_failed(kind, &*left_val, &*right_val,
                    crate::option::Option::None);
            }
        }
    };
};debug_assert_eq!(ten_kappa, 1);
584            if true {
    match (&kappa, &0) {
        (left_val, right_val) => {
            if !(*left_val == *right_val) {
                let kind = crate::panicking::AssertKind::Eq;
                crate::panicking::assert_failed(kind, &*left_val, &*right_val,
                    crate::option::Option::None);
            }
        }
    };
};debug_assert_eq!(kappa, 0);
585            break;
586        }
587
588        // restore invariants
589        kappa -= 1;
590        ten_kappa /= 10;
591        remainder = r;
592    }
593
594    // render fractional parts.
595    //
596    // in principle we can continue to the last available digit and check for the accuracy.
597    // unfortunately we are working with the finite-sized integers, so we need some criterion
598    // to detect the overflow. V8 uses `remainder > err`, which becomes false when
599    // the first `i` significant digits of `v - 1 ulp` and `v` differ. however this rejects
600    // too many otherwise valid input.
601    //
602    // since the later phase has a correct overflow detection, we instead use tighter criterion:
603    // we continue til `err` exceeds `10^kappa / 2`, so that the range between `v - 1 ulp` and
604    // `v + 1 ulp` definitely contains two or more rounded representations. this is same to
605    // the first two comparisons from `possibly_round`, for the reference.
606    let mut remainder = vfrac;
607    let maxerr = 1 << (e - 1);
608    while err < maxerr {
609        // invariants, where `m = max_kappa + 1` (# of digits in the integral part):
610        // - `remainder < 2^e`
611        // - `vfrac * 10^(n-m) = d[m..n-1] * 2^e + remainder`
612        // - `err = 10^(n-m)`
613
614        remainder *= 10; // won't overflow, `2^e * 10 < 2^64`
615        err *= 10; // won't overflow, `err * 10 < 2^e * 5 < 2^64`
616
617        // divide `remainder` by `10^kappa`.
618        // both are scaled by `2^e / 10^kappa`, so the latter is implicit here.
619        let q = remainder >> e;
620        let r = remainder & ((1 << e) - 1);
621        if true {
    if !(q < 10) { crate::panicking::panic("assertion failed: q < 10") };
};debug_assert!(q < 10);
622        buf[i] = MaybeUninit::new(b'0' + q as u8);
623        i += 1;
624
625        // is the buffer full? run the rounding pass with the remainder.
626        if i == len {
627            // SAFETY: we have initialized `len` many bytes.
628            return unsafe { possibly_round(buf, len, exp, limit, r, 1 << e, err) };
629        }
630
631        // restore invariants
632        remainder = r;
633    }
634
635    // further calculation is useless (`possibly_round` definitely fails), so we give up.
636    return None;
637
638    // we've generated all requested digits of `v`, which should be also same to corresponding
639    // digits of `v - 1 ulp`. now we check if there is a unique representation shared by
640    // both `v - 1 ulp` and `v + 1 ulp`; this can be either same to generated digits, or
641    // to the rounded-up version of those digits. if the range contains multiple representations
642    // of the same length, we cannot be sure and should return `None` instead.
643    //
644    // all arguments here are scaled by the common (but implicit) value `k`, so that:
645    // - `remainder = (v % 10^kappa) * k`
646    // - `ten_kappa = 10^kappa * k`
647    // - `ulp = 2^-e * k`
648    //
649    // SAFETY: the first `len` bytes of `buf` must be initialized.
650    unsafe fn possibly_round(
651        buf: &mut [MaybeUninit<u8>],
652        mut len: usize,
653        mut exp: i16,
654        limit: i16,
655        remainder: u64,
656        ten_kappa: u64,
657        ulp: u64,
658    ) -> Option<(&[u8], i16)> {
659        if true {
    if !(remainder < ten_kappa) {
        crate::panicking::panic("assertion failed: remainder < ten_kappa")
    };
};debug_assert!(remainder < ten_kappa);
660
661        //           10^kappa
662        //    :   :   :<->:   :
663        //    :   :   :   :   :
664        //    :|1 ulp|1 ulp|  :
665        //    :|<--->|<--->|  :
666        // ----|-----|-----|----
667        //     |     v     |
668        // v - 1 ulp   v + 1 ulp
669        //
670        // (for the reference, the dotted line indicates the exact value for
671        // possible representations in given number of digits.)
672        //
673        // error is too large that there are at least three possible representations
674        // between `v - 1 ulp` and `v + 1 ulp`. we cannot determine which one is correct.
675        if ulp >= ten_kappa {
676            return None;
677        }
678
679        //    10^kappa
680        //   :<------->:
681        //   :         :
682        //   : |1 ulp|1 ulp|
683        //   : |<--->|<--->|
684        // ----|-----|-----|----
685        //     |     v     |
686        // v - 1 ulp   v + 1 ulp
687        //
688        // in fact, 1/2 ulp is enough to introduce two possible representations.
689        // (remember that we need a unique representation for both `v - 1 ulp` and `v + 1 ulp`.)
690        // this won't overflow, as `ulp < ten_kappa` from the first check.
691        if ten_kappa - ulp <= ulp {
692            return None;
693        }
694
695        //     remainder
696        //       :<->|                           :
697        //       :   |                           :
698        //       :<--------- 10^kappa ---------->:
699        //     | :   |                           :
700        //     |1 ulp|1 ulp|                     :
701        //     |<--->|<--->|                     :
702        // ----|-----|-----|------------------------
703        //     |     v     |
704        // v - 1 ulp   v + 1 ulp
705        //
706        // if `v + 1 ulp` is closer to the rounded-down representation (which is already in `buf`),
707        // then we can safely return. note that `v - 1 ulp` *can* be less than the current
708        // representation, but as `1 ulp < 10^kappa / 2`, this condition is enough:
709        // the distance between `v - 1 ulp` and the current representation
710        // cannot exceed `10^kappa / 2`.
711        //
712        // the condition equals to `remainder + ulp < 10^kappa / 2`.
713        // since this can easily overflow, first check if `remainder < 10^kappa / 2`.
714        // we've already verified that `ulp < 10^kappa / 2`, so as long as
715        // `10^kappa` did not overflow after all, the second check is fine.
716        if ten_kappa - remainder > remainder && ten_kappa - 2 * remainder >= 2 * ulp {
717            // SAFETY: our caller initialized that memory.
718            return Some((unsafe { buf[..len].assume_init_ref() }, exp));
719        }
720
721        //   :<------- remainder ------>|   :
722        //   :                          |   :
723        //   :<--------- 10^kappa --------->:
724        //   :                    |     |   : |
725        //   :                    |1 ulp|1 ulp|
726        //   :                    |<--->|<--->|
727        // -----------------------|-----|-----|-----
728        //                        |     v     |
729        //                    v - 1 ulp   v + 1 ulp
730        //
731        // on the other hands, if `v - 1 ulp` is closer to the rounded-up representation,
732        // we should round up and return. for the same reason we don't need to check `v + 1 ulp`.
733        //
734        // the condition equals to `remainder - ulp >= 10^kappa / 2`.
735        // again we first check if `remainder > ulp` (note that this is not `remainder >= ulp`,
736        // as `10^kappa` is never zero). also note that `remainder - ulp <= 10^kappa`,
737        // so the second check does not overflow.
738        if remainder > ulp && ten_kappa - (remainder - ulp) <= remainder - ulp {
739            if let Some(c) =
740                // SAFETY: our caller must have initialized that memory.
741                round_up(unsafe { buf[..len].assume_init_mut() })
742            {
743                // only add an additional digit when we've been requested the fixed precision.
744                // we also need to check that, if the original buffer was empty,
745                // the additional digit can only be added when `exp == limit` (edge case).
746                exp += 1;
747                if exp > limit && len < buf.len() {
748                    buf[len] = MaybeUninit::new(c);
749                    len += 1;
750                }
751            }
752            // SAFETY: we and our caller initialized that memory.
753            return Some((unsafe { buf[..len].assume_init_ref() }, exp));
754        }
755
756        // otherwise we are doomed (i.e., some values between `v - 1 ulp` and `v + 1 ulp` are
757        // rounding down and others are rounding up) and give up.
758        None
759    }
760}
761
762/// The exact and fixed mode implementation for Grisu with Dragon fallback.
763///
764/// This should be used for most cases.
765pub fn format_exact<'a>(
766    d: &Decoded,
767    buf: &'a mut [MaybeUninit<u8>],
768    limit: i16,
769) -> (/*digits*/ &'a [u8], /*exp*/ i16) {
770    use flt2dec::strategy::dragon::format_exact as fallback;
771    // SAFETY: The borrow checker is not smart enough to let us use `buf`
772    // in the second branch, so we launder the lifetime here. But we only re-use
773    // `buf` if `format_exact_opt` returned `None` so this is okay.
774    match format_exact_opt(d, unsafe { &mut *(buf as *mut _) }, limit) {
775        Some(ret) => ret,
776        None => fallback(d, buf, limit),
777    }
778}