Write u128 using only two divisions

This commit is contained in:
David Tolnay 2017-09-16 13:18:37 -07:00
parent 23d280dc7a
commit 3e47651d90
No known key found for this signature in database
GPG Key ID: F9BA143B95FF6D82
2 changed files with 86 additions and 26 deletions

View File

@ -40,13 +40,7 @@ const MAX_LEN: usize = 40; // i128::MIN (including minus sign)
// Adaptation of the original implementation at
// https://github.com/rust-lang/rust/blob/b8214dc6c6fc20d0a660fb5700dca9ebf51ebe89/src/libcore/fmt/num.rs#L188-L266
macro_rules! impl_Integer {
($($t:ident),* as $conv_fn:ident) =>
(impl_Integer!(
$($t),* as $conv_fn,
|n:$conv_fn| (n / 10000, (n % 10000) as isize)
););
($($t:ident),* as $conv_fn:ident, $divmod_10000:expr) => ($(
($($t:ident),* as $conv_fn:ident) => ($(
impl Integer for $t {
fn write<W: io::Write>(self, mut wr: W) -> io::Result<usize> {
let mut buf = unsafe { mem::uninitialized() };
@ -74,13 +68,11 @@ macro_rules! impl_Integer {
// eagerly decode 4 characters at a time
if <$t>::max_value() as u64 >= 10000 {
while n >= 10000 {
// division with remainder on u128 is badly optimized by LLVM.
// see “udiv128.rs” for more info.
let (q, r) = $divmod_10000(n);
n = q;
let rem = (n % 10000) as isize;
n /= 10000;
let d1 = (r / 100) << 1;
let d2 = (r % 100) << 1;
let d1 = (rem / 100) << 1;
let d2 = (rem % 100) << 1;
curr -= 4;
ptr::copy_nonoverlapping(lut_ptr.offset(d1), buf_ptr.offset(curr), 2);
ptr::copy_nonoverlapping(lut_ptr.offset(d2), buf_ptr.offset(curr + 2), 2);
@ -128,5 +120,73 @@ impl_Integer!(isize, usize as u16);
impl_Integer!(isize, usize as u32);
#[cfg(target_pointer_width = "64")]
impl_Integer!(isize, usize as u64);
#[cfg(all(feature = "i128"))]
impl_Integer!(i128, u128 as u128, udiv128::udivmod_10000);
// Implements `Integer` and `IntegerPrivate` for each listed 128-bit integer
// type. The value is split into base-10^19 chunks with at most two calls to
// `udiv128::udivmod_1e19`; each chunk fits in a u64 and is formatted by the
// u64 `write_to`. Digits are laid out from the end of the buffer backwards.
macro_rules! impl_Integer128 {
($($t:ident),*) => {$(
impl Integer for $t {
// Formats `self` in decimal into a temporary buffer, writes all of the
// resulting bytes to `wr`, and returns how many bytes were written.
// Any error from `write_all` is propagated by `try!`.
fn write<W: io::Write>(self, mut wr: W) -> io::Result<usize> {
// The buffer is left uninitialized; only the slice returned by `write_to`
// is read afterwards.
let mut buf = unsafe { mem::uninitialized() };
let bytes = self.write_to(&mut buf);
try!(wr.write_all(bytes));
Ok(bytes.len())
}
}
impl IntegerPrivate for $t {
// Writes the decimal representation of `self` into the tail of `buf` and
// returns the sub-slice of `buf` holding it (optional '-' followed by digits).
//
// `unused_comparisons` is allowed because `self >= 0` is trivially true when
// the macro is instantiated for an unsigned type.
#[allow(unused_comparisons)]
fn write_to(self, buf: &mut [u8; MAX_LEN]) -> &[u8] {
let is_nonnegative = self >= 0;
let n = if is_nonnegative {
self as u128
} else {
// Negate in unsigned space (two's complement): flip every bit, then add 1.
// Wrapping arithmetic keeps this well defined for the minimum value too.
(!(self as u128)).wrapping_add(1)
};
// `curr` is the index in `buf` of the first byte written so far; it starts
// one past the end and moves towards the front as output is produced.
let mut curr = buf.len() as isize;
let buf_ptr = buf.as_mut_ptr();
unsafe {
// Divide by 10^19, the largest power of ten that is less than 2^64, so the
// remainder always fits in a u64.
let (n, rem) = udiv128::udivmod_1e19(n);
// Format the low chunk. The bookkeeping here relies on the u64 `write_to`
// placing its digits at the very end of the buffer it is handed.
curr -= rem.write_to(buf).len() as isize;
if n != 0 {
// More significant digits follow, so the low chunk must occupy exactly 19
// columns: fill the gap between `target` and `curr` with '0'.
let target = buf.len() as isize - 19;
ptr::write_bytes(buf_ptr.offset(target), b'0', (curr - target) as usize);
curr = target;
// Divide by 10^19 again.
let (n, rem) = udiv128::udivmod_1e19(n);
// View the buffer through a pointer shifted back by `buf.len() - curr`
// bytes, so that the end of that view coincides with `buf[curr]` and the
// middle chunk lands directly in front of the 19 digits already written.
// NOTE(review): the array seen through `buf2` starts before `buf` itself;
// this is only sound if the u64 `write_to` touches nothing but the tail of
// its buffer — confirm against that implementation.
let buf2 = buf_ptr.offset(curr - buf.len() as isize) as *mut _;
curr -= rem.write_to(&mut *buf2).len() as isize;
if n != 0 {
// A leading digit follows, so pad the middle chunk to 19 columns as well
// (38 digits in total behind the leading one).
let target = buf.len() as isize - 38;
ptr::write_bytes(buf_ptr.offset(target), b'0', (curr - target) as usize);
curr = target;
// There is at most one digit left
// because u128::max / 10^19 / 10^19 is 3.
curr -= 1;
*buf_ptr.offset(curr) = (n as u8) + b'0';
}
}
// Prepend the sign for negative inputs.
if !is_nonnegative {
curr -= 1;
*buf_ptr.offset(curr) = b'-';
}
// Everything from `curr` to the end of `buf` is the formatted number.
let len = buf.len() - curr as usize;
slice::from_raw_parts(buf_ptr.offset(curr), len)
}
}
}
)*};
}
#[cfg(all(feature = "i128"))]
impl_Integer128!(i128, u128);

View File

@ -21,25 +21,25 @@
// (https://github.com/rust-lang/rust/issues/44545) and to allow function
// inlining which doesn't happen with the intrinsic.
pub fn udivmod_10000(n: u128) -> (u128, isize) {
pub fn udivmod_1e19(n: u128) -> (u128, u64) {
let d = 10_000_000_000_000_000_000_u64; // 10^19
let high = (n >> 64) as u64;
if high == 0 {
let low = n as u64;
return ((low / 10000) as u128, (low % 10000) as isize);
return ((low / d) as u128, low % d);
}
let leading_zeros_10000 = 114;
debug_assert_eq!(leading_zeros_10000, 10000u128.leading_zeros());
let sr = 1 + leading_zeros_10000 - high.leading_zeros();
let sr = 65 - high.leading_zeros();
// 52 <= sr <= 115
// 2 <= sr <= 65
let mut q: u128 = n << (128 - sr);
let mut r: u128 = n >> sr;
let mut carry: u64 = 0;
// Don't use a range because they may generate references to memcpy in unoptimized code
//
// Loop invariants: r < 10000; carry is 0 or 1
// Loop invariants: r < d; carry is 0 or 1
let mut i = 0;
while i < sr {
i += 1;
@ -49,14 +49,14 @@ pub fn udivmod_10000(n: u128) -> (u128, isize) {
q = (q << 1) | carry as u128;
// carry = 0
// if r >= 10000 {
// r -= 10000;
// if r >= d {
// r -= d;
// carry = 1;
// }
let s = 10000u128.wrapping_sub(r).wrapping_sub(1) as i128 >> 127;
let s = (d as u128).wrapping_sub(r).wrapping_sub(1) as i128 >> 127;
carry = (s & 1) as u64;
r -= 10000u128 & s as u128;
r -= (d as u128) & s as u128;
}
((q << 1) | carry as u128, r as isize)
((q << 1) | carry as u128, r as u64)
}