Write u128 using only two divisions
This commit is contained in:
parent
23d280dc7a
commit
3e47651d90
88
src/lib.rs
88
src/lib.rs
|
@ -40,13 +40,7 @@ const MAX_LEN: usize = 40; // i128::MIN (including minus sign)
|
||||||
// Adaptation of the original implementation at
|
// Adaptation of the original implementation at
|
||||||
// https://github.com/rust-lang/rust/blob/b8214dc6c6fc20d0a660fb5700dca9ebf51ebe89/src/libcore/fmt/num.rs#L188-L266
|
// https://github.com/rust-lang/rust/blob/b8214dc6c6fc20d0a660fb5700dca9ebf51ebe89/src/libcore/fmt/num.rs#L188-L266
|
||||||
macro_rules! impl_Integer {
|
macro_rules! impl_Integer {
|
||||||
($($t:ident),* as $conv_fn:ident) =>
|
($($t:ident),* as $conv_fn:ident) => ($(
|
||||||
(impl_Integer!(
|
|
||||||
$($t),* as $conv_fn,
|
|
||||||
|n:$conv_fn| (n / 10000, (n % 10000) as isize)
|
|
||||||
););
|
|
||||||
|
|
||||||
($($t:ident),* as $conv_fn:ident, $divmod_10000:expr) => ($(
|
|
||||||
impl Integer for $t {
|
impl Integer for $t {
|
||||||
fn write<W: io::Write>(self, mut wr: W) -> io::Result<usize> {
|
fn write<W: io::Write>(self, mut wr: W) -> io::Result<usize> {
|
||||||
let mut buf = unsafe { mem::uninitialized() };
|
let mut buf = unsafe { mem::uninitialized() };
|
||||||
|
@ -74,13 +68,11 @@ macro_rules! impl_Integer {
|
||||||
// eagerly decode 4 characters at a time
|
// eagerly decode 4 characters at a time
|
||||||
if <$t>::max_value() as u64 >= 10000 {
|
if <$t>::max_value() as u64 >= 10000 {
|
||||||
while n >= 10000 {
|
while n >= 10000 {
|
||||||
// division with remainder on u128 is badly optimized by LLVM.
|
let rem = (n % 10000) as isize;
|
||||||
// see “udiv128.rs” for more info.
|
n /= 10000;
|
||||||
let (q, r) = $divmod_10000(n);
|
|
||||||
n = q;
|
|
||||||
|
|
||||||
let d1 = (r / 100) << 1;
|
let d1 = (rem / 100) << 1;
|
||||||
let d2 = (r % 100) << 1;
|
let d2 = (rem % 100) << 1;
|
||||||
curr -= 4;
|
curr -= 4;
|
||||||
ptr::copy_nonoverlapping(lut_ptr.offset(d1), buf_ptr.offset(curr), 2);
|
ptr::copy_nonoverlapping(lut_ptr.offset(d1), buf_ptr.offset(curr), 2);
|
||||||
ptr::copy_nonoverlapping(lut_ptr.offset(d2), buf_ptr.offset(curr + 2), 2);
|
ptr::copy_nonoverlapping(lut_ptr.offset(d2), buf_ptr.offset(curr + 2), 2);
|
||||||
|
@ -128,5 +120,73 @@ impl_Integer!(isize, usize as u16);
|
||||||
impl_Integer!(isize, usize as u32);
|
impl_Integer!(isize, usize as u32);
|
||||||
#[cfg(target_pointer_width = "64")]
|
#[cfg(target_pointer_width = "64")]
|
||||||
impl_Integer!(isize, usize as u64);
|
impl_Integer!(isize, usize as u64);
|
||||||
|
|
||||||
#[cfg(all(feature = "i128"))]
|
#[cfg(all(feature = "i128"))]
|
||||||
impl_Integer!(i128, u128 as u128, udiv128::udivmod_10000);
|
macro_rules! impl_Integer128 {
|
||||||
|
($($t:ident),*) => {$(
|
||||||
|
impl Integer for $t {
|
||||||
|
fn write<W: io::Write>(self, mut wr: W) -> io::Result<usize> {
|
||||||
|
let mut buf = unsafe { mem::uninitialized() };
|
||||||
|
let bytes = self.write_to(&mut buf);
|
||||||
|
try!(wr.write_all(bytes));
|
||||||
|
Ok(bytes.len())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl IntegerPrivate for $t {
|
||||||
|
#[allow(unused_comparisons)]
|
||||||
|
fn write_to(self, buf: &mut [u8; MAX_LEN]) -> &[u8] {
|
||||||
|
let is_nonnegative = self >= 0;
|
||||||
|
let n = if is_nonnegative {
|
||||||
|
self as u128
|
||||||
|
} else {
|
||||||
|
// convert the negative number to its magnitude: flip all bits (one's complement), then add 1
|
||||||
|
(!(self as u128)).wrapping_add(1)
|
||||||
|
};
|
||||||
|
let mut curr = buf.len() as isize;
|
||||||
|
let buf_ptr = buf.as_mut_ptr();
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
// Divide by 10^19, which is the highest power of 10 less than 2^64.
|
||||||
|
let (n, rem) = udiv128::udivmod_1e19(n);
|
||||||
|
curr -= rem.write_to(buf).len() as isize;
|
||||||
|
|
||||||
|
if n != 0 {
|
||||||
|
// Memset the base10 leading zeros of rem.
|
||||||
|
let target = buf.len() as isize - 19;
|
||||||
|
ptr::write_bytes(buf_ptr.offset(target), b'0', (curr - target) as usize);
|
||||||
|
curr = target;
|
||||||
|
|
||||||
|
// Divide by 10^19 again.
|
||||||
|
let (n, rem) = udiv128::udivmod_1e19(n);
|
||||||
|
let buf2 = buf_ptr.offset(curr - buf.len() as isize) as *mut _;
|
||||||
|
curr -= rem.write_to(&mut *buf2).len() as isize;
|
||||||
|
|
||||||
|
if n != 0 {
|
||||||
|
// Memset the leading zeros.
|
||||||
|
let target = buf.len() as isize - 38;
|
||||||
|
ptr::write_bytes(buf_ptr.offset(target), b'0', (curr - target) as usize);
|
||||||
|
curr = target;
|
||||||
|
|
||||||
|
// There is at most one digit left
|
||||||
|
// because u128::max / 10^19 / 10^19 is 3.
|
||||||
|
curr -= 1;
|
||||||
|
*buf_ptr.offset(curr) = (n as u8) + b'0';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !is_nonnegative {
|
||||||
|
curr -= 1;
|
||||||
|
*buf_ptr.offset(curr) = b'-';
|
||||||
|
}
|
||||||
|
|
||||||
|
let len = buf.len() - curr as usize;
|
||||||
|
slice::from_raw_parts(buf_ptr.offset(curr), len)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
)*};
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(all(feature = "i128"))]
|
||||||
|
impl_Integer128!(i128, u128);
|
||||||
|
|
|
@ -21,25 +21,25 @@
|
||||||
// (https://github.com/rust-lang/rust/issues/44545) and to allow function
|
// (https://github.com/rust-lang/rust/issues/44545) and to allow function
|
||||||
// inlining which doesn’t happen with the intrinsic.
|
// inlining which doesn’t happen with the intrinsic.
|
||||||
|
|
||||||
pub fn udivmod_10000(n: u128) -> (u128, isize) {
|
pub fn udivmod_1e19(n: u128) -> (u128, u64) {
|
||||||
|
let d = 10_000_000_000_000_000_000_u64; // 10^19
|
||||||
|
|
||||||
let high = (n >> 64) as u64;
|
let high = (n >> 64) as u64;
|
||||||
if high == 0 {
|
if high == 0 {
|
||||||
let low = n as u64;
|
let low = n as u64;
|
||||||
return ((low / 10000) as u128, (low % 10000) as isize);
|
return ((low / d) as u128, low % d);
|
||||||
}
|
}
|
||||||
|
|
||||||
let leading_zeros_10000 = 114;
|
let sr = 65 - high.leading_zeros();
|
||||||
debug_assert_eq!(leading_zeros_10000, 10000u128.leading_zeros());
|
|
||||||
let sr = 1 + leading_zeros_10000 - high.leading_zeros();
|
|
||||||
|
|
||||||
// 52 <= sr <= 115
|
// 2 <= sr <= 65
|
||||||
let mut q: u128 = n << (128 - sr);
|
let mut q: u128 = n << (128 - sr);
|
||||||
let mut r: u128 = n >> sr;
|
let mut r: u128 = n >> sr;
|
||||||
let mut carry: u64 = 0;
|
let mut carry: u64 = 0;
|
||||||
|
|
||||||
// Don't use a range because they may generate references to memcpy in unoptimized code
|
// Don't use a range because they may generate references to memcpy in unoptimized code
|
||||||
//
|
//
|
||||||
// Loop invariants: r < 10000; carry is 0 or 1
|
// Loop invariants: r < d; carry is 0 or 1
|
||||||
let mut i = 0;
|
let mut i = 0;
|
||||||
while i < sr {
|
while i < sr {
|
||||||
i += 1;
|
i += 1;
|
||||||
|
@ -49,14 +49,14 @@ pub fn udivmod_10000(n: u128) -> (u128, isize) {
|
||||||
q = (q << 1) | carry as u128;
|
q = (q << 1) | carry as u128;
|
||||||
|
|
||||||
// carry = 0
|
// carry = 0
|
||||||
// if r >= 10000 {
|
// if r >= d {
|
||||||
// r -= 10000;
|
// r -= d;
|
||||||
// carry = 1;
|
// carry = 1;
|
||||||
// }
|
// }
|
||||||
let s = 10000u128.wrapping_sub(r).wrapping_sub(1) as i128 >> 127;
|
let s = (d as u128).wrapping_sub(r).wrapping_sub(1) as i128 >> 127;
|
||||||
carry = (s & 1) as u64;
|
carry = (s & 1) as u64;
|
||||||
r -= 10000u128 & s as u128;
|
r -= (d as u128) & s as u128;
|
||||||
}
|
}
|
||||||
|
|
||||||
((q << 1) | carry as u128, r as isize)
|
((q << 1) | carry as u128, r as u64)
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue