From 23d280dc7a0eb0998145d872a67cce64e31e6d0a Mon Sep 17 00:00:00 2001 From: David Tolnay Date: Sat, 16 Sep 2017 11:02:22 -0700 Subject: [PATCH] Simplify udivmodti4 for our special case This isn't faster, just easier to understand. --- src/lib.rs | 20 ++--- src/udiv128.rs | 203 +++++-------------------------------------------- 2 files changed, 27 insertions(+), 196 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 564e987..e4af95f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -43,16 +43,10 @@ macro_rules! impl_Integer { ($($t:ident),* as $conv_fn:ident) => (impl_Integer!( $($t),* as $conv_fn, - (|n:$conv_fn, d:$conv_fn, rem:Option<&mut $conv_fn>| { - match rem { - Some(rem) => *rem = n % d, - _ => {}, - } - n / d - }) + |n:$conv_fn| (n / 10000, (n % 10000) as isize) );); - ($($t:ident),* as $conv_fn:ident, $divmod:expr) => ($( + ($($t:ident),* as $conv_fn:ident, $divmod_10000:expr) => ($( impl Integer for $t { fn write(self, mut wr: W) -> io::Result { let mut buf = unsafe { mem::uninitialized() }; @@ -80,13 +74,13 @@ macro_rules! impl_Integer { // eagerly decode 4 characters at a time if <$t>::max_value() as u64 >= 10000 { while n >= 10000 { - let mut rem = 0; // division with remainder on u128 is badly optimized by LLVM. // see “udiv128.rs” for more info. - n = $divmod(n, 10000, Some(&mut rem)); + let (q, r) = $divmod_10000(n); + n = q; - let d1 = (rem as isize / 100) << 1; - let d2 = (rem as isize % 100) << 1; + let d1 = (r / 100) << 1; + let d2 = (r % 100) << 1; curr -= 4; ptr::copy_nonoverlapping(lut_ptr.offset(d1), buf_ptr.offset(curr), 2); ptr::copy_nonoverlapping(lut_ptr.offset(d2), buf_ptr.offset(curr + 2), 2); @@ -135,4 +129,4 @@ impl_Integer!(isize, usize as u32); #[cfg(target_pointer_width = "64")] impl_Integer!(isize, usize as u64); #[cfg(all(feature = "i128"))] -impl_Integer!(i128, u128 as u128, udiv128::udivmodti4); +impl_Integer!(i128, u128 as u128, udiv128::udivmod_10000); diff --git a/src/udiv128.rs b/src/udiv128.rs index b1aae8c..cc95d1a 100644 --- a/src/udiv128.rs +++ b/src/udiv128.rs @@ -21,205 +21,42 @@ // (https://github.com/rust-lang/rust/issues/44545) and to allow function // inlining which doesn’t happen with the intrinsic. -const BITS: u32 = 128; -const BITS_HALF: u32 = 64; - -trait LargeInt { - fn low(self) -> u64; - fn high(self) -> u64; - fn from_parts(low: u64, high: u64) -> Self; -} - -trait Int { - fn aborting_div(self, other: Self) -> Self; - fn aborting_rem(self, other: Self) -> Self; -} - -impl LargeInt for u128 { - fn low(self) -> u64 { - self as u64 +pub fn udivmod_10000(n: u128) -> (u128, isize) { + let high = (n >> 64) as u64; + if high == 0 { + let low = n as u64; + return ((low / 10000) as u128, (low % 10000) as isize); } - fn high(self) -> u64 { - (self >> 64) as u64 - } + let leading_zeros_10000 = 114; + debug_assert_eq!(leading_zeros_10000, 10000u128.leading_zeros()); + let sr = 1 + leading_zeros_10000 - high.leading_zeros(); - fn from_parts(low: u64, high: u64) -> u128 { - low as u128 | ((high as u128) << 64) - } -} - -impl Int for u64 { - fn aborting_div(self, other: u64) -> u64 { - ::checked_div(self, other).unwrap() - } - - fn aborting_rem(self, other: u64) -> u64 { - ::checked_rem(self, other).unwrap() - } -} - -pub fn udivmodti4(n: u128, d: u128, rem: Option<&mut u128>) -> u128 { - // NOTE X is unknown, K != 0 - if n.high() == 0 { - if d.high() == 0 { - // 0 X - // --- - // 0 X - - if let Some(rem) = rem { - *rem = ::from(n.low().aborting_rem(d.low())); - } - return ::from(n.low().aborting_div(d.low())) - } else { - // 0 X - // --- - // K X - if let Some(rem) = rem { - *rem = n; - } - return 0; - }; - } - - let mut sr; - let mut q; - let mut r; - - if d.low() == 0 { - if d.high() == 0 { - // K X - // --- - // 0 0 - // NOTE This should be unreachable in safe Rust because the program will panic before - // this intrinsic is called - unreachable!(); - } - - if n.low() == 0 { - // K 0 - // --- - // K 0 - if let Some(rem) = rem { - *rem = ::from_parts(0, n.high().aborting_rem(d.high())); - } - return ::from(n.high().aborting_div(d.high())) - } - - // K K - // --- - // K 0 - - if d.high().is_power_of_two() { - if let Some(rem) = rem { - *rem = ::from_parts(n.low(), n.high() & (d.high() - 1)); - } - return ::from(n.high() >> d.high().trailing_zeros()); - } - - sr = d.high().leading_zeros().wrapping_sub(n.high().leading_zeros()); - - // D > N - if sr > BITS_HALF - 2 { - if let Some(rem) = rem { - *rem = n; - } - return 0; - } - - sr += 1; - - // 1 <= sr <= BITS_HALF - 1 - q = n << (BITS - sr); - r = n >> sr; - } else if d.high() == 0 { - // K X - // --- - // 0 K - if d.low().is_power_of_two() { - if let Some(rem) = rem { - *rem = ::from(n.low() & (d.low() - 1)); - } - - if d.low() == 1 { - return n; - } else { - let sr = d.low().trailing_zeros(); - return n >> sr; - }; - } - - sr = 1 + BITS_HALF + d.low().leading_zeros() - n.high().leading_zeros(); - - // 2 <= sr <= u64::BITS - 1 - q = n << (BITS - sr); - r = n >> sr; - } else { - // K X - // --- - // K K - sr = d.high().leading_zeros().wrapping_sub(n.high().leading_zeros()); - - // D > N - if sr > BITS_HALF - 1 { - if let Some(rem) = rem { - *rem = n; - } - return 0; - } - - sr += 1; - - // 1 <= sr <= BITS_HALF - q = n << (BITS - sr); - r = n >> sr; - } - - // Not a special case - // q and r are initialized with - // q = n << (u64::BITS - sr) - // r = n >> sr - // 1 <= sr <= u64::BITS - 1 - let mut carry = 0; + // 52 <= sr <= 115 + let mut q: u128 = n << (128 - sr); + let mut r: u128 = n >> sr; + let mut carry: u64 = 0; // Don't use a range because they may generate references to memcpy in unoptimized code + // + // Loop invariants: r < 10000; carry is 0 or 1 let mut i = 0; while i < sr { i += 1; // r:q = ((r:q) << 1) | carry - r = (r << 1) | (q >> (BITS - 1)); + r = (r << 1) | (q >> 127); q = (q << 1) | carry as u128; // carry = 0 - // if r >= d { - // r -= d; + // if r >= 10000 { + // r -= 10000; // carry = 1; // } - let s = (d.wrapping_sub(r).wrapping_sub(1)) as i128 >> (BITS - 1); + let s = 10000u128.wrapping_sub(r).wrapping_sub(1) as i128 >> 127; carry = (s & 1) as u64; - r -= d & s as u128; + r -= 10000u128 & s as u128; } - if let Some(rem) = rem { - *rem = r; - } - (q << 1) | carry as u128 -} - -#[cfg(test)] -#[test] -fn test_udivmodti4() { - let primes = [ - 3, 7, 31, 73, 127, 179, 233, 283, 353, - 419, 467, 547, 607, 661, 739, 811, 877, 947, - ]; - - for (i, d) in (0..128).cycle().zip(primes.iter().cycle()).take(1_000) { - let n = 1u128 << i; - let mut rem = 0; - let q = udivmodti4(n, *d, Some(&mut rem)); - assert_eq!(q, n / d); - assert_eq!(rem, n % d); - } + ((q << 1) | carry as u128, r as isize) }