Merge pull request 27 from Kogia-sima/perf/optimize-udivmod
This commit is contained in:
commit
2886a30f1c
|
@ -1,61 +1,45 @@
|
||||||
// The code in this file is based on Rust's compiler-builtins crate. The Rust
|
/// Multiply unsigned 128 bit integers, return upper 128 bits of the result
|
||||||
// compiler automatically links programs against this crate for target-specific
|
#[inline]
|
||||||
// runtime support. We have copied the implementation of `__udivmodti4()` which
|
fn u128_mulhi(x: u128, y: u128) -> u128 {
|
||||||
// is an intrinsic implementing division with remainder for architectures
|
let x_lo = x as u64;
|
||||||
// without 128-bit integers. This implementation works around some poor codegen
|
let x_hi = (x >> 64) as u64;
|
||||||
// by LLVM (https://github.com/rust-lang/rust/issues/44545) and allows for
|
let y_lo = y as u64;
|
||||||
// inlining which does not happen with the intrinsic.
|
let y_hi = (y >> 64) as u64;
|
||||||
//
|
|
||||||
// The compiler-builtins crate carries the following license, which is available
|
|
||||||
// in full at:
|
|
||||||
// https://github.com/rust-lang-nursery/compiler-builtins/blob/master/LICENSE.TXT
|
|
||||||
//
|
|
||||||
// ---
|
|
||||||
//
|
|
||||||
// Copyright 2009-2016 compiler-builtins Developers
|
|
||||||
//
|
|
||||||
// The compiler-builtins crate is dual licensed under both the University of
|
|
||||||
// Illinois "BSD-Like" license and the MIT license. As a user of this code you
|
|
||||||
// may choose to use it under either license. As a contributor, you agree to
|
|
||||||
// allow your code to be used under both.
|
|
||||||
|
|
||||||
|
// handle possibility of overflow
|
||||||
|
let carry = (x_lo as u128 * y_lo as u128) >> 64;
|
||||||
|
let m = x_lo as u128 * y_hi as u128 + carry;
|
||||||
|
let high1 = m >> 64;
|
||||||
|
|
||||||
|
let m_lo = m as u64;
|
||||||
|
let high2 = x_hi as u128 * y_lo as u128 + m_lo as u128 >> 64;
|
||||||
|
|
||||||
|
x_hi as u128 * y_hi as u128 + high1 + high2
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Divide `n` by 1e19 and return quotient and remainder
|
||||||
|
///
|
||||||
|
/// Integer division algorithm is based on the following paper:
|
||||||
|
///
|
||||||
|
/// T. Granlund and P. Montgomery, “Division by Invariant Integers Using Multiplication”
|
||||||
|
/// in Proc. of the SIGPLAN94 Conference on Programming Language Design and
|
||||||
|
/// Implementation, 1994, pp. 61–72
|
||||||
|
///
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn udivmod_1e19(n: u128) -> (u128, u64) {
|
pub fn udivmod_1e19(n: u128) -> (u128, u64) {
|
||||||
let d = 10_000_000_000_000_000_000_u64; // 10^19
|
let d = 10_000_000_000_000_000_000_u64; // 10^19
|
||||||
|
|
||||||
let high = (n >> 64) as u64;
|
let quot = if n < 1 << 83 {
|
||||||
if high == 0 {
|
((n >> 19) as u64 / (d >> 19)) as u128
|
||||||
let low = n as u64;
|
} else {
|
||||||
return ((low / d) as u128, low % d);
|
let factor =
|
||||||
}
|
(8507059173023461586_u64 as u128) << 64 | 10779635027931437427 as u128;
|
||||||
|
u128_mulhi(n, factor) >> 62
|
||||||
|
};
|
||||||
|
|
||||||
let sr = 65 - high.leading_zeros();
|
let rem = (n - quot * d as u128) as u64;
|
||||||
|
debug_assert_eq!(quot, n / d as u128);
|
||||||
|
debug_assert_eq!(rem as u128, n % d as u128);
|
||||||
|
|
||||||
// 2 <= sr <= 65
|
(quot, rem)
|
||||||
let mut q: u128 = n << (128 - sr);
|
|
||||||
let mut r: u128 = n >> sr;
|
|
||||||
let mut carry: u64 = 0;
|
|
||||||
|
|
||||||
// Don't use a range because they may generate references to memcpy in unoptimized code
|
|
||||||
//
|
|
||||||
// Loop invariants: r < d; carry is 0 or 1
|
|
||||||
let mut i = 0;
|
|
||||||
while i < sr {
|
|
||||||
i += 1;
|
|
||||||
|
|
||||||
// r:q = ((r:q) << 1) | carry
|
|
||||||
r = (r << 1) | (q >> 127);
|
|
||||||
q = (q << 1) | carry as u128;
|
|
||||||
|
|
||||||
// carry = 0
|
|
||||||
// if r >= d {
|
|
||||||
// r -= d;
|
|
||||||
// carry = 1;
|
|
||||||
// }
|
|
||||||
let s = (d as u128).wrapping_sub(r).wrapping_sub(1) as i128 >> 127;
|
|
||||||
carry = (s & 1) as u64;
|
|
||||||
r -= (d as u128) & s as u128;
|
|
||||||
}
|
|
||||||
|
|
||||||
((q << 1) | carry as u128, r as u64)
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue