From f96b2b2113fac3c1d283afa65cbca42750e000df Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Sun, 6 Sep 2020 22:44:08 -0700 Subject: [PATCH 1/4] benches: Add benchmarks This allows comparison of alternative implementaitons, and to the f64::sqrt() implementation. Signed-off-by: Joe Richey --- benches/sqrt.rs | 73 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 73 insertions(+) create mode 100644 benches/sqrt.rs diff --git a/benches/sqrt.rs b/benches/sqrt.rs new file mode 100644 index 0000000..693fc1c --- /dev/null +++ b/benches/sqrt.rs @@ -0,0 +1,73 @@ +#![feature(test)] + +extern crate test; +use test::{black_box, Bencher}; + +extern crate integer_sqrt; +use integer_sqrt::IntegerSquareRoot; + +// Use f64::sqrt to compute the integer sqrt +fn isqrt_via_f64(n: u64) -> u64 { + let cand = (n as f64).sqrt() as u64; + // Rounding can cause off-by-one errors + if let Some(prod) = cand.checked_mul(cand) { + if prod <= n { + return cand; + } + } + cand - 1 +} + +#[bench] +fn isqrt_small(b: &mut Bencher) { + let small = 63u64; + b.iter(|| { + let n = black_box(small); + assert_eq!(n.integer_sqrt_checked(), Some(7)); + }) +} + +#[bench] +fn isqrt_med(b: &mut Bencher) { + let med = 10_000_000_000u64; // 10^10 + b.iter(|| { + let n = black_box(med); + assert_eq!(n.integer_sqrt_checked(), Some(100_000)); // 10^5 + }) +} + +#[bench] +fn isqrt_large(b: &mut Bencher) { + let large = u64::MAX; + b.iter(|| { + let n = black_box(large); + assert_eq!(n.integer_sqrt_checked(), Some((1u64 << 32) - 1)); + }) +} + +#[bench] +fn isqrt_f64_small(b: &mut Bencher) { + let small = 63u64; + b.iter(|| { + let n = black_box(small); + assert_eq!(isqrt_via_f64(n), 7); + }) +} + +#[bench] +fn isqrt_f64_med(b: &mut Bencher) { + let med = 10_000_000_000u64; // 10^10 + b.iter(|| { + let n = black_box(med); + assert_eq!(isqrt_via_f64(n), 100_000); // 10^5 + }) +} + +#[bench] +fn isqrt_f64_large(b: &mut Bencher) { + let large = u64::MAX; + b.iter(|| { + let n = black_box(large); + assert_eq!(isqrt_via_f64(n), (1u64 << 32) - 1); + }) +} From 260148e029cd53adc42b1cd22988f64c3068d95f Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Mon, 7 Sep 2020 20:19:44 -0700 Subject: [PATCH 2/4] Simplify impl_isqrt macro We don't need multiple cases in the macro_rules! Signed-off-by: Joe Richey --- src/lib.rs | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index 0e637ce..ec0c129 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -44,11 +44,8 @@ pub trait IntegerSquareRoot { Self: Sized; } -// This could be more optimized macro_rules! impl_isqrt { - () => (); - ($t:ty) => {impl_isqrt!($t,);}; - ($t:ty, $($e:tt)*) => { + ($($t:ty)*) => { $( impl IntegerSquareRoot for $t { #[allow(unused_comparisons)] fn integer_sqrt_checked(&self) -> Option { @@ -86,12 +83,10 @@ macro_rules! impl_isqrt { Some(result) } } - - impl_isqrt!($($e)*); - }; + )* }; } -impl_isqrt!(usize, u128, u64, u32, u16, u8, isize, i128, i64, i32, i16, i8); +impl_isqrt!(usize u128 u64 u32 u16 u8 isize i128 i64 i32 i16 i8); #[cfg(test)] mod tests { From 0445b3e5c1972cfc0b62220f3c03750b0c263019 Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Mon, 7 Sep 2020 20:10:09 -0700 Subject: [PATCH 3/4] Use improved algorithm that only uses shifts This new algorithm (taken from Wikipedia) only uses shifts, addditions, and subtrations. On my x86_64 machine, the benchmarks are over twice as fast. This also takes num-traits as a dependancy, so that the implementation can be a normal generic function, instread of a macro. Signed-off-by: Joe Richey --- Cargo.toml | 2 ++ src/lib.rs | 64 ++++++++++++++++++++++++++---------------------------- 2 files changed, 33 insertions(+), 33 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 61fd8ab..8aed8fe 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,3 +11,5 @@ keywords = ["integer", "square", "root", "isqrt", "sqrt"] categories = ["algorithms", "no-std"] license = "Apache-2.0/MIT" +[dependencies] +num-traits = "0.2" diff --git a/src/lib.rs b/src/lib.rs index ec0c129..bd65c40 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -44,43 +44,41 @@ pub trait IntegerSquareRoot { Self: Sized; } +#[inline(always)] +fn integer_sqrt_impl(mut n: T) -> Option { + // Hopefully this will be stripped for unsigned numbers (impossible condition) + if n < T::zero() { + return None; + } + + // Compute bit, the largest power of 4 <= n + use core::mem::size_of; + let mut bit = T::one().unsigned_shl(size_of::() as u32 * 8 - 2); + while bit > n { + bit = bit.unsigned_shr(2); + } + + // Algorithm based on the implementation in: + // https://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Binary_numeral_system_(base_2) + // Note that result/bit are logically unsigned (even if T is signed). + let mut result = T::zero(); + while bit != T::zero() { + if n >= (result + bit) { + n = n - (result + bit); + result = result.unsigned_shr(1) + bit; + } else { + result = result.unsigned_shr(1); + } + bit = bit.unsigned_shr(2); + } + Some(result) +} + macro_rules! impl_isqrt { ($($t:ty)*) => { $( impl IntegerSquareRoot for $t { - #[allow(unused_comparisons)] fn integer_sqrt_checked(&self) -> Option { - // Hopefully this will be stripped for unsigned numbers (impossible condition) - if *self < 0 { - return None - } - // Find greatest shift - let mut shift = 2; - let mut n_shifted = *self >> shift; - // We check for n_shifted being self, since some implementations of logical - // right shifting shift modulo the word size. - while n_shifted != 0 && n_shifted != *self { - shift = shift + 2; - n_shifted = self.wrapping_shr(shift); - } - shift = shift - 2; - - // Find digits of result. - let mut result = 0; - loop { - result = result << 1; - let candidate_result: $t = result + 1; - if let Some(cr_square) = candidate_result.checked_mul(candidate_result) { - if cr_square <= *self >> shift { - result = candidate_result; - } - } - if shift == 0 { - break; - } - shift = shift.saturating_sub(2); - } - - Some(result) + integer_sqrt_impl(*self) } } )* }; From 1e02bdb415e4738e8908d9051f162b06733dc662 Mon Sep 17 00:00:00 2001 From: Joe Richey Date: Mon, 7 Sep 2020 20:44:32 -0700 Subject: [PATCH 4/4] Use leading_zeros to compute initial bit This increases performance on processors with lzcnt instructions. Signed-off-by: Joe Richey --- src/lib.rs | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/src/lib.rs b/src/lib.rs index bd65c40..9d93c55 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -46,17 +46,18 @@ pub trait IntegerSquareRoot { #[inline(always)] fn integer_sqrt_impl(mut n: T) -> Option { - // Hopefully this will be stripped for unsigned numbers (impossible condition) - if n < T::zero() { - return None; + use core::cmp::Ordering; + match n.cmp(&T::zero()) { + // Hopefully this will be stripped for unsigned numbers (impossible condition) + Ordering::Less => return None, + Ordering::Equal => return Some(T::zero()), + _ => {} } // Compute bit, the largest power of 4 <= n - use core::mem::size_of; - let mut bit = T::one().unsigned_shl(size_of::() as u32 * 8 - 2); - while bit > n { - bit = bit.unsigned_shr(2); - } + let max_shift: u32 = T::zero().leading_zeros() - 1; + let shift: u32 = (max_shift - n.leading_zeros()) & !1; + let mut bit = T::one().unsigned_shl(shift); // Algorithm based on the implementation in: // https://en.wikipedia.org/wiki/Methods_of_computing_square_roots#Binary_numeral_system_(base_2)