From eb66459ef401576352de4d3e72cd64b588b7dfb3 Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Sun, 29 Dec 2019 15:05:01 -0800 Subject: [PATCH] Fold the `.round()` method on SIMD vectors into float-to-int conversion. This drops our requirements from SSE4.1 to SSE2. Closes #241. --- content/src/color.rs | 2 +- simd/src/arm/mod.rs | 17 ++++------------- simd/src/scalar/mod.rs | 31 +++++++++---------------------- simd/src/x86/mod.rs | 14 ++------------ 4 files changed, 16 insertions(+), 48 deletions(-) diff --git a/content/src/color.rs b/content/src/color.rs index 5f3676be..c2090d90 100644 --- a/content/src/color.rs +++ b/content/src/color.rs @@ -96,7 +96,7 @@ impl ColorF { #[inline] pub fn to_u8(&self) -> ColorU { - let color = (self.0 * F32x4::splat(255.0)).round().to_i32x4(); + let color = (self.0 * F32x4::splat(255.0)).to_i32x4(); ColorU { r: color[0] as u8, g: color[1] as u8, b: color[2] as u8, a: color[3] as u8 } } diff --git a/simd/src/arm/mod.rs b/simd/src/arm/mod.rs index fe1013ca..a8503d56 100644 --- a/simd/src/arm/mod.rs +++ b/simd/src/arm/mod.rs @@ -73,11 +73,6 @@ impl F32x2 { unsafe { F32x2(ceil_v2f32(self.0)) } } - #[inline] - pub fn round(self) -> F32x2 { - unsafe { F32x2(round_v2f32(self.0)) } - } - #[inline] pub fn sqrt(self) -> F32x2 { unsafe { F32x2(sqrt_v2f32(self.0)) } @@ -112,9 +107,10 @@ impl F32x2 { self.concat_xy_xy(F32x2::default()) } + /// Converts these packed floats to integers via rounding. #[inline] pub fn to_i32x2(self) -> I32x2 { - unsafe { I32x2(simd_cast(self.0)) } + unsafe { I32x2(simd_cast(round_v2f32(self.0))) } } #[inline] @@ -252,11 +248,6 @@ impl F32x4 { unsafe { F32x4(ceil_v4f32(self.0)) } } - #[inline] - pub fn round(self) -> F32x4 { - unsafe { F32x4(round_v4f32(self.0)) } - } - #[inline] pub fn sqrt(self) -> F32x4 { unsafe { F32x4(sqrt_v4f32(self.0)) } @@ -330,10 +321,10 @@ impl F32x4 { // Conversions - // Converts these packed floats to integers. + /// Converts these packed floats to integers via rounding. 
#[inline] pub fn to_i32x4(self) -> I32x4 { - unsafe { I32x4(simd_cast(self.0)) } + unsafe { I32x4(simd_cast(round_v4f32(self.0))) } } } diff --git a/simd/src/scalar/mod.rs b/simd/src/scalar/mod.rs index 4fd81b1c..e2a4a646 100644 --- a/simd/src/scalar/mod.rs +++ b/simd/src/scalar/mod.rs @@ -70,11 +70,6 @@ impl F32x2 { F32x2([self[0].ceil(), self[1].ceil()]) } - #[inline] - pub fn round(self) -> F32x2 { - F32x2([self[0].round(), self[1].round()]) - } - #[inline] pub fn sqrt(self) -> F32x2 { F32x2([self[0].sqrt(), self[1].sqrt()]) @@ -121,14 +116,16 @@ impl F32x2 { F32x4([self[0] as f32, self[1] as f32, 0.0, 0.0]) } + /// Converts these packed floats to integers via rounding. #[inline] pub fn to_i32x2(self) -> I32x2 { - I32x2([self[0] as i32, self[1] as i32]) + I32x2([self[0].round() as i32, self[1].round() as i32]) } + /// Converts these packed floats to integers via rounding. #[inline] pub fn to_i32x4(self) -> I32x4 { - I32x4([self[0] as i32, self[1] as i32, 0, 0]) + I32x4([self[0].round() as i32, self[1].round() as i32, 0, 0]) } // Swizzle @@ -258,16 +255,6 @@ impl F32x4 { ]) } - #[inline] - pub fn round(self) -> F32x4 { - F32x4([ - self[0].round(), - self[1].round(), - self[2].round(), - self[3].round(), - ]) - } - #[inline] pub fn sqrt(self) -> F32x4 { F32x4([ @@ -320,14 +307,14 @@ impl F32x4 { ]) } - // Converts these packed floats to integers. + /// Converts these packed floats to integers via rounding. #[inline] pub fn to_i32x4(self) -> I32x4 { I32x4([ - self[0] as i32, - self[1] as i32, - self[2] as i32, - self[3] as i32, + self[0].round() as i32, + self[1].round() as i32, + self[2].round() as i32, + self[3].round() as i32, ]) } diff --git a/simd/src/x86/mod.rs b/simd/src/x86/mod.rs index 189b7c3a..0fa97916 100644 --- a/simd/src/x86/mod.rs +++ b/simd/src/x86/mod.rs @@ -8,7 +8,7 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-use std::arch::x86_64::{self, __m128, __m128i, _MM_FROUND_TO_NEAREST_INT}; +use std::arch::x86_64::{self, __m128, __m128i}; use std::cmp::PartialEq; use std::fmt::{self, Debug, Formatter}; use std::mem; @@ -76,11 +76,6 @@ impl F32x2 { self.to_f32x4().ceil().xy() } - #[inline] - pub fn round(self) -> F32x2 { - self.to_f32x4().round().xy() - } - #[inline] pub fn sqrt(self) -> F32x2 { self.to_f32x4().sqrt().xy() @@ -261,11 +256,6 @@ impl F32x4 { unsafe { F32x4(x86_64::_mm_ceil_ps(self.0)) } } - #[inline] - pub fn round(self) -> F32x4 { - unsafe { F32x4(x86_64::_mm_round_ps(self.0, _MM_FROUND_TO_NEAREST_INT)) } - } - #[inline] pub fn sqrt(self) -> F32x4 { unsafe { F32x4(x86_64::_mm_sqrt_ps(self.0)) } @@ -303,7 +293,7 @@ impl F32x4 { // Conversions - /// Converts these packed floats to integers. + /// Converts these packed floats to integers via rounding. #[inline] pub fn to_i32x4(self) -> I32x4 { unsafe { I32x4(x86_64::_mm_cvtps_epi32(self.0)) }