Fold the `.round()` method on SIMD vectors into float-to-int conversion.

This drops our requirements from SSE4.1 to SSE2. Closes #241.
2019-12-29 15:05:01 -08:00 · 2019-12-29 15:05:01 -08:00 · eb66459ef4
parent 0bb6b88859
commit eb66459ef4
4 changed files with 16 additions and 48 deletions
--- a/content/src/color.rs
+++ b/content/src/color.rs
@ -96,7 +96,7 @@ impl ColorF {
    #[inline]
    pub fn to_u8(&self) -> ColorU {
-        let color = (self.0 * F32x4::splat(255.0)).round().to_i32x4();
+        let color = (self.0 * F32x4::splat(255.0)).to_i32x4();
        ColorU { r: color[0] as u8, g: color[1] as u8, b: color[2] as u8, a: color[3] as u8 }
    }
--- a/simd/src/arm/mod.rs
+++ b/simd/src/arm/mod.rs
@ -73,11 +73,6 @@ impl F32x2 {
        unsafe { F32x2(ceil_v2f32(self.0)) }
    }
-    #[inline]
-    pub fn round(self) -> F32x2 {
-        unsafe { F32x2(round_v2f32(self.0)) }
-    }
    #[inline]
    pub fn sqrt(self) -> F32x2 {
        unsafe { F32x2(sqrt_v2f32(self.0)) }
@ -112,9 +107,10 @@ impl F32x2 {
        self.concat_xy_xy(F32x2::default())
    }
+    /// Converts these packed floats to integers via rounding.
    #[inline]
    pub fn to_i32x2(self) -> I32x2 {
-        unsafe { I32x2(simd_cast(self.0)) }
+        unsafe { I32x2(simd_cast(round_v2f32(self.0))) }
    }
    #[inline]
@ -252,11 +248,6 @@ impl F32x4 {
        unsafe { F32x4(ceil_v4f32(self.0)) }
    }
-    #[inline]
-    pub fn round(self) -> F32x4 {
-        unsafe { F32x4(round_v4f32(self.0)) }
-    }
    #[inline]
    pub fn sqrt(self) -> F32x4 {
        unsafe { F32x4(sqrt_v4f32(self.0)) }
@ -330,10 +321,10 @@ impl F32x4 {
    // Conversions
-    // Converts these packed floats to integers.
+    /// Converts these packed floats to integers via rounding.
    #[inline]
    pub fn to_i32x4(self) -> I32x4 {
-        unsafe { I32x4(simd_cast(self.0)) }
+        unsafe { I32x4(simd_cast(round_v4f32(self.0))) }
    }
 }
--- a/simd/src/scalar/mod.rs
+++ b/simd/src/scalar/mod.rs
@ -70,11 +70,6 @@ impl F32x2 {
        F32x2([self[0].ceil(), self[1].ceil()])
    }
-    #[inline]
-    pub fn round(self) -> F32x2 {
-        F32x2([self[0].round(), self[1].round()])
-    }
    #[inline]
    pub fn sqrt(self) -> F32x2 {
        F32x2([self[0].sqrt(), self[1].sqrt()])
@ -121,14 +116,16 @@ impl F32x2 {
        F32x4([self[0] as f32, self[1] as f32, 0.0, 0.0])
    }
+    /// Converts these packed floats to integers via rounding.
    #[inline]
    pub fn to_i32x2(self) -> I32x2 {
-        I32x2([self[0] as i32, self[1] as i32])
+        I32x2([self[0].round() as i32, self[1].round() as i32])
    }
+    /// Converts these packed floats to integers via rounding.
    #[inline]
    pub fn to_i32x4(self) -> I32x4 {
-        I32x4([self[0] as i32, self[1] as i32, 0, 0])
+        I32x4([self[0].round() as i32, self[1].round() as i32, 0, 0])
    }
    // Swizzle
@ -258,16 +255,6 @@ impl F32x4 {
        ])
    }
-    #[inline]
-    pub fn round(self) -> F32x4 {
-        F32x4([
-            self[0].round(),
-            self[1].round(),
-            self[2].round(),
-            self[3].round(),
-        ])
-    }
    #[inline]
    pub fn sqrt(self) -> F32x4 {
        F32x4([
@ -320,14 +307,14 @@ impl F32x4 {
        ])
    }
-    // Converts these packed floats to integers.
+    /// Converts these packed floats to integers via rounding.
    #[inline]
    pub fn to_i32x4(self) -> I32x4 {
        I32x4([
-            self[0] as i32,
+            self[0].round() as i32,
-            self[1] as i32,
+            self[1].round() as i32,
-            self[2] as i32,
+            self[2].round() as i32,
-            self[3] as i32,
+            self[3].round() as i32,
        ])
    }
--- a/simd/src/x86/mod.rs
+++ b/simd/src/x86/mod.rs
@ -8,7 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-use std::arch::x86_64::{self, __m128, __m128i, _MM_FROUND_TO_NEAREST_INT};
+use std::arch::x86_64::{self, __m128, __m128i};
 use std::cmp::PartialEq;
 use std::fmt::{self, Debug, Formatter};
 use std::mem;
@ -76,11 +76,6 @@ impl F32x2 {
        self.to_f32x4().ceil().xy()
    }
-    #[inline]
-    pub fn round(self) -> F32x2 {
-        self.to_f32x4().round().xy()
-    }
    #[inline]
    pub fn sqrt(self) -> F32x2 {
        self.to_f32x4().sqrt().xy()
@ -261,11 +256,6 @@ impl F32x4 {
        unsafe { F32x4(x86_64::_mm_ceil_ps(self.0)) }
    }
-    #[inline]
-    pub fn round(self) -> F32x4 {
-        unsafe { F32x4(x86_64::_mm_round_ps(self.0, _MM_FROUND_TO_NEAREST_INT)) }
-    }
    #[inline]
    pub fn sqrt(self) -> F32x4 {
        unsafe { F32x4(x86_64::_mm_sqrt_ps(self.0)) }
@ -303,7 +293,7 @@ impl F32x4 {
    // Conversions
-    /// Converts these packed floats to integers.
+    /// Converts these packed floats to integers via rounding.
    #[inline]
    pub fn to_i32x4(self) -> I32x4 {
        unsafe { I32x4(x86_64::_mm_cvtps_epi32(self.0)) }