From eb66459ef401576352de4d3e72cd64b588b7dfb3 Mon Sep 17 00:00:00 2001
From: Patrick Walton <pcwalton@mimiga.net>
Date: Sun, 29 Dec 2019 15:05:01 -0800
Subject: [PATCH] Fold the `.round()` method on SIMD vectors into float-to-int
 conversion.

This drops our requirements from SSE4.1 to SSE2.

Closes #241.
---
 content/src/color.rs   |  2 +-
 simd/src/arm/mod.rs    | 17 ++++-------------
 simd/src/scalar/mod.rs | 31 +++++++++----------------------
 simd/src/x86/mod.rs    | 14 ++------------
 4 files changed, 16 insertions(+), 48 deletions(-)

diff --git a/content/src/color.rs b/content/src/color.rs
index 5f3676be..c2090d90 100644
--- a/content/src/color.rs
+++ b/content/src/color.rs
@@ -96,7 +96,7 @@ impl ColorF {
 
     #[inline]
     pub fn to_u8(&self) -> ColorU {
-        let color = (self.0 * F32x4::splat(255.0)).round().to_i32x4();
+        let color = (self.0 * F32x4::splat(255.0)).to_i32x4();
         ColorU { r: color[0] as u8, g: color[1] as u8, b: color[2] as u8, a: color[3] as u8 }
     }
 
diff --git a/simd/src/arm/mod.rs b/simd/src/arm/mod.rs
index fe1013ca..a8503d56 100644
--- a/simd/src/arm/mod.rs
+++ b/simd/src/arm/mod.rs
@@ -73,11 +73,6 @@ impl F32x2 {
         unsafe { F32x2(ceil_v2f32(self.0)) }
     }
 
-    #[inline]
-    pub fn round(self) -> F32x2 {
-        unsafe { F32x2(round_v2f32(self.0)) }
-    }
-
     #[inline]
     pub fn sqrt(self) -> F32x2 {
         unsafe { F32x2(sqrt_v2f32(self.0)) }
@@ -112,9 +107,10 @@ impl F32x2 {
         self.concat_xy_xy(F32x2::default())
     }
 
+    /// Converts these packed floats to integers via rounding.
     #[inline]
     pub fn to_i32x2(self) -> I32x2 {
-        unsafe { I32x2(simd_cast(self.0)) }
+        unsafe { I32x2(simd_cast(round_v2f32(self.0))) }
     }
 
     #[inline]
@@ -252,11 +248,6 @@ impl F32x4 {
         unsafe { F32x4(ceil_v4f32(self.0)) }
     }
 
-    #[inline]
-    pub fn round(self) -> F32x4 {
-        unsafe { F32x4(round_v4f32(self.0)) }
-    }
-
     #[inline]
     pub fn sqrt(self) -> F32x4 {
         unsafe { F32x4(sqrt_v4f32(self.0)) }
@@ -330,10 +321,10 @@ impl F32x4 {
 
     // Conversions
 
-    // Converts these packed floats to integers.
+    /// Converts these packed floats to integers via rounding.
     #[inline]
     pub fn to_i32x4(self) -> I32x4 {
-        unsafe { I32x4(simd_cast(self.0)) }
+        unsafe { I32x4(simd_cast(round_v4f32(self.0))) }
     }
 }
 
diff --git a/simd/src/scalar/mod.rs b/simd/src/scalar/mod.rs
index 4fd81b1c..e2a4a646 100644
--- a/simd/src/scalar/mod.rs
+++ b/simd/src/scalar/mod.rs
@@ -70,11 +70,6 @@ impl F32x2 {
         F32x2([self[0].ceil(), self[1].ceil()])
     }
 
-    #[inline]
-    pub fn round(self) -> F32x2 {
-        F32x2([self[0].round(), self[1].round()])
-    }
-
     #[inline]
     pub fn sqrt(self) -> F32x2 {
         F32x2([self[0].sqrt(), self[1].sqrt()])
@@ -121,14 +116,16 @@ impl F32x2 {
         F32x4([self[0] as f32, self[1] as f32, 0.0, 0.0])
     }
 
+    /// Converts these packed floats to integers, rounding ties away from zero.
     #[inline]
     pub fn to_i32x2(self) -> I32x2 {
-        I32x2([self[0] as i32, self[1] as i32])
+        I32x2([self[0].round() as i32, self[1].round() as i32])
     }
 
+    /// Rounds both lanes (ties away from zero) to integers; the upper two lanes are 0.
     #[inline]
     pub fn to_i32x4(self) -> I32x4 {
-        I32x4([self[0] as i32, self[1] as i32, 0, 0])
+        I32x4([self[0].round() as i32, self[1].round() as i32, 0, 0])
     }
 
     // Swizzle
@@ -258,16 +255,6 @@ impl F32x4 {
         ])
     }
 
-    #[inline]
-    pub fn round(self) -> F32x4 {
-        F32x4([
-            self[0].round(),
-            self[1].round(),
-            self[2].round(),
-            self[3].round(),
-        ])
-    }
-
     #[inline]
     pub fn sqrt(self) -> F32x4 {
         F32x4([
@@ -320,14 +307,14 @@ impl F32x4 {
         ])
     }
 
-    // Converts these packed floats to integers.
+    /// Converts these packed floats to integers, rounding ties away from zero.
     #[inline]
     pub fn to_i32x4(self) -> I32x4 {
         I32x4([
-            self[0] as i32,
-            self[1] as i32,
-            self[2] as i32,
-            self[3] as i32,
+            self[0].round() as i32,
+            self[1].round() as i32,
+            self[2].round() as i32,
+            self[3].round() as i32,
         ])
     }
 
diff --git a/simd/src/x86/mod.rs b/simd/src/x86/mod.rs
index 189b7c3a..0fa97916 100644
--- a/simd/src/x86/mod.rs
+++ b/simd/src/x86/mod.rs
@@ -8,7 +8,7 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
 
-use std::arch::x86_64::{self, __m128, __m128i, _MM_FROUND_TO_NEAREST_INT};
+use std::arch::x86_64::{self, __m128, __m128i};
 use std::cmp::PartialEq;
 use std::fmt::{self, Debug, Formatter};
 use std::mem;
@@ -76,11 +76,6 @@ impl F32x2 {
         self.to_f32x4().ceil().xy()
     }
 
-    #[inline]
-    pub fn round(self) -> F32x2 {
-        self.to_f32x4().round().xy()
-    }
-
     #[inline]
     pub fn sqrt(self) -> F32x2 {
         self.to_f32x4().sqrt().xy()
@@ -261,11 +256,6 @@ impl F32x4 {
         unsafe { F32x4(x86_64::_mm_ceil_ps(self.0)) }
     }
 
-    #[inline]
-    pub fn round(self) -> F32x4 {
-        unsafe { F32x4(x86_64::_mm_round_ps(self.0, _MM_FROUND_TO_NEAREST_INT)) }
-    }
-
     #[inline]
     pub fn sqrt(self) -> F32x4 {
         unsafe { F32x4(x86_64::_mm_sqrt_ps(self.0)) }
@@ -303,7 +293,7 @@ impl F32x4 {
 
     // Conversions
 
-    /// Converts these packed floats to integers.
+    /// Converts these packed floats to integers via rounding.
     #[inline]
     pub fn to_i32x4(self) -> I32x4 {
         unsafe { I32x4(x86_64::_mm_cvtps_epi32(self.0)) }