From 986792349fd9647ec6d1c6e94a929d2eb9d63365 Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Sun, 29 Dec 2019 12:13:58 -0800 Subject: [PATCH] Use the SSE2 `_mm_movemask_ps` on x86 instead of the SSE4.1 `_mm_test_all_ones` and `_mm_test_all_zeros`. Partially addresses #241. --- geometry/src/rect.rs | 14 ++++--------- geometry/src/vector.rs | 4 ++-- renderer/src/sorted_vector.rs | 8 ++++++-- simd/src/arm/mod.rs | 30 ++++++++++++++++++++------- simd/src/extras.rs | 10 ++------- simd/src/scalar/mod.rs | 24 ++++++++++++++++++---- simd/src/x86/mod.rs | 38 +++++++++++++++++++++++++---------- 7 files changed, 84 insertions(+), 44 deletions(-) diff --git a/geometry/src/rect.rs b/geometry/src/rect.rs index 993ef3fc..106a2191 100644 --- a/geometry/src/rect.rs +++ b/geometry/src/rect.rs @@ -56,16 +56,13 @@ impl RectF { pub fn contains_point(&self, point: Vector2F) -> bool { // self.origin <= point && point <= self.lower_right let point = point.0.to_f32x4(); - self.0.concat_xy_xy(point).packed_le(point.concat_xy_zw(self.0)).is_all_ones() + self.0.concat_xy_xy(point).packed_le(point.concat_xy_zw(self.0)).all_true() } #[inline] pub fn contains_rect(&self, other: RectF) -> bool { // self.origin <= other.origin && other.lower_right <= self.lower_right - self.0 - .concat_xy_zw(other.0) - .packed_le(other.0.concat_xy_zw(self.0)) - .is_all_ones() + self.0.concat_xy_zw(other.0).packed_le(other.0.concat_xy_zw(self.0)).all_true() } #[inline] @@ -89,10 +86,7 @@ impl RectF { #[inline] pub fn intersects(&self, other: RectF) -> bool { // self.origin < other.lower_right && other.origin < self.lower_right - self.0 - .concat_xy_xy(other.0) - .packed_lt(other.0.concat_zw_zw(self.0)) - .is_all_ones() + self.0.concat_xy_xy(other.0).packed_lt(other.0.concat_zw_zw(self.0)).all_true() } #[inline] @@ -230,7 +224,7 @@ impl RectI { .0 .concat_xy_xy(point.0) .packed_le(point.0.concat_xy_xy(lower_right.0)) - .is_all_ones() + .all_true() } #[inline] diff --git a/geometry/src/vector.rs 
b/geometry/src/vector.rs index 80e1f1ef..a848e87c 100644 --- a/geometry/src/vector.rs +++ b/geometry/src/vector.rs @@ -148,7 +148,7 @@ impl Vector2F { impl PartialEq for Vector2F { #[inline] fn eq(&self, other: &Vector2F) -> bool { - self.0.packed_eq(other.0).is_all_ones() + self.0.packed_eq(other.0).all_true() } } @@ -261,7 +261,7 @@ impl Sub for Vector2I { impl PartialEq for Vector2I { #[inline] fn eq(&self, other: &Vector2I) -> bool { - self.0.packed_eq(other.0).is_all_ones() + self.0.packed_eq(other.0).all_true() } } diff --git a/renderer/src/sorted_vector.rs b/renderer/src/sorted_vector.rs index d0fa1172..1b77c25b 100644 --- a/renderer/src/sorted_vector.rs +++ b/renderer/src/sorted_vector.rs @@ -10,6 +10,9 @@ //! A vector that maintains sorted order with insertion sort. +use std::cmp::Ordering; +use std::convert; + #[derive(Clone, Debug)] pub struct SortedVector where @@ -29,8 +32,9 @@ where #[inline] pub fn push(&mut self, value: T) { - use std::cmp::Ordering; - let index = self.array.binary_search_by(|other| other.partial_cmp(&value).unwrap_or(Ordering::Less)).unwrap_or_else(|x| x); + let index = self.array.binary_search_by(|other| { + other.partial_cmp(&value).unwrap_or(Ordering::Less) + }).unwrap_or_else(convert::identity); self.array.insert(index, value); } diff --git a/simd/src/arm/mod.rs b/simd/src/arm/mod.rs index e1705812..fe1013ca 100644 --- a/simd/src/arm/mod.rs +++ b/simd/src/arm/mod.rs @@ -377,7 +377,7 @@ impl Debug for F32x4 { impl PartialEq for F32x4 { #[inline] fn eq(&self, other: &F32x4) -> bool { - self.packed_eq(*other).is_all_ones() + self.packed_eq(*other).all_true() } } @@ -452,7 +452,7 @@ impl Default for I32x2 { impl PartialEq for I32x2 { #[inline] fn eq(&self, other: &I32x2) -> bool { - self.packed_eq(*other).is_all_ones() + self.packed_eq(*other).all_true() } } @@ -636,7 +636,7 @@ impl Mul for I32x4 { impl PartialEq for I32x4 { #[inline] fn eq(&self, other: &I32x4) -> bool { - self.packed_eq(*other).is_all_ones() + 
self.packed_eq(*other).all_true() } } @@ -670,13 +670,21 @@ impl Shr for I32x4 { pub struct U32x2(pub uint32x2_t); impl U32x2 { + /// Returns true if both booleans in this vector are true. + /// + /// The result is *undefined* unless both values in this vector are booleans. A boolean is a + /// value with all bits set or all bits clear (i.e. !0 or 0). #[inline] - pub fn is_all_ones(&self) -> bool { + pub fn all_true(&self) -> bool { unsafe { aarch64::vminv_u32(self.0) == !0 } } + /// Returns true if both booleans in this vector are false. + /// + /// The result is *undefined* unless both values in this vector are booleans. A boolean is a + /// value with all bits set or all bits clear (i.e. !0 or 0). #[inline] - pub fn is_all_zeroes(&self) -> bool { + pub fn all_false(&self) -> bool { unsafe { aarch64::vmaxv_u32(self.0) == 0 } } } @@ -699,13 +707,21 @@ impl Index for U32x2 { pub struct U32x4(pub uint32x4_t); impl U32x4 { + /// Returns true if all four booleans in this vector are true. + /// + /// The result is *undefined* unless all four values in this vector are booleans. A boolean is + /// a value with all bits set or all bits clear (i.e. !0 or 0). #[inline] - pub fn is_all_ones(&self) -> bool { + pub fn all_true(&self) -> bool { unsafe { aarch64::vminvq_u32(self.0) == !0 } } + /// Returns true if all four booleans in this vector are false. + /// + /// The result is *undefined* unless all four values in this vector are booleans. A boolean is + /// a value with all bits set or all bits clear (i.e. !0 or 0).
#[inline] - pub fn is_all_zeroes(&self) -> bool { + pub fn all_false(&self) -> bool { unsafe { aarch64::vmaxvq_u32(self.0) == 0 } } } diff --git a/simd/src/extras.rs b/simd/src/extras.rs index 55e265cf..1e247e4f 100644 --- a/simd/src/extras.rs +++ b/simd/src/extras.rs @@ -49,10 +49,7 @@ impl F32x2 { #[inline] pub fn approx_eq(self, other: F32x2, epsilon: f32) -> bool { - (self - other) - .abs() - .packed_gt(F32x2::splat(epsilon)) - .is_all_zeroes() + (self - other).abs().packed_gt(F32x2::splat(epsilon)).all_false() } } @@ -143,10 +140,7 @@ impl F32x4 { #[inline] pub fn approx_eq(self, other: F32x4, epsilon: f32) -> bool { - (self - other) - .abs() - .packed_gt(F32x4::splat(epsilon)) - .is_all_zeroes() + (self - other).abs().packed_gt(F32x4::splat(epsilon)).all_false() } } diff --git a/simd/src/scalar/mod.rs b/simd/src/scalar/mod.rs index 867dfe4b..21ee5b5f 100644 --- a/simd/src/scalar/mod.rs +++ b/simd/src/scalar/mod.rs @@ -700,13 +700,21 @@ impl Shr for I32x4 { pub struct U32x2(pub [u32; 2]); impl U32x2 { + /// Returns true if both booleans in this vector are true. + /// + /// The result is *undefined* unless both values in this vector are booleans. A boolean is a + /// value with all bits set or all bits clear (i.e. !0 or 0). #[inline] - pub fn is_all_ones(&self) -> bool { + pub fn all_true(&self) -> bool { self[0] == !0 && self[1] == !0 } + /// Returns true if both booleans in this vector are false. + /// + /// The result is *undefined* unless both values in this vector are booleans. A boolean is a + /// value with all bits set or all bits clear (i.e. !0 or 0). #[inline] - pub fn is_all_zeroes(&self) -> bool { + pub fn all_false(&self) -> bool { self[0] == 0 && self[1] == 0 } } @@ -725,13 +733,21 @@ impl Index for U32x2 { pub struct U32x4(pub [u32; 4]); impl U32x4 { + /// Returns true if all four booleans in this vector are true. + /// + /// The result is *undefined* unless all four values in this vector are booleans.
A boolean is + /// a value with all bits set or all bits clear (i.e. !0 or 0). #[inline] - pub fn is_all_ones(&self) -> bool { + pub fn all_true(&self) -> bool { self[0] == !0 && self[1] == !0 && self[2] == !0 && self[3] == !0 } + /// Returns true if all four booleans in this vector are false. + /// + /// The result is *undefined* unless all four values in this vector are booleans. A boolean is + /// a value with all bits set or all bits clear (i.e. !0 or 0). #[inline] - pub fn is_all_zeroes(&self) -> bool { + pub fn all_false(&self) -> bool { self[0] == 0 && self[1] == 0 && self[2] == 0 && self[3] == 0 } } diff --git a/simd/src/x86/mod.rs b/simd/src/x86/mod.rs index 41e86e99..4d896e4e 100644 --- a/simd/src/x86/mod.rs +++ b/simd/src/x86/mod.rs @@ -172,7 +172,7 @@ impl Debug for F32x2 { impl PartialEq for F32x2 { #[inline] fn eq(&self, other: &F32x2) -> bool { - self.packed_eq(*other).is_all_ones() + self.packed_eq(*other).all_true() } } @@ -406,7 +406,7 @@ impl Debug for F32x4 { impl PartialEq for F32x4 { #[inline] fn eq(&self, other: &F32x4) -> bool { - self.packed_eq(*other).is_all_ones() + self.packed_eq(*other).all_true() } } @@ -563,7 +563,7 @@ impl Debug for I32x2 { impl PartialEq for I32x2 { #[inline] fn eq(&self, other: &I32x2) -> bool { - self.packed_eq(*other).is_all_ones() + self.packed_eq(*other).all_true() } } @@ -742,7 +742,7 @@ impl Debug for I32x4 { impl PartialEq for I32x4 { #[inline] fn eq(&self, other: &I32x4) -> bool { - self.packed_eq(*other).is_all_ones() + self.packed_eq(*other).all_true() } } @@ -752,13 +752,21 @@ impl PartialEq for I32x4 { pub struct U32x2(pub u64); impl U32x2 { + /// Returns true if both booleans in this vector are true. + /// + /// The result is *undefined* unless both values in this vector are booleans. A boolean is a + /// value with all bits set or all bits clear (i.e. !0 or 0).
#[inline] - pub fn is_all_ones(self) -> bool { + pub fn all_true(self) -> bool { self.0 == !0 } + /// Returns true if both booleans in this vector are false. + /// + /// The result is *undefined* unless both values in this vector are booleans. A boolean is a + /// value with all bits set or all bits clear (i.e. !0 or 0). #[inline] - pub fn is_all_zeroes(self) -> bool { + pub fn all_false(self) -> bool { self.0 == 0 } } @@ -786,14 +794,22 @@ impl U32x4 { // Basic operations + /// Returns true if all four booleans in this vector are true. + /// + /// The result is *undefined* unless all four values in this vector are booleans. A boolean is + /// a value with all bits set or all bits clear (i.e. !0 or 0). #[inline] - pub fn is_all_ones(self) -> bool { - unsafe { x86_64::_mm_test_all_ones(self.0) != 0 } + pub fn all_true(self) -> bool { + unsafe { x86_64::_mm_movemask_ps(x86_64::_mm_castsi128_ps(self.0)) == 0x0f } } + /// Returns true if all four booleans in this vector are false. + /// + /// The result is *undefined* unless all four values in this vector are booleans. A boolean is + /// a value with all bits set or all bits clear (i.e. !0 or 0). #[inline] - pub fn is_all_zeroes(self) -> bool { - unsafe { x86_64::_mm_test_all_zeros(self.0, self.0) != 0 } + pub fn all_false(self) -> bool { + unsafe { x86_64::_mm_movemask_ps(x86_64::_mm_castsi128_ps(self.0)) == 0x00 } } // Extraction @@ -829,7 +845,7 @@ impl Index for U32x4 { impl PartialEq for U32x4 { #[inline] fn eq(&self, other: &U32x4) -> bool { - self.packed_eq(*other).is_all_ones() + self.packed_eq(*other).all_true() } }