Use the SSE2 `_mm_movemask_ps` on x86 instead of the SSE4.1 `_mm_test_all_ones` and `_mm_test_all_zeros`.

Partially addresses #241.
This commit is contained in:
Patrick Walton 2019-12-29 12:13:58 -08:00
parent 7f6374f110
commit 986792349f
7 changed files with 84 additions and 44 deletions

View File

@ -56,16 +56,13 @@ impl RectF {
pub fn contains_point(&self, point: Vector2F) -> bool {
// self.origin <= point && point <= self.lower_right
let point = point.0.to_f32x4();
self.0.concat_xy_xy(point).packed_le(point.concat_xy_zw(self.0)).is_all_ones()
self.0.concat_xy_xy(point).packed_le(point.concat_xy_zw(self.0)).all_true()
}
#[inline]
pub fn contains_rect(&self, other: RectF) -> bool {
// self.origin <= other.origin && other.lower_right <= self.lower_right
self.0
.concat_xy_zw(other.0)
.packed_le(other.0.concat_xy_zw(self.0))
.is_all_ones()
self.0.concat_xy_zw(other.0).packed_le(other.0.concat_xy_zw(self.0)).all_true()
}
#[inline]
@ -89,10 +86,7 @@ impl RectF {
#[inline]
pub fn intersects(&self, other: RectF) -> bool {
// self.origin < other.lower_right && other.origin < self.lower_right
self.0
.concat_xy_xy(other.0)
.packed_lt(other.0.concat_zw_zw(self.0))
.is_all_ones()
self.0.concat_xy_xy(other.0).packed_lt(other.0.concat_zw_zw(self.0)).all_true()
}
#[inline]
@ -230,7 +224,7 @@ impl RectI {
.0
.concat_xy_xy(point.0)
.packed_le(point.0.concat_xy_xy(lower_right.0))
.is_all_ones()
.all_true()
}
#[inline]

View File

@ -148,7 +148,7 @@ impl Vector2F {
impl PartialEq for Vector2F {
#[inline]
fn eq(&self, other: &Vector2F) -> bool {
self.0.packed_eq(other.0).is_all_ones()
self.0.packed_eq(other.0).all_true()
}
}
@ -261,7 +261,7 @@ impl Sub<Vector2I> for Vector2I {
impl PartialEq for Vector2I {
#[inline]
fn eq(&self, other: &Vector2I) -> bool {
self.0.packed_eq(other.0).is_all_ones()
self.0.packed_eq(other.0).all_true()
}
}

View File

@ -10,6 +10,9 @@
//! A vector that maintains sorted order with insertion sort.
use std::cmp::Ordering;
use std::convert;
#[derive(Clone, Debug)]
pub struct SortedVector<T>
where
@ -29,8 +32,9 @@ where
#[inline]
pub fn push(&mut self, value: T) {
use std::cmp::Ordering;
let index = self.array.binary_search_by(|other| other.partial_cmp(&value).unwrap_or(Ordering::Less)).unwrap_or_else(|x| x);
let index = self.array.binary_search_by(|other| {
other.partial_cmp(&value).unwrap_or(Ordering::Less)
}).unwrap_or_else(convert::identity);
self.array.insert(index, value);
}

View File

@ -377,7 +377,7 @@ impl Debug for F32x4 {
impl PartialEq for F32x4 {
#[inline]
fn eq(&self, other: &F32x4) -> bool {
self.packed_eq(*other).is_all_ones()
self.packed_eq(*other).all_true()
}
}
@ -452,7 +452,7 @@ impl Default for I32x2 {
impl PartialEq for I32x2 {
#[inline]
fn eq(&self, other: &I32x2) -> bool {
self.packed_eq(*other).is_all_ones()
self.packed_eq(*other).all_true()
}
}
@ -636,7 +636,7 @@ impl Mul<I32x4> for I32x4 {
impl PartialEq for I32x4 {
#[inline]
fn eq(&self, other: &I32x4) -> bool {
self.packed_eq(*other).is_all_ones()
self.packed_eq(*other).all_true()
}
}
@ -670,13 +670,21 @@ impl Shr<I32x4> for I32x4 {
pub struct U32x2(pub uint32x2_t);
impl U32x2 {
/// Returns true if both booleans in this vector are true.
///
/// The result is *undefined* unless both values in this vector are booleans. A boolean is a
/// value with all bits set or all bits clear (i.e. !0 or 0).
#[inline]
pub fn is_all_ones(&self) -> bool {
pub fn all_true(&self) -> bool {
unsafe { aarch64::vminv_u32(self.0) == !0 }
}
/// Returns true if both booleans in this vector are false.
///
/// The result is *undefined* unless both values in this vector are booleans. A boolean is a
/// value with all bits set or all bits clear (i.e. !0 or 0).
#[inline]
pub fn is_all_zeroes(&self) -> bool {
pub fn all_false(&self) -> bool {
unsafe { aarch64::vmaxv_u32(self.0) == 0 }
}
}
@ -699,13 +707,21 @@ impl Index<usize> for U32x2 {
pub struct U32x4(pub uint32x4_t);
impl U32x4 {
/// Returns true if all four booleans in this vector are true.
///
/// The result is *undefined* unless all four values in this vector are booleans. A boolean is
/// a value with all bits set or all bits clear (i.e. !0 or 0).
#[inline]
pub fn is_all_ones(&self) -> bool {
pub fn all_true(&self) -> bool {
unsafe { aarch64::vminvq_u32(self.0) == !0 }
}
/// Returns true if all four booleans in this vector are false.
///
/// The result is *undefined* unless all four values in this vector are booleans. A boolean is
/// a value with all bits set or all bits clear (i.e. !0 or 0).
#[inline]
pub fn is_all_zeroes(&self) -> bool {
pub fn all_false(&self) -> bool {
unsafe { aarch64::vmaxvq_u32(self.0) == 0 }
}
}

View File

@ -49,10 +49,7 @@ impl F32x2 {
#[inline]
pub fn approx_eq(self, other: F32x2, epsilon: f32) -> bool {
(self - other)
.abs()
.packed_gt(F32x2::splat(epsilon))
.is_all_zeroes()
(self - other).abs().packed_gt(F32x2::splat(epsilon)).all_false()
}
}
@ -143,10 +140,7 @@ impl F32x4 {
#[inline]
pub fn approx_eq(self, other: F32x4, epsilon: f32) -> bool {
(self - other)
.abs()
.packed_gt(F32x4::splat(epsilon))
.is_all_zeroes()
(self - other).abs().packed_gt(F32x4::splat(epsilon)).all_false()
}
}

View File

@ -700,13 +700,21 @@ impl Shr<I32x4> for I32x4 {
pub struct U32x2(pub [u32; 2]);
impl U32x2 {
/// Returns true if both booleans in this vector are true.
///
/// The result is *undefined* unless both values in this vector are booleans. A boolean is a
/// value with all bits set or all bits clear (i.e. !0 or 0).
#[inline]
pub fn is_all_ones(&self) -> bool {
pub fn all_true(&self) -> bool {
self[0] == !0 && self[1] == !0
}
/// Returns true if both booleans in this vector are false.
///
/// The result is *undefined* unless both values in this vector are booleans. A boolean is a
/// value with all bits set or all bits clear (i.e. !0 or 0).
#[inline]
pub fn is_all_zeroes(&self) -> bool {
pub fn all_false(&self) -> bool {
self[0] == 0 && self[1] == 0
}
}
@ -725,13 +733,21 @@ impl Index<usize> for U32x2 {
pub struct U32x4(pub [u32; 4]);
impl U32x4 {
/// Returns true if all four booleans in this vector are true.
///
/// The result is *undefined* unless all four values in this vector are booleans. A boolean is
/// a value with all bits set or all bits clear (i.e. !0 or 0).
#[inline]
pub fn is_all_ones(&self) -> bool {
pub fn all_true(&self) -> bool {
self[0] == !0 && self[1] == !0 && self[2] == !0 && self[3] == !0
}
/// Returns true if all four booleans in this vector are false.
///
/// The result is *undefined* unless all four values in this vector are booleans. A boolean is
/// a value with all bits set or all bits clear (i.e. !0 or 0).
#[inline]
pub fn is_all_zeroes(&self) -> bool {
pub fn all_false(&self) -> bool {
self[0] == 0 && self[1] == 0 && self[2] == 0 && self[3] == 0
}
}

View File

@ -172,7 +172,7 @@ impl Debug for F32x2 {
impl PartialEq for F32x2 {
#[inline]
fn eq(&self, other: &F32x2) -> bool {
self.packed_eq(*other).is_all_ones()
self.packed_eq(*other).all_true()
}
}
@ -406,7 +406,7 @@ impl Debug for F32x4 {
impl PartialEq for F32x4 {
#[inline]
fn eq(&self, other: &F32x4) -> bool {
self.packed_eq(*other).is_all_ones()
self.packed_eq(*other).all_true()
}
}
@ -563,7 +563,7 @@ impl Debug for I32x2 {
impl PartialEq for I32x2 {
#[inline]
fn eq(&self, other: &I32x2) -> bool {
self.packed_eq(*other).is_all_ones()
self.packed_eq(*other).all_true()
}
}
@ -742,7 +742,7 @@ impl Debug for I32x4 {
impl PartialEq for I32x4 {
#[inline]
fn eq(&self, other: &I32x4) -> bool {
self.packed_eq(*other).is_all_ones()
self.packed_eq(*other).all_true()
}
}
@ -752,13 +752,21 @@ impl PartialEq for I32x4 {
pub struct U32x2(pub u64);
impl U32x2 {
/// Returns true if both booleans in this vector are true.
///
/// The result is *undefined* unless both values in this vector are booleans. A boolean is a
/// value with all bits set or all bits clear (i.e. !0 or 0).
#[inline]
pub fn is_all_ones(self) -> bool {
pub fn all_true(self) -> bool {
self.0 == !0
}
/// Returns true if both booleans in this vector are false.
///
/// The result is *undefined* unless both values in this vector are booleans. A boolean is a
/// value with all bits set or all bits clear (i.e. !0 or 0).
#[inline]
pub fn is_all_zeroes(self) -> bool {
pub fn all_false(self) -> bool {
self.0 == 0
}
}
@ -786,14 +794,22 @@ impl U32x4 {
// Basic operations
/// Returns true if all four booleans in this vector are true.
///
/// The result is *undefined* unless all four values in this vector are booleans. A boolean is
/// a value with all bits set or all bits clear (i.e. !0 or 0).
#[inline]
pub fn is_all_ones(self) -> bool {
unsafe { x86_64::_mm_test_all_ones(self.0) != 0 }
pub fn all_true(self) -> bool {
unsafe { x86_64::_mm_movemask_ps(x86_64::_mm_castsi128_ps(self.0)) == 0x0f }
}
/// Returns true if all four booleans in this vector are false.
///
/// The result is *undefined* unless all four values in this vector are booleans. A boolean is
/// a value with all bits set or all bits clear (i.e. !0 or 0).
#[inline]
pub fn is_all_zeroes(self) -> bool {
unsafe { x86_64::_mm_test_all_zeros(self.0, self.0) != 0 }
pub fn all_false(self) -> bool {
unsafe { x86_64::_mm_movemask_ps(x86_64::_mm_castsi128_ps(self.0)) == 0x00 }
}
// Extraction
@ -829,7 +845,7 @@ impl Index<usize> for U32x4 {
impl PartialEq for U32x4 {
#[inline]
fn eq(&self, other: &U32x4) -> bool {
self.packed_eq(*other).is_all_ones()
self.packed_eq(*other).all_true()
}
}