Use the SSE2 `_mm_movemask_ps` on x86 instead of the SSE4.1 `_mm_test_all_ones`
and `_mm_test_all_zeros`. Partially addresses #241.
This commit is contained in:
parent
7f6374f110
commit
986792349f
|
@ -56,16 +56,13 @@ impl RectF {
|
||||||
pub fn contains_point(&self, point: Vector2F) -> bool {
|
pub fn contains_point(&self, point: Vector2F) -> bool {
|
||||||
// self.origin <= point && point <= self.lower_right
|
// self.origin <= point && point <= self.lower_right
|
||||||
let point = point.0.to_f32x4();
|
let point = point.0.to_f32x4();
|
||||||
self.0.concat_xy_xy(point).packed_le(point.concat_xy_zw(self.0)).is_all_ones()
|
self.0.concat_xy_xy(point).packed_le(point.concat_xy_zw(self.0)).all_true()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn contains_rect(&self, other: RectF) -> bool {
|
pub fn contains_rect(&self, other: RectF) -> bool {
|
||||||
// self.origin <= other.origin && other.lower_right <= self.lower_right
|
// self.origin <= other.origin && other.lower_right <= self.lower_right
|
||||||
self.0
|
self.0.concat_xy_zw(other.0).packed_le(other.0.concat_xy_zw(self.0)).all_true()
|
||||||
.concat_xy_zw(other.0)
|
|
||||||
.packed_le(other.0.concat_xy_zw(self.0))
|
|
||||||
.is_all_ones()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
@ -89,10 +86,7 @@ impl RectF {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn intersects(&self, other: RectF) -> bool {
|
pub fn intersects(&self, other: RectF) -> bool {
|
||||||
// self.origin < other.lower_right && other.origin < self.lower_right
|
// self.origin < other.lower_right && other.origin < self.lower_right
|
||||||
self.0
|
self.0.concat_xy_xy(other.0).packed_lt(other.0.concat_zw_zw(self.0)).all_true()
|
||||||
.concat_xy_xy(other.0)
|
|
||||||
.packed_lt(other.0.concat_zw_zw(self.0))
|
|
||||||
.is_all_ones()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
@ -230,7 +224,7 @@ impl RectI {
|
||||||
.0
|
.0
|
||||||
.concat_xy_xy(point.0)
|
.concat_xy_xy(point.0)
|
||||||
.packed_le(point.0.concat_xy_xy(lower_right.0))
|
.packed_le(point.0.concat_xy_xy(lower_right.0))
|
||||||
.is_all_ones()
|
.all_true()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
|
|
@ -148,7 +148,7 @@ impl Vector2F {
|
||||||
impl PartialEq for Vector2F {
|
impl PartialEq for Vector2F {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn eq(&self, other: &Vector2F) -> bool {
|
fn eq(&self, other: &Vector2F) -> bool {
|
||||||
self.0.packed_eq(other.0).is_all_ones()
|
self.0.packed_eq(other.0).all_true()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -261,7 +261,7 @@ impl Sub<Vector2I> for Vector2I {
|
||||||
impl PartialEq for Vector2I {
|
impl PartialEq for Vector2I {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn eq(&self, other: &Vector2I) -> bool {
|
fn eq(&self, other: &Vector2I) -> bool {
|
||||||
self.0.packed_eq(other.0).is_all_ones()
|
self.0.packed_eq(other.0).all_true()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -10,6 +10,9 @@
|
||||||
|
|
||||||
//! A vector that maintains sorted order with insertion sort.
|
//! A vector that maintains sorted order with insertion sort.
|
||||||
|
|
||||||
|
use std::cmp::Ordering;
|
||||||
|
use std::convert;
|
||||||
|
|
||||||
#[derive(Clone, Debug)]
|
#[derive(Clone, Debug)]
|
||||||
pub struct SortedVector<T>
|
pub struct SortedVector<T>
|
||||||
where
|
where
|
||||||
|
@ -29,8 +32,9 @@ where
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn push(&mut self, value: T) {
|
pub fn push(&mut self, value: T) {
|
||||||
use std::cmp::Ordering;
|
let index = self.array.binary_search_by(|other| {
|
||||||
let index = self.array.binary_search_by(|other| other.partial_cmp(&value).unwrap_or(Ordering::Less)).unwrap_or_else(|x| x);
|
other.partial_cmp(&value).unwrap_or(Ordering::Less)
|
||||||
|
}).unwrap_or_else(convert::identity);
|
||||||
self.array.insert(index, value);
|
self.array.insert(index, value);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -377,7 +377,7 @@ impl Debug for F32x4 {
|
||||||
impl PartialEq for F32x4 {
|
impl PartialEq for F32x4 {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn eq(&self, other: &F32x4) -> bool {
|
fn eq(&self, other: &F32x4) -> bool {
|
||||||
self.packed_eq(*other).is_all_ones()
|
self.packed_eq(*other).all_true()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -452,7 +452,7 @@ impl Default for I32x2 {
|
||||||
impl PartialEq for I32x2 {
|
impl PartialEq for I32x2 {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn eq(&self, other: &I32x2) -> bool {
|
fn eq(&self, other: &I32x2) -> bool {
|
||||||
self.packed_eq(*other).is_all_ones()
|
self.packed_eq(*other).all_true()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -636,7 +636,7 @@ impl Mul<I32x4> for I32x4 {
|
||||||
impl PartialEq for I32x4 {
|
impl PartialEq for I32x4 {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn eq(&self, other: &I32x4) -> bool {
|
fn eq(&self, other: &I32x4) -> bool {
|
||||||
self.packed_eq(*other).is_all_ones()
|
self.packed_eq(*other).all_true()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -670,13 +670,21 @@ impl Shr<I32x4> for I32x4 {
|
||||||
pub struct U32x2(pub uint32x2_t);
|
pub struct U32x2(pub uint32x2_t);
|
||||||
|
|
||||||
impl U32x2 {
|
impl U32x2 {
|
||||||
|
/// Returns true if both booleans in this vector are true.
|
||||||
|
///
|
||||||
|
/// The result is *undefined* unless both values in this vector are booleans. A boolean is a
|
||||||
|
/// value with all bits set or all bits clear (i.e. !0 or 0).
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn is_all_ones(&self) -> bool {
|
pub fn all_true(&self) -> bool {
|
||||||
unsafe { aarch64::vminv_u32(self.0) == !0 }
|
unsafe { aarch64::vminv_u32(self.0) == !0 }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if both booleans in this vector are false.
|
||||||
|
///
|
||||||
|
/// The result is *undefined* unless both values in this vector are booleans. A boolean is a
|
||||||
|
/// value with all bits set or all bits clear (i.e. !0 or 0).
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn is_all_zeroes(&self) -> bool {
|
pub fn all_false(&self) -> bool {
|
||||||
unsafe { aarch64::vmaxv_u32(self.0) == 0 }
|
unsafe { aarch64::vmaxv_u32(self.0) == 0 }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -699,13 +707,21 @@ impl Index<usize> for U32x2 {
|
||||||
pub struct U32x4(pub uint32x4_t);
|
pub struct U32x4(pub uint32x4_t);
|
||||||
|
|
||||||
impl U32x4 {
|
impl U32x4 {
|
||||||
|
/// Returns true if all four booleans in this vector are true.
|
||||||
|
///
|
||||||
|
/// The result is *undefined* unless all four values in this vector are booleans. A boolean is
|
||||||
|
/// a value with all bits set or all bits clear (i.e. !0 or 0).
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn is_all_ones(&self) -> bool {
|
pub fn all_true(&self) -> bool {
|
||||||
unsafe { aarch64::vminvq_u32(self.0) == !0 }
|
unsafe { aarch64::vminvq_u32(self.0) == !0 }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if all four booleans in this vector are false.
|
||||||
|
///
|
||||||
|
/// The result is *undefined* unless all four values in this vector are booleans. A boolean is
|
||||||
|
/// a value with all bits set or all bits clear (i.e. !0 or 0).
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn is_all_zeroes(&self) -> bool {
|
pub fn all_false(&self) -> bool {
|
||||||
unsafe { aarch64::vmaxvq_u32(self.0) == 0 }
|
unsafe { aarch64::vmaxvq_u32(self.0) == 0 }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -49,10 +49,7 @@ impl F32x2 {
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn approx_eq(self, other: F32x2, epsilon: f32) -> bool {
|
pub fn approx_eq(self, other: F32x2, epsilon: f32) -> bool {
|
||||||
(self - other)
|
(self - other).abs().packed_gt(F32x2::splat(epsilon)).all_false()
|
||||||
.abs()
|
|
||||||
.packed_gt(F32x2::splat(epsilon))
|
|
||||||
.is_all_zeroes()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -143,10 +140,7 @@ impl F32x4 {
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn approx_eq(self, other: F32x4, epsilon: f32) -> bool {
|
pub fn approx_eq(self, other: F32x4, epsilon: f32) -> bool {
|
||||||
(self - other)
|
(self - other).abs().packed_gt(F32x4::splat(epsilon)).all_false()
|
||||||
.abs()
|
|
||||||
.packed_gt(F32x4::splat(epsilon))
|
|
||||||
.is_all_zeroes()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -700,13 +700,21 @@ impl Shr<I32x4> for I32x4 {
|
||||||
pub struct U32x2(pub [u32; 2]);
|
pub struct U32x2(pub [u32; 2]);
|
||||||
|
|
||||||
impl U32x2 {
|
impl U32x2 {
|
||||||
|
/// Returns true if both booleans in this vector are true.
|
||||||
|
///
|
||||||
|
/// The result is *undefined* unless both values in this vector are booleans. A boolean is a
|
||||||
|
/// value with all bits set or all bits clear (i.e. !0 or 0).
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn is_all_ones(&self) -> bool {
|
pub fn all_true(&self) -> bool {
|
||||||
self[0] == !0 && self[1] == !0
|
self[0] == !0 && self[1] == !0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if both booleans in this vector are false.
|
||||||
|
///
|
||||||
|
/// The result is *undefined* unless both values in this vector are booleans. A boolean is a
|
||||||
|
/// value with all bits set or all bits clear (i.e. !0 or 0).
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn is_all_zeroes(&self) -> bool {
|
pub fn all_false(&self) -> bool {
|
||||||
self[0] == 0 && self[1] == 0
|
self[0] == 0 && self[1] == 0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -725,13 +733,21 @@ impl Index<usize> for U32x2 {
|
||||||
pub struct U32x4(pub [u32; 4]);
|
pub struct U32x4(pub [u32; 4]);
|
||||||
|
|
||||||
impl U32x4 {
|
impl U32x4 {
|
||||||
|
/// Returns true if all four booleans in this vector are true.
|
||||||
|
///
|
||||||
|
/// The result is *undefined* unless all four values in this vector are booleans. A boolean is
|
||||||
|
/// a value with all bits set or all bits clear (i.e. !0 or 0).
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn is_all_ones(&self) -> bool {
|
pub fn all_true(&self) -> bool {
|
||||||
self[0] == !0 && self[1] == !0 && self[2] == !0 && self[3] == !0
|
self[0] == !0 && self[1] == !0 && self[2] == !0 && self[3] == !0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if all four booleans in this vector are false.
|
||||||
|
///
|
||||||
|
/// The result is *undefined* unless all four values in this vector are booleans. A boolean is
|
||||||
|
/// a value with all bits set or all bits clear (i.e. !0 or 0).
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn is_all_zeroes(&self) -> bool {
|
pub fn all_false(&self) -> bool {
|
||||||
self[0] == 0 && self[1] == 0 && self[2] == 0 && self[3] == 0
|
self[0] == 0 && self[1] == 0 && self[2] == 0 && self[3] == 0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -172,7 +172,7 @@ impl Debug for F32x2 {
|
||||||
impl PartialEq for F32x2 {
|
impl PartialEq for F32x2 {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn eq(&self, other: &F32x2) -> bool {
|
fn eq(&self, other: &F32x2) -> bool {
|
||||||
self.packed_eq(*other).is_all_ones()
|
self.packed_eq(*other).all_true()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -406,7 +406,7 @@ impl Debug for F32x4 {
|
||||||
impl PartialEq for F32x4 {
|
impl PartialEq for F32x4 {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn eq(&self, other: &F32x4) -> bool {
|
fn eq(&self, other: &F32x4) -> bool {
|
||||||
self.packed_eq(*other).is_all_ones()
|
self.packed_eq(*other).all_true()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -563,7 +563,7 @@ impl Debug for I32x2 {
|
||||||
impl PartialEq for I32x2 {
|
impl PartialEq for I32x2 {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn eq(&self, other: &I32x2) -> bool {
|
fn eq(&self, other: &I32x2) -> bool {
|
||||||
self.packed_eq(*other).is_all_ones()
|
self.packed_eq(*other).all_true()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -742,7 +742,7 @@ impl Debug for I32x4 {
|
||||||
impl PartialEq for I32x4 {
|
impl PartialEq for I32x4 {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn eq(&self, other: &I32x4) -> bool {
|
fn eq(&self, other: &I32x4) -> bool {
|
||||||
self.packed_eq(*other).is_all_ones()
|
self.packed_eq(*other).all_true()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -752,13 +752,21 @@ impl PartialEq for I32x4 {
|
||||||
pub struct U32x2(pub u64);
|
pub struct U32x2(pub u64);
|
||||||
|
|
||||||
impl U32x2 {
|
impl U32x2 {
|
||||||
|
/// Returns true if both booleans in this vector are true.
|
||||||
|
///
|
||||||
|
/// The result is *undefined* unless both values in this vector are booleans. A boolean is a
|
||||||
|
/// value with all bits set or all bits clear (i.e. !0 or 0).
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn is_all_ones(self) -> bool {
|
pub fn all_true(self) -> bool {
|
||||||
self.0 == !0
|
self.0 == !0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if both booleans in this vector are false.
|
||||||
|
///
|
||||||
|
/// The result is *undefined* unless both values in this vector are booleans. A boolean is a
|
||||||
|
/// value with all bits set or all bits clear (i.e. !0 or 0).
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn is_all_zeroes(self) -> bool {
|
pub fn all_false(self) -> bool {
|
||||||
self.0 == 0
|
self.0 == 0
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -786,14 +794,22 @@ impl U32x4 {
|
||||||
|
|
||||||
// Basic operations
|
// Basic operations
|
||||||
|
|
||||||
|
/// Returns true if all four booleans in this vector are true.
|
||||||
|
///
|
||||||
|
/// The result is *undefined* unless all four values in this vector are booleans. A boolean is
|
||||||
|
/// a value with all bits set or all bits clear (i.e. !0 or 0).
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn is_all_ones(self) -> bool {
|
pub fn all_true(self) -> bool {
|
||||||
unsafe { x86_64::_mm_test_all_ones(self.0) != 0 }
|
unsafe { x86_64::_mm_movemask_ps(x86_64::_mm_castsi128_ps(self.0)) == 0x0f }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if all four booleans in this vector are false.
|
||||||
|
///
|
||||||
|
/// The result is *undefined* unless all four values in this vector are booleans. A boolean is
|
||||||
|
/// a value with all bits set or all bits clear (i.e. !0 or 0).
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn is_all_zeroes(self) -> bool {
|
pub fn all_false(self) -> bool {
|
||||||
unsafe { x86_64::_mm_test_all_zeros(self.0, self.0) != 0 }
|
unsafe { x86_64::_mm_movemask_ps(x86_64::_mm_castsi128_ps(self.0)) == 0x00 }
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extraction
|
// Extraction
|
||||||
|
@ -829,7 +845,7 @@ impl Index<usize> for U32x4 {
|
||||||
impl PartialEq for U32x4 {
|
impl PartialEq for U32x4 {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn eq(&self, other: &U32x4) -> bool {
|
fn eq(&self, other: &U32x4) -> bool {
|
||||||
self.packed_eq(*other).is_all_ones()
|
self.packed_eq(*other).all_true()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue