parent
ce3b40cd5c
commit
ee9187849b
|
@ -8,12 +8,20 @@
|
||||||
// option. This file may not be copied, modified, or distributed
|
// option. This file may not be copied, modified, or distributed
|
||||||
// except according to those terms.
|
// except according to those terms.
|
||||||
|
|
||||||
use std::arch::x86_64::{self, __m128, __m128i};
|
|
||||||
use std::cmp::PartialEq;
|
use std::cmp::PartialEq;
|
||||||
use std::fmt::{self, Debug, Formatter};
|
use std::fmt::{self, Debug, Formatter};
|
||||||
use std::mem;
|
use std::mem;
|
||||||
use std::ops::{Add, BitAnd, BitOr, BitXor, Index, IndexMut, Mul, Not, Shr, Sub};
|
use std::ops::{Add, BitAnd, BitOr, BitXor, Index, IndexMut, Mul, Not, Shr, Sub};
|
||||||
|
|
||||||
|
#[cfg(target_pointer_width = "32")]
|
||||||
|
use std::arch::x86::{__m128, __m128i};
|
||||||
|
#[cfg(target_pointer_width = "32")]
|
||||||
|
use std::arch::x86;
|
||||||
|
#[cfg(target_pointer_width = "64")]
|
||||||
|
use std::arch::x86_64::{__m128, __m128i};
|
||||||
|
#[cfg(target_pointer_width = "64")]
|
||||||
|
use std::arch::x86_64 as x86;
|
||||||
|
|
||||||
mod swizzle_f32x4;
|
mod swizzle_f32x4;
|
||||||
mod swizzle_i32x4;
|
mod swizzle_i32x4;
|
||||||
|
|
||||||
|
@ -107,7 +115,11 @@ impl F32x2 {
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn to_f32x4(self) -> F32x4 {
|
pub fn to_f32x4(self) -> F32x4 {
|
||||||
unsafe { F32x4(x86_64::_mm_castsi128_ps(x86_64::_mm_cvtsi64_si128(self.0 as i64))) }
|
unsafe {
|
||||||
|
let mut result = F32x4::default();
|
||||||
|
*mem::transmute::<&mut __m128, &mut u64>(&mut result.0) = self.0;
|
||||||
|
result
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
@ -207,30 +219,30 @@ impl F32x4 {
|
||||||
pub fn new(a: f32, b: f32, c: f32, d: f32) -> F32x4 {
|
pub fn new(a: f32, b: f32, c: f32, d: f32) -> F32x4 {
|
||||||
unsafe {
|
unsafe {
|
||||||
let vector = [a, b, c, d];
|
let vector = [a, b, c, d];
|
||||||
F32x4(x86_64::_mm_loadu_ps(vector.as_ptr()))
|
F32x4(x86::_mm_loadu_ps(vector.as_ptr()))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn splat(x: f32) -> F32x4 {
|
pub fn splat(x: f32) -> F32x4 {
|
||||||
unsafe { F32x4(x86_64::_mm_set1_ps(x)) }
|
unsafe { F32x4(x86::_mm_set1_ps(x)) }
|
||||||
}
|
}
|
||||||
|
|
||||||
// Basic operations
|
// Basic operations
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn approx_recip(self) -> F32x4 {
|
pub fn approx_recip(self) -> F32x4 {
|
||||||
unsafe { F32x4(x86_64::_mm_rcp_ps(self.0)) }
|
unsafe { F32x4(x86::_mm_rcp_ps(self.0)) }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn min(self, other: F32x4) -> F32x4 {
|
pub fn min(self, other: F32x4) -> F32x4 {
|
||||||
unsafe { F32x4(x86_64::_mm_min_ps(self.0, other.0)) }
|
unsafe { F32x4(x86::_mm_min_ps(self.0, other.0)) }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn max(self, other: F32x4) -> F32x4 {
|
pub fn max(self, other: F32x4) -> F32x4 {
|
||||||
unsafe { F32x4(x86_64::_mm_max_ps(self.0, other.0)) }
|
unsafe { F32x4(x86::_mm_max_ps(self.0, other.0)) }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
@ -241,24 +253,24 @@ impl F32x4 {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn abs(self) -> F32x4 {
|
pub fn abs(self) -> F32x4 {
|
||||||
unsafe {
|
unsafe {
|
||||||
let tmp = x86_64::_mm_srli_epi32(I32x4::splat(-1).0, 1);
|
let tmp = x86::_mm_srli_epi32(I32x4::splat(-1).0, 1);
|
||||||
F32x4(x86_64::_mm_and_ps(x86_64::_mm_castsi128_ps(tmp), self.0))
|
F32x4(x86::_mm_and_ps(x86::_mm_castsi128_ps(tmp), self.0))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn floor(self) -> F32x4 {
|
pub fn floor(self) -> F32x4 {
|
||||||
unsafe { F32x4(x86_64::_mm_floor_ps(self.0)) }
|
unsafe { F32x4(x86::_mm_floor_ps(self.0)) }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn ceil(self) -> F32x4 {
|
pub fn ceil(self) -> F32x4 {
|
||||||
unsafe { F32x4(x86_64::_mm_ceil_ps(self.0)) }
|
unsafe { F32x4(x86::_mm_ceil_ps(self.0)) }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn sqrt(self) -> F32x4 {
|
pub fn sqrt(self) -> F32x4 {
|
||||||
unsafe { F32x4(x86_64::_mm_sqrt_ps(self.0)) }
|
unsafe { F32x4(x86::_mm_sqrt_ps(self.0)) }
|
||||||
}
|
}
|
||||||
|
|
||||||
// Packed comparisons
|
// Packed comparisons
|
||||||
|
@ -266,7 +278,7 @@ impl F32x4 {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn packed_eq(self, other: F32x4) -> U32x4 {
|
pub fn packed_eq(self, other: F32x4) -> U32x4 {
|
||||||
unsafe {
|
unsafe {
|
||||||
U32x4(x86_64::_mm_castps_si128(x86_64::_mm_cmpeq_ps(
|
U32x4(x86::_mm_castps_si128(x86::_mm_cmpeq_ps(
|
||||||
self.0, other.0,
|
self.0, other.0,
|
||||||
)))
|
)))
|
||||||
}
|
}
|
||||||
|
@ -275,7 +287,7 @@ impl F32x4 {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn packed_gt(self, other: F32x4) -> U32x4 {
|
pub fn packed_gt(self, other: F32x4) -> U32x4 {
|
||||||
unsafe {
|
unsafe {
|
||||||
U32x4(x86_64::_mm_castps_si128(x86_64::_mm_cmpgt_ps(
|
U32x4(x86::_mm_castps_si128(x86::_mm_cmpgt_ps(
|
||||||
self.0, other.0,
|
self.0, other.0,
|
||||||
)))
|
)))
|
||||||
}
|
}
|
||||||
|
@ -296,34 +308,37 @@ impl F32x4 {
|
||||||
/// Converts these packed floats to integers via rounding.
|
/// Converts these packed floats to integers via rounding.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn to_i32x4(self) -> I32x4 {
|
pub fn to_i32x4(self) -> I32x4 {
|
||||||
unsafe { I32x4(x86_64::_mm_cvtps_epi32(self.0)) }
|
unsafe { I32x4(x86::_mm_cvtps_epi32(self.0)) }
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extraction
|
// Extraction
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn xy(self) -> F32x2 {
|
pub fn xy(self) -> F32x2 {
|
||||||
unsafe { F32x2(x86_64::_mm_cvtsi128_si64(x86_64::_mm_castps_si128(self.0)) as u64) }
|
unsafe {
|
||||||
|
let swizzled = self.0;
|
||||||
|
F32x2(*mem::transmute::<&__m128, &u64>(&swizzled))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn xw(self) -> F32x2 {
|
pub fn xw(self) -> F32x2 {
|
||||||
unsafe { F32x2(x86_64::_mm_cvtsi128_si64(x86_64::_mm_castps_si128(self.xwyz().0)) as u64) }
|
self.xwyz().xy()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn yx(self) -> F32x2 {
|
pub fn yx(self) -> F32x2 {
|
||||||
unsafe { F32x2(x86_64::_mm_cvtsi128_si64(x86_64::_mm_castps_si128(self.yxwz().0)) as u64) }
|
self.yxwz().xy()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn zy(self) -> F32x2 {
|
pub fn zy(self) -> F32x2 {
|
||||||
unsafe { F32x2(x86_64::_mm_cvtsi128_si64(x86_64::_mm_castps_si128(self.zyxw().0)) as u64) }
|
self.zyxw().xy()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn zw(self) -> F32x2 {
|
pub fn zw(self) -> F32x2 {
|
||||||
unsafe { F32x2(x86_64::_mm_cvtsi128_si64(x86_64::_mm_castps_si128(self.zwxy().0)) as u64) }
|
self.zwxy().xy()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Concatenations
|
// Concatenations
|
||||||
|
@ -331,43 +346,43 @@ impl F32x4 {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn concat_xy_xy(self, other: F32x4) -> F32x4 {
|
pub fn concat_xy_xy(self, other: F32x4) -> F32x4 {
|
||||||
unsafe {
|
unsafe {
|
||||||
let this = x86_64::_mm_castps_pd(self.0);
|
let this = x86::_mm_castps_pd(self.0);
|
||||||
let other = x86_64::_mm_castps_pd(other.0);
|
let other = x86::_mm_castps_pd(other.0);
|
||||||
let result = x86_64::_mm_unpacklo_pd(this, other);
|
let result = x86::_mm_unpacklo_pd(this, other);
|
||||||
F32x4(x86_64::_mm_castpd_ps(result))
|
F32x4(x86::_mm_castpd_ps(result))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn concat_xy_zw(self, other: F32x4) -> F32x4 {
|
pub fn concat_xy_zw(self, other: F32x4) -> F32x4 {
|
||||||
unsafe {
|
unsafe {
|
||||||
let this = x86_64::_mm_castps_pd(self.0);
|
let this = x86::_mm_castps_pd(self.0);
|
||||||
let other = x86_64::_mm_castps_pd(other.0);
|
let other = x86::_mm_castps_pd(other.0);
|
||||||
let result = x86_64::_mm_shuffle_pd(this, other, 0b10);
|
let result = x86::_mm_shuffle_pd(this, other, 0b10);
|
||||||
F32x4(x86_64::_mm_castpd_ps(result))
|
F32x4(x86::_mm_castpd_ps(result))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn concat_zw_zw(self, other: F32x4) -> F32x4 {
|
pub fn concat_zw_zw(self, other: F32x4) -> F32x4 {
|
||||||
unsafe {
|
unsafe {
|
||||||
let this = x86_64::_mm_castps_pd(self.0);
|
let this = x86::_mm_castps_pd(self.0);
|
||||||
let other = x86_64::_mm_castps_pd(other.0);
|
let other = x86::_mm_castps_pd(other.0);
|
||||||
let result = x86_64::_mm_unpackhi_pd(this, other);
|
let result = x86::_mm_unpackhi_pd(this, other);
|
||||||
F32x4(x86_64::_mm_castpd_ps(result))
|
F32x4(x86::_mm_castpd_ps(result))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn concat_wz_yx(self, other: F32x4) -> F32x4 {
|
pub fn concat_wz_yx(self, other: F32x4) -> F32x4 {
|
||||||
unsafe { F32x4(x86_64::_mm_shuffle_ps(self.0, other.0, 0b0001_1011)) }
|
unsafe { F32x4(x86::_mm_shuffle_ps(self.0, other.0, 0b0001_1011)) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for F32x4 {
|
impl Default for F32x4 {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn default() -> F32x4 {
|
fn default() -> F32x4 {
|
||||||
unsafe { F32x4(x86_64::_mm_setzero_ps()) }
|
unsafe { F32x4(x86::_mm_setzero_ps()) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -404,7 +419,7 @@ impl Add<F32x4> for F32x4 {
|
||||||
type Output = F32x4;
|
type Output = F32x4;
|
||||||
#[inline]
|
#[inline]
|
||||||
fn add(self, other: F32x4) -> F32x4 {
|
fn add(self, other: F32x4) -> F32x4 {
|
||||||
unsafe { F32x4(x86_64::_mm_add_ps(self.0, other.0)) }
|
unsafe { F32x4(x86::_mm_add_ps(self.0, other.0)) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -412,7 +427,7 @@ impl Mul<F32x4> for F32x4 {
|
||||||
type Output = F32x4;
|
type Output = F32x4;
|
||||||
#[inline]
|
#[inline]
|
||||||
fn mul(self, other: F32x4) -> F32x4 {
|
fn mul(self, other: F32x4) -> F32x4 {
|
||||||
unsafe { F32x4(x86_64::_mm_mul_ps(self.0, other.0)) }
|
unsafe { F32x4(x86::_mm_mul_ps(self.0, other.0)) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -420,7 +435,7 @@ impl Sub<F32x4> for F32x4 {
|
||||||
type Output = F32x4;
|
type Output = F32x4;
|
||||||
#[inline]
|
#[inline]
|
||||||
fn sub(self, other: F32x4) -> F32x4 {
|
fn sub(self, other: F32x4) -> F32x4 {
|
||||||
unsafe { F32x4(x86_64::_mm_sub_ps(self.0, other.0)) }
|
unsafe { F32x4(x86::_mm_sub_ps(self.0, other.0)) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -457,7 +472,11 @@ impl I32x2 {
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn to_i32x4(self) -> I32x4 {
|
pub fn to_i32x4(self) -> I32x4 {
|
||||||
unsafe { I32x4(x86_64::_mm_cvtsi64_si128(self.0 as i64)) }
|
unsafe {
|
||||||
|
let mut result = I32x4::default();
|
||||||
|
*mem::transmute::<&mut __m128i, &mut u64>(&mut result.0) = self.0;
|
||||||
|
result
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
@ -569,40 +588,43 @@ impl I32x4 {
|
||||||
pub fn new(a: i32, b: i32, c: i32, d: i32) -> I32x4 {
|
pub fn new(a: i32, b: i32, c: i32, d: i32) -> I32x4 {
|
||||||
unsafe {
|
unsafe {
|
||||||
let vector = [a, b, c, d];
|
let vector = [a, b, c, d];
|
||||||
I32x4(x86_64::_mm_loadu_si128(vector.as_ptr() as *const __m128i))
|
I32x4(x86::_mm_loadu_si128(vector.as_ptr() as *const __m128i))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn splat(x: i32) -> I32x4 {
|
pub fn splat(x: i32) -> I32x4 {
|
||||||
unsafe { I32x4(x86_64::_mm_set1_epi32(x)) }
|
unsafe { I32x4(x86::_mm_set1_epi32(x)) }
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extraction
|
// Extraction
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn xy(self) -> I32x2 {
|
pub fn xy(self) -> I32x2 {
|
||||||
unsafe { I32x2(x86_64::_mm_cvtsi128_si64(self.0) as u64) }
|
unsafe {
|
||||||
|
let swizzled = self.0;
|
||||||
|
I32x2(*mem::transmute::<&__m128i, &u64>(&swizzled))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn xw(self) -> I32x2 {
|
pub fn xw(self) -> I32x2 {
|
||||||
unsafe { I32x2(x86_64::_mm_cvtsi128_si64(self.xwyz().0) as u64) }
|
self.xwyz().xy()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn yx(self) -> I32x2 {
|
pub fn yx(self) -> I32x2 {
|
||||||
unsafe { I32x2(x86_64::_mm_cvtsi128_si64(self.yxwz().0) as u64) }
|
self.yxwz().xy()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn zy(self) -> I32x2 {
|
pub fn zy(self) -> I32x2 {
|
||||||
unsafe { I32x2(x86_64::_mm_cvtsi128_si64(self.zyxw().0) as u64) }
|
self.zyxw().xy()
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn zw(self) -> I32x2 {
|
pub fn zw(self) -> I32x2 {
|
||||||
unsafe { I32x2(x86_64::_mm_cvtsi128_si64(self.zwxy().0) as u64) }
|
self.zwxy().xy()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Concatenations
|
// Concatenations
|
||||||
|
@ -610,10 +632,10 @@ impl I32x4 {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn concat_xy_xy(self, other: I32x4) -> I32x4 {
|
pub fn concat_xy_xy(self, other: I32x4) -> I32x4 {
|
||||||
unsafe {
|
unsafe {
|
||||||
let this = x86_64::_mm_castsi128_pd(self.0);
|
let this = x86::_mm_castsi128_pd(self.0);
|
||||||
let other = x86_64::_mm_castsi128_pd(other.0);
|
let other = x86::_mm_castsi128_pd(other.0);
|
||||||
let result = x86_64::_mm_unpacklo_pd(this, other);
|
let result = x86::_mm_unpacklo_pd(this, other);
|
||||||
I32x4(x86_64::_mm_castpd_si128(result))
|
I32x4(x86::_mm_castpd_si128(result))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -622,7 +644,7 @@ impl I32x4 {
|
||||||
/// Converts these packed integers to floats.
|
/// Converts these packed integers to floats.
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn to_f32x4(self) -> F32x4 {
|
pub fn to_f32x4(self) -> F32x4 {
|
||||||
unsafe { F32x4(x86_64::_mm_cvtepi32_ps(self.0)) }
|
unsafe { F32x4(x86::_mm_cvtepi32_ps(self.0)) }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Converts these packed signed integers to unsigned integers.
|
/// Converts these packed signed integers to unsigned integers.
|
||||||
|
@ -637,21 +659,21 @@ impl I32x4 {
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn min(self, other: I32x4) -> I32x4 {
|
pub fn min(self, other: I32x4) -> I32x4 {
|
||||||
unsafe { I32x4(x86_64::_mm_min_epi32(self.0, other.0)) }
|
unsafe { I32x4(x86::_mm_min_epi32(self.0, other.0)) }
|
||||||
}
|
}
|
||||||
|
|
||||||
// Packed comparisons
|
// Packed comparisons
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn packed_eq(self, other: I32x4) -> U32x4 {
|
pub fn packed_eq(self, other: I32x4) -> U32x4 {
|
||||||
unsafe { U32x4(x86_64::_mm_cmpeq_epi32(self.0, other.0)) }
|
unsafe { U32x4(x86::_mm_cmpeq_epi32(self.0, other.0)) }
|
||||||
}
|
}
|
||||||
|
|
||||||
// Comparisons
|
// Comparisons
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn packed_gt(self, other: I32x4) -> U32x4 {
|
pub fn packed_gt(self, other: I32x4) -> U32x4 {
|
||||||
unsafe { U32x4(x86_64::_mm_cmpgt_epi32(self.0, other.0)) }
|
unsafe { U32x4(x86::_mm_cmpgt_epi32(self.0, other.0)) }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
@ -663,7 +685,7 @@ impl I32x4 {
|
||||||
impl Default for I32x4 {
|
impl Default for I32x4 {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn default() -> I32x4 {
|
fn default() -> I32x4 {
|
||||||
unsafe { I32x4(x86_64::_mm_setzero_si128()) }
|
unsafe { I32x4(x86::_mm_setzero_si128()) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -686,7 +708,7 @@ impl Add<I32x4> for I32x4 {
|
||||||
type Output = I32x4;
|
type Output = I32x4;
|
||||||
#[inline]
|
#[inline]
|
||||||
fn add(self, other: I32x4) -> I32x4 {
|
fn add(self, other: I32x4) -> I32x4 {
|
||||||
unsafe { I32x4(x86_64::_mm_add_epi32(self.0, other.0)) }
|
unsafe { I32x4(x86::_mm_add_epi32(self.0, other.0)) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -694,7 +716,7 @@ impl Sub<I32x4> for I32x4 {
|
||||||
type Output = I32x4;
|
type Output = I32x4;
|
||||||
#[inline]
|
#[inline]
|
||||||
fn sub(self, other: I32x4) -> I32x4 {
|
fn sub(self, other: I32x4) -> I32x4 {
|
||||||
unsafe { I32x4(x86_64::_mm_sub_epi32(self.0, other.0)) }
|
unsafe { I32x4(x86::_mm_sub_epi32(self.0, other.0)) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -702,7 +724,7 @@ impl Mul<I32x4> for I32x4 {
|
||||||
type Output = I32x4;
|
type Output = I32x4;
|
||||||
#[inline]
|
#[inline]
|
||||||
fn mul(self, other: I32x4) -> I32x4 {
|
fn mul(self, other: I32x4) -> I32x4 {
|
||||||
unsafe { I32x4(x86_64::_mm_mullo_epi32(self.0, other.0)) }
|
unsafe { I32x4(x86::_mm_mullo_epi32(self.0, other.0)) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -710,7 +732,7 @@ impl BitAnd<I32x4> for I32x4 {
|
||||||
type Output = I32x4;
|
type Output = I32x4;
|
||||||
#[inline]
|
#[inline]
|
||||||
fn bitand(self, other: I32x4) -> I32x4 {
|
fn bitand(self, other: I32x4) -> I32x4 {
|
||||||
unsafe { I32x4(x86_64::_mm_and_si128(self.0, other.0)) }
|
unsafe { I32x4(x86::_mm_and_si128(self.0, other.0)) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -718,7 +740,7 @@ impl BitOr<I32x4> for I32x4 {
|
||||||
type Output = I32x4;
|
type Output = I32x4;
|
||||||
#[inline]
|
#[inline]
|
||||||
fn bitor(self, other: I32x4) -> I32x4 {
|
fn bitor(self, other: I32x4) -> I32x4 {
|
||||||
unsafe { I32x4(x86_64::_mm_or_si128(self.0, other.0)) }
|
unsafe { I32x4(x86::_mm_or_si128(self.0, other.0)) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -773,13 +795,13 @@ impl U32x4 {
|
||||||
pub fn new(a: u32, b: u32, c: u32, d: u32) -> U32x4 {
|
pub fn new(a: u32, b: u32, c: u32, d: u32) -> U32x4 {
|
||||||
unsafe {
|
unsafe {
|
||||||
let vector = [a, b, c, d];
|
let vector = [a, b, c, d];
|
||||||
U32x4(x86_64::_mm_loadu_si128(vector.as_ptr() as *const __m128i))
|
U32x4(x86::_mm_loadu_si128(vector.as_ptr() as *const __m128i))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn splat(x: u32) -> U32x4 {
|
pub fn splat(x: u32) -> U32x4 {
|
||||||
unsafe { U32x4(x86_64::_mm_set1_epi32(x as i32)) }
|
unsafe { U32x4(x86::_mm_set1_epi32(x as i32)) }
|
||||||
}
|
}
|
||||||
|
|
||||||
// Conversions
|
// Conversions
|
||||||
|
@ -800,7 +822,7 @@ impl U32x4 {
|
||||||
/// a value with all bits set or all bits clear (i.e. !0 or 0).
|
/// a value with all bits set or all bits clear (i.e. !0 or 0).
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn all_true(self) -> bool {
|
pub fn all_true(self) -> bool {
|
||||||
unsafe { x86_64::_mm_movemask_ps(x86_64::_mm_castsi128_ps(self.0)) == 0x0f }
|
unsafe { x86::_mm_movemask_ps(x86::_mm_castsi128_ps(self.0)) == 0x0f }
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns true if all four booleans in this vector are false.
|
/// Returns true if all four booleans in this vector are false.
|
||||||
|
@ -809,21 +831,24 @@ impl U32x4 {
|
||||||
/// a value with all bits set or all bits clear (i.e. !0 or 0).
|
/// a value with all bits set or all bits clear (i.e. !0 or 0).
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn all_false(self) -> bool {
|
pub fn all_false(self) -> bool {
|
||||||
unsafe { x86_64::_mm_movemask_ps(x86_64::_mm_castsi128_ps(self.0)) == 0x00 }
|
unsafe { x86::_mm_movemask_ps(x86::_mm_castsi128_ps(self.0)) == 0x00 }
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extraction
|
// Extraction
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn xy(self) -> U32x2 {
|
pub fn xy(self) -> U32x2 {
|
||||||
unsafe { U32x2(x86_64::_mm_cvtsi128_si64(self.0) as u64) }
|
unsafe {
|
||||||
|
let swizzled = self.0;
|
||||||
|
U32x2(*mem::transmute::<&__m128i, &u64>(&swizzled))
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Packed comparisons
|
// Packed comparisons
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn packed_eq(self, other: U32x4) -> U32x4 {
|
pub fn packed_eq(self, other: U32x4) -> U32x4 {
|
||||||
unsafe { U32x4(x86_64::_mm_cmpeq_epi32(self.0, other.0)) }
|
unsafe { U32x4(x86::_mm_cmpeq_epi32(self.0, other.0)) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -861,7 +886,7 @@ impl BitXor<U32x4> for U32x4 {
|
||||||
type Output = U32x4;
|
type Output = U32x4;
|
||||||
#[inline]
|
#[inline]
|
||||||
fn bitxor(self, other: U32x4) -> U32x4 {
|
fn bitxor(self, other: U32x4) -> U32x4 {
|
||||||
unsafe { U32x4(x86_64::_mm_xor_si128(self.0, other.0)) }
|
unsafe { U32x4(x86::_mm_xor_si128(self.0, other.0)) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -869,6 +894,6 @@ impl Shr<u32> for U32x4 {
|
||||||
type Output = U32x4;
|
type Output = U32x4;
|
||||||
#[inline]
|
#[inline]
|
||||||
fn shr(self, amount: u32) -> U32x4 {
|
fn shr(self, amount: u32) -> U32x4 {
|
||||||
unsafe { U32x4(x86_64::_mm_srl_epi32(self.0, U32x4::new(amount, 0, 0, 0).0)) }
|
unsafe { U32x4(x86::_mm_srl_epi32(self.0, U32x4::new(amount, 0, 0, 0).0)) }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue