This commit is contained in:
Patrick Walton 2019-01-12 17:15:20 -08:00
parent bbf193f00f
commit af2f729520
2 changed files with 567 additions and 337 deletions

View File

@ -18,7 +18,7 @@ mod x86 {
use std::cmp::PartialEq;
use std::fmt::{self, Debug, Formatter};
use std::mem;
use std::ops::{Add, Mul, Sub, Index, IndexMut};
use std::ops::{Add, Index, IndexMut, Mul, Sub};
// 32-bit floats
@ -36,75 +36,63 @@ mod x86 {
/// Creates a vector in which every lane holds `x` (via `_mm_set1_ps`).
#[inline]
pub fn splat(x: f32) -> F32x4 {
// NOTE(review): the same expression appears twice below in two layouts
// (multi-line, then single-line); the tokens are identical.
unsafe {
F32x4(x86_64::_mm_set1_ps(x))
}
unsafe { F32x4(x86_64::_mm_set1_ps(x)) }
}
/// Returns the lane-wise minimum of `self` and `other` (via `_mm_min_ps`).
#[inline]
pub fn min(self, other: F32x4) -> F32x4 {
// NOTE(review): the same expression appears twice below in two layouts
// (multi-line, then single-line); the tokens are identical.
unsafe {
F32x4(x86_64::_mm_min_ps(self.0, other.0))
}
unsafe { F32x4(x86_64::_mm_min_ps(self.0, other.0)) }
}
/// Returns the lane-wise maximum of `self` and `other` (via `_mm_max_ps`).
#[inline]
pub fn max(self, other: F32x4) -> F32x4 {
// NOTE(review): the same expression appears twice below in two layouts
// (multi-line, then single-line); the tokens are identical.
unsafe {
F32x4(x86_64::_mm_max_ps(self.0, other.0))
}
unsafe { F32x4(x86_64::_mm_max_ps(self.0, other.0)) }
}
/// Compares `self` and `other` lane by lane for equality.
///
/// The comparison is done with `_mm_cmpeq_ps`, and the resulting float
/// vector is reinterpreted bit-for-bit as an integer vector with
/// `_mm_castps_si128` before being wrapped in `U32x4`. Per the intrinsic's
/// documentation, an equal lane yields all one bits and an unequal lane
/// yields zero.
#[inline]
pub fn packed_eq(self, other: F32x4) -> U32x4 {
unsafe {
// NOTE(review): the same expression appears twice below in two layouts
// (single-line, then wrapped); the tokens differ only by a trailing comma.
U32x4(x86_64::_mm_castps_si128(x86_64::_mm_cmpeq_ps(self.0, other.0)))
U32x4(x86_64::_mm_castps_si128(x86_64::_mm_cmpeq_ps(
self.0, other.0,
)))
}
}
/// Exchanges the low and high pairs of lanes.
///
/// The shuffle immediate `0b0100_1110` selects source lanes 2, 3, 0, 1 (in
/// that order) for result lanes 0 through 3.
#[inline]
pub fn swap_halves(self) -> F32x4 {
// NOTE(review): the same expression appears twice below in two layouts
// (multi-line, then single-line); the tokens are identical.
unsafe {
F32x4(x86_64::_mm_shuffle_ps(self.0, self.0, 0b0100_1110))
}
unsafe { F32x4(x86_64::_mm_shuffle_ps(self.0, self.0, 0b0100_1110)) }
}
/// Copies the low pair of lanes into both halves of the result.
///
/// The shuffle immediate `0b0100_0100` selects source lanes 0, 1, 0, 1 (in
/// that order) for result lanes 0 through 3.
#[inline]
pub fn splat_low_half(self) -> F32x4 {
// NOTE(review): the same expression appears twice below in two layouts
// (multi-line, then single-line); the tokens are identical.
unsafe {
F32x4(x86_64::_mm_shuffle_ps(self.0, self.0, 0b0100_0100))
}
unsafe { F32x4(x86_64::_mm_shuffle_ps(self.0, self.0, 0b0100_0100)) }
}
/// Copies the high pair of lanes into both halves of the result.
///
/// The shuffle immediate `0b1110_1110` selects source lanes 2, 3, 2, 3 (in
/// that order) for result lanes 0 through 3.
#[inline]
pub fn splat_high_half(self) -> F32x4 {
// NOTE(review): the same expression appears twice below in two layouts
// (multi-line, then single-line); the tokens are identical.
unsafe {
F32x4(x86_64::_mm_shuffle_ps(self.0, self.0, 0b1110_1110))
}
unsafe { F32x4(x86_64::_mm_shuffle_ps(self.0, self.0, 0b1110_1110)) }
}
/// Interleaves the lanes of `self` and `other`, returning two vectors.
///
/// The first element of the tuple comes from `_mm_unpacklo_ps` (lanes
/// `self[0], other[0], self[1], other[1]`), the second from
/// `_mm_unpackhi_ps` (lanes `self[2], other[2], self[3], other[3]`).
#[inline]
pub fn interleave(self, other: F32x4) -> (F32x4, F32x4) {
unsafe {
// NOTE(review): the same tuple expression appears twice below in two
// layouts (compact, then one element per line); the tokens differ only
// by a trailing comma.
(F32x4(x86_64::_mm_unpacklo_ps(self.0, other.0)),
F32x4(x86_64::_mm_unpackhi_ps(self.0, other.0)))
(
F32x4(x86_64::_mm_unpacklo_ps(self.0, other.0)),
F32x4(x86_64::_mm_unpackhi_ps(self.0, other.0)),
)
}
}
/// Converts each `f32` lane to an `i32` lane using `_mm_cvtps_epi32`.
///
/// NOTE(review): per the intrinsic's documentation this conversion rounds
/// according to the current rounding mode instead of truncating toward zero
/// (that would be `_mm_cvttps_epi32`) — confirm rounding is what callers
/// expect.
#[inline]
pub fn to_i32x4(self) -> I32x4 {
// NOTE(review): the same expression appears twice below in two layouts
// (multi-line, then single-line); the tokens are identical.
unsafe {
I32x4(x86_64::_mm_cvtps_epi32(self.0))
}
unsafe { I32x4(x86_64::_mm_cvtps_epi32(self.0)) }
}
}
/// The default `F32x4` is the all-zero vector produced by `_mm_setzero_ps`.
impl Default for F32x4 {
#[inline]
fn default() -> F32x4 {
// NOTE(review): the same expression appears twice below in two layouts
// (multi-line, then single-line); the tokens are identical.
unsafe {
F32x4(x86_64::_mm_setzero_ps())
}
unsafe { F32x4(x86_64::_mm_setzero_ps()) }
}
}
@ -112,18 +100,14 @@ mod x86 {
type Output = f32;
/// Borrows the lane at position `index`.
///
/// The inner `__m128` is viewed as a `[f32; 4]` through a reference
/// `transmute`, and that array is then indexed normally.
///
/// # Panics
///
/// Panics if `index` is 4 or greater (ordinary array bounds check).
#[inline]
fn index(&self, index: usize) -> &f32 {
// NOTE(review): the same expression appears twice below in two layouts
// (multi-line, then single-line); the tokens are identical.
unsafe {
&mem::transmute::<&__m128, &[f32; 4]>(&self.0)[index]
}
unsafe { &mem::transmute::<&__m128, &[f32; 4]>(&self.0)[index] }
}
}
/// Mutable per-lane access to an `F32x4`.
impl IndexMut<usize> for F32x4 {
/// Mutably borrows the lane at position `index`.
///
/// The inner `__m128` is viewed as a `[f32; 4]` through a reference
/// `transmute`, and that array is then indexed normally.
///
/// # Panics
///
/// Panics if `index` is 4 or greater (ordinary array bounds check).
#[inline]
fn index_mut(&mut self, index: usize) -> &mut f32 {
// NOTE(review): the same expression appears twice below in two layouts
// (multi-line, then single-line); the tokens are identical.
unsafe {
&mut mem::transmute::<&mut __m128, &mut [f32; 4]>(&mut self.0)[index]
}
unsafe { &mut mem::transmute::<&mut __m128, &mut [f32; 4]>(&mut self.0)[index] }
}
}
@ -145,9 +129,7 @@ mod x86 {
type Output = F32x4;
/// Adds `self` and `other` lane by lane (via `_mm_add_ps`).
#[inline]
fn add(self, other: F32x4) -> F32x4 {
// NOTE(review): the same expression appears twice below in two layouts
// (multi-line, then single-line); the tokens are identical.
unsafe {
F32x4(x86_64::_mm_add_ps(self.0, other.0))
}
unsafe { F32x4(x86_64::_mm_add_ps(self.0, other.0)) }
}
}
@ -155,9 +137,7 @@ mod x86 {
type Output = F32x4;
/// Multiplies `self` and `other` lane by lane (via `_mm_mul_ps`).
#[inline]
fn mul(self, other: F32x4) -> F32x4 {
// NOTE(review): the same expression appears twice below in two layouts
// (multi-line, then single-line); the tokens are identical.
unsafe {
F32x4(x86_64::_mm_mul_ps(self.0, other.0))
}
unsafe { F32x4(x86_64::_mm_mul_ps(self.0, other.0)) }
}
}
@ -165,9 +145,7 @@ mod x86 {
type Output = F32x4;
/// Subtracts `other` from `self` lane by lane (via `_mm_sub_ps`).
#[inline]
fn sub(self, other: F32x4) -> F32x4 {
// NOTE(review): the same expression appears twice below in two layouts
// (multi-line, then single-line); the tokens are identical.
unsafe {
F32x4(x86_64::_mm_sub_ps(self.0, other.0))
}
unsafe { F32x4(x86_64::_mm_sub_ps(self.0, other.0)) }
}
}
@ -187,9 +165,7 @@ mod x86 {
/// Creates an integer vector in which every lane holds `x` (via
/// `_mm_set1_epi32`).
#[inline]
pub fn splat(x: i32) -> I32x4 {
// NOTE(review): the same expression appears twice below in two layouts
// (multi-line, then single-line); the tokens are identical.
unsafe {
I32x4(x86_64::_mm_set1_epi32(x))
}
unsafe { I32x4(x86_64::_mm_set1_epi32(x)) }
}
#[inline]
@ -199,9 +175,7 @@ mod x86 {
/// Returns the lane-wise signed minimum of `self` and `other` (via
/// `_mm_min_epi32`).
///
/// NOTE(review): `_mm_min_epi32` is documented as an SSE4.1 intrinsic —
/// confirm the build enables that target feature.
#[inline]
pub fn min(self, other: I32x4) -> I32x4 {
// NOTE(review): the same expression appears twice below in two layouts
// (multi-line, then single-line); the tokens are identical.
unsafe {
I32x4(x86_64::_mm_min_epi32(self.0, other.0))
}
unsafe { I32x4(x86_64::_mm_min_epi32(self.0, other.0)) }
}
}
@ -209,9 +183,7 @@ mod x86 {
type Output = i32;
/// Borrows the lane at position `index`.
///
/// The inner `__m128i` is viewed as a `[i32; 4]` through a reference
/// `transmute`, and that array is then indexed normally.
///
/// # Panics
///
/// Panics if `index` is 4 or greater (ordinary array bounds check).
#[inline]
fn index(&self, index: usize) -> &i32 {
// NOTE(review): the same expression appears twice below in two layouts
// (multi-line, then single-line); the tokens are identical.
unsafe {
&mem::transmute::<&__m128i, &[i32; 4]>(&self.0)[index]
}
unsafe { &mem::transmute::<&__m128i, &[i32; 4]>(&self.0)[index] }
}
}
@ -219,9 +191,7 @@ mod x86 {
type Output = I32x4;
/// Subtracts `other` from `self` lane by lane (via `_mm_sub_epi32`).
#[inline]
fn sub(self, other: I32x4) -> I32x4 {
// NOTE(review): the same expression appears twice below in two layouts
// (multi-line, then single-line); the tokens are identical.
unsafe {
I32x4(x86_64::_mm_sub_epi32(self.0, other.0))
}
unsafe { I32x4(x86_64::_mm_sub_epi32(self.0, other.0)) }
}
}
@ -233,9 +203,7 @@ mod x86 {
/// Private helpers on `U32x4`.
impl U32x4 {
/// Returns `true` when `_mm_test_all_ones` reports a nonzero result for the
/// wrapped vector, i.e. when every bit of the vector is set.
///
/// NOTE(review): `_mm_test_all_ones` is documented as an SSE4.1 intrinsic —
/// confirm the build enables that target feature.
#[inline]
fn is_all_ones(&self) -> bool {
// NOTE(review): the same expression appears twice below in two layouts
// (multi-line, then single-line); the tokens are identical.
unsafe {
x86_64::_mm_test_all_ones(self.0) != 0
}
unsafe { x86_64::_mm_test_all_ones(self.0) != 0 }
}
}
@ -243,9 +211,7 @@ mod x86 {
type Output = u32;
/// Borrows the lane at position `index`.
///
/// The inner `__m128i` is viewed as a `[u32; 4]` through a reference
/// `transmute`, and that array is then indexed normally.
///
/// # Panics
///
/// Panics if `index` is 4 or greater (ordinary array bounds check).
#[inline]
fn index(&self, index: usize) -> &u32 {
// NOTE(review): the same expression appears twice below in two layouts
// (multi-line, then single-line); the tokens are identical.
unsafe {
&mem::transmute::<&__m128i, &[u32; 4]>(&self.0)[index]
}
unsafe { &mem::transmute::<&__m128i, &[u32; 4]>(&self.0)[index] }
}
}
@ -262,9 +228,7 @@ mod x86 {
/// Rearranges the bytes of `self` according to `indices` (via
/// `_mm_shuffle_epi8`).
///
/// Per the intrinsic's documentation, each result byte is picked from
/// `self` using the low four bits of the matching byte in `indices`, and is
/// zero when that index byte has its top bit set.
///
/// NOTE(review): `_mm_shuffle_epi8` is documented as an SSSE3 intrinsic —
/// confirm the build enables that target feature.
#[inline]
pub fn shuffle(self, indices: U8x16) -> U8x16 {
// NOTE(review): the same expression appears twice below in two layouts
// (multi-line, then single-line); the tokens are identical.
unsafe {
U8x16(x86_64::_mm_shuffle_epi8(self.0, indices.0))
}
unsafe { U8x16(x86_64::_mm_shuffle_epi8(self.0, indices.0)) }
}
}
}

File diff suppressed because it is too large Load Diff