From f9aed5b077b6e5070f509daae15f91ba6e4ff745 Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Fri, 1 Feb 2019 15:19:44 -0800 Subject: [PATCH] Add some more SIMD stuff --- simd/src/x86.rs | 81 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 75 insertions(+), 6 deletions(-) diff --git a/simd/src/x86.rs b/simd/src/x86.rs index 94c0768f..87a52acd 100644 --- a/simd/src/x86.rs +++ b/simd/src/x86.rs @@ -12,7 +12,7 @@ use std::arch::x86_64::{self, __m128, __m128i}; use std::cmp::PartialEq; use std::fmt::{self, Debug, Formatter}; use std::mem; -use std::ops::{Add, AddAssign, Index, IndexMut, Mul, Neg, Sub}; +use std::ops::{Add, AddAssign, Index, IndexMut, Mul, MulAssign, Neg, Sub, SubAssign}; // 32-bit floats @@ -20,6 +20,8 @@ use std::ops::{Add, AddAssign, Index, IndexMut, Mul, Neg, Sub}; pub struct F32x4(pub __m128); impl F32x4 { + // Constructors + #[inline] pub fn new(a: f32, b: f32, c: f32, d: f32) -> F32x4 { unsafe { @@ -33,6 +35,52 @@ impl F32x4 { unsafe { F32x4(x86_64::_mm_set1_ps(x)) } } + // Accessors + + #[inline] + pub fn x(self) -> f32 { + self[0] + } + + #[inline] + pub fn y(self) -> f32 { + self[1] + } + + #[inline] + pub fn z(self) -> f32 { + self[2] + } + + #[inline] + pub fn w(self) -> f32 { + self[3] + } + + // Mutators + + #[inline] + pub fn set_x(&mut self, x: f32) { + self[0] = x + } + + #[inline] + pub fn set_y(&mut self, y: f32) { + self[1] = y + } + + #[inline] + pub fn set_z(&mut self, z: f32) { + self[2] = z + } + + #[inline] + pub fn set_w(&mut self, w: f32) { + self[3] = w + } + + // Basic ops + #[inline] pub fn min(self, other: F32x4) -> F32x4 { unsafe { F32x4(x86_64::_mm_min_ps(self.0, other.0)) } @@ -71,7 +119,10 @@ impl F32x4 { #[inline] pub fn approx_eq(self, other: F32x4, epsilon: f32) -> bool { - (self - other).abs().packed_gt(F32x4::splat(epsilon)).is_all_zeroes() + (self - other) + .abs() + .packed_gt(F32x4::splat(epsilon)) + .is_all_zeroes() } // Converts these packed floats to integers. @@ -80,7 +131,7 @@ impl F32x4 { unsafe { I32x4(x86_64::_mm_cvtps_epi32(self.0)) } } - // Shuffles + // Swizzles #[inline] pub fn xxxx(self) -> F32x4 { @@ -1400,9 +1451,7 @@ impl F32x4 { #[inline] pub fn transpose_4x4(a: &mut F32x4, b: &mut F32x4, c: &mut F32x4, d: &mut F32x4) { - unsafe { - x86_64::_MM_TRANSPOSE4_PS(&mut a.0, &mut b.0, &mut c.0, &mut d.0) - } + unsafe { x86_64::_MM_TRANSPOSE4_PS(&mut a.0, &mut b.0, &mut c.0, &mut d.0) } } // FIXME(pcwalton): Move to `Point4DF32`! @@ -1471,6 +1520,13 @@ impl Mul for F32x4 { } } +impl MulAssign for F32x4 { + #[inline] + fn mul_assign(&mut self, other: F32x4) { + unsafe { self.0 = x86_64::_mm_mul_ps(self.0, other.0) } + } +} + impl Sub for F32x4 { type Output = F32x4; #[inline] @@ -1479,6 +1535,13 @@ impl Sub for F32x4 { } } +impl SubAssign for F32x4 { + #[inline] + fn sub_assign(&mut self, other: F32x4) { + unsafe { self.0 = x86_64::_mm_sub_ps(self.0, other.0) } + } +} + impl Neg for F32x4 { type Output = F32x4; #[inline] @@ -1493,6 +1556,8 @@ impl Neg for F32x4 { pub struct I32x4(pub __m128i); impl I32x4 { + // Constructors + #[inline] pub fn new(a: i32, b: i32, c: i32, d: i32) -> I32x4 { unsafe { @@ -1506,11 +1571,15 @@ impl I32x4 { unsafe { I32x4(x86_64::_mm_set1_epi32(x)) } } + // Conversions + #[inline] pub fn as_u8x16(self) -> U8x16 { U8x16(self.0) } + // Basic operations + #[inline] pub fn min(self, other: I32x4) -> I32x4 { unsafe { I32x4(x86_64::_mm_min_epi32(self.0, other.0)) }