Split out the swizzles into separate modules

Patrick Walton 2019-03-28 21:25:33 -07:00
parent ae192ffee7
commit 79b26bb9bd
11 changed files with 8501 additions and 8424 deletions
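For orientation: the split works because Rust lets an inherent impl block live in a different module from the type it extends, so each backend's mod.rs keeps the type definitions and core operations while swizzle_f32x4.rs and swizzle_i32x4.rs add the swizzle methods. A minimal, self-contained sketch of the pattern (toy names and inline modules instead of separate files; this is not the Pathfinder code itself):

mod backend {
    pub struct F32x4(pub [f32; 4]);

    // In the real crate this would be `mod swizzles;` pointing at its own file.
    mod swizzles {
        use super::F32x4;

        impl F32x4 {
            // Broadcast lane 0 into every lane of the result.
            pub fn xxxx(self) -> F32x4 {
                F32x4([self.0[0]; 4])
            }
        }
    }
}

fn main() {
    let v = backend::F32x4([1.0, 2.0, 3.0, 4.0]);
    // The method is callable through the type even though its impl lives
    // in a private submodule.
    assert_eq!(v.xxxx().0, [1.0; 4]);
}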

File diff suppressed because it is too large.

simd/src/arm/mod.rs (new file, 417 lines)

@@ -0,0 +1,417 @@
// pathfinder/simd/src/arm.rs
//
// Copyright © 2019 The Pathfinder Project Developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::arch::aarch64::{self, float32x4_t, int32x4_t, uint32x4_t, uint64x2_t, uint8x16_t};
use std::arch::aarch64::{uint8x8x2_t, uint8x8_t};
use std::f32;
use std::fmt::{self, Debug, Formatter};
use std::mem;
use std::ops::{Add, Index, IndexMut, Mul, Sub};
mod swizzle_f32x4;
mod swizzle_i32x4;
// 32-bit floats
#[derive(Clone, Copy)]
pub struct F32x4(pub float32x4_t);
impl F32x4 {
#[inline]
pub fn new(a: f32, b: f32, c: f32, d: f32) -> F32x4 {
unsafe { F32x4(mem::transmute([a, b, c, d])) }
}
#[inline]
pub fn splat(x: f32) -> F32x4 {
F32x4::new(x, x, x, x)
}
// Basic operations
#[inline]
pub fn approx_recip(self) -> F32x4 {
unsafe { F32x4(vrecpe_v4f32(self.0)) }
}
#[inline]
pub fn min(self, other: F32x4) -> F32x4 {
unsafe { F32x4(simd_fmin(self.0, other.0)) }
}
#[inline]
pub fn max(self, other: F32x4) -> F32x4 {
unsafe { F32x4(simd_fmax(self.0, other.0)) }
}
#[inline]
pub fn clamp(self, min: F32x4, max: F32x4) -> F32x4 {
self.max(min).min(max)
}
#[inline]
pub fn abs(self) -> F32x4 {
unsafe { F32x4(fabs_v4f32(self.0)) }
}
#[inline]
pub fn floor(self) -> F32x4 {
unsafe { F32x4(floor_v4f32(self.0)) }
}
#[inline]
pub fn ceil(self) -> F32x4 {
unsafe { F32x4(ceil_v4f32(self.0)) }
}
// Packed comparisons
#[inline]
pub fn packed_eq(self, other: F32x4) -> U32x4 {
unsafe { U32x4(simd_eq(self.0, other.0)) }
}
#[inline]
pub fn packed_gt(self, other: F32x4) -> U32x4 {
unsafe { U32x4(simd_gt(self.0, other.0)) }
}
#[inline]
pub fn packed_le(self, other: F32x4) -> U32x4 {
unsafe { U32x4(simd_le(self.0, other.0)) }
}
#[inline]
pub fn packed_lt(self, other: F32x4) -> U32x4 {
unsafe { U32x4(simd_lt(self.0, other.0)) }
}
// Converts these packed floats to integers.
#[inline]
pub fn to_i32x4(self) -> I32x4 {
unsafe { I32x4(simd_cast(self.0)) }
}
// Concatenations
#[inline]
pub fn concat_xy_xy(self, other: F32x4) -> F32x4 {
unsafe { F32x4(simd_shuffle4(self.0, other.0, [0, 1, 4, 5])) }
}
#[inline]
pub fn concat_xy_zw(self, other: F32x4) -> F32x4 {
unsafe { F32x4(simd_shuffle4(self.0, other.0, [0, 1, 6, 7])) }
}
#[inline]
pub fn concat_zw_zw(self, other: F32x4) -> F32x4 {
unsafe { F32x4(simd_shuffle4(self.0, other.0, [2, 3, 6, 7])) }
}
#[inline]
pub fn concat_wz_yx(self, other: F32x4) -> F32x4 {
unsafe { F32x4(simd_shuffle4(self.0, other.0, [3, 2, 5, 4])) }
}
#[inline]
pub fn cross(&self, other: F32x4) -> F32x4 {
unimplemented!()
}
}
impl Default for F32x4 {
#[inline]
fn default() -> F32x4 {
F32x4::new(0.0, 0.0, 0.0, 0.0)
}
}
impl Index<usize> for F32x4 {
type Output = f32;
#[inline]
fn index(&self, index: usize) -> &f32 {
unsafe {
let ptr = &self.0 as *const float32x4_t as *const f32;
mem::transmute::<*const f32, &f32>(ptr.offset(index as isize))
}
}
}
impl IndexMut<usize> for F32x4 {
#[inline]
fn index_mut(&mut self, index: usize) -> &mut f32 {
unsafe {
let ptr = &mut self.0 as *mut float32x4_t as *mut f32;
mem::transmute::<*mut f32, &mut f32>(ptr.offset(index as isize))
}
}
}
impl Debug for F32x4 {
#[inline]
fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
write!(f, "<{}, {}, {}, {}>", self[0], self[1], self[2], self[3])
}
}
impl PartialEq for F32x4 {
#[inline]
fn eq(&self, other: &F32x4) -> bool {
self.packed_eq(*other).is_all_ones()
}
}
impl Add<F32x4> for F32x4 {
type Output = F32x4;
#[inline]
fn add(self, other: F32x4) -> F32x4 {
unsafe {
F32x4(simd_add(self.0, other.0))
}
}
}
impl Mul<F32x4> for F32x4 {
type Output = F32x4;
#[inline]
fn mul(self, other: F32x4) -> F32x4 {
unsafe {
F32x4(simd_mul(self.0, other.0))
}
}
}
impl Sub<F32x4> for F32x4 {
type Output = F32x4;
#[inline]
fn sub(self, other: F32x4) -> F32x4 {
unsafe {
F32x4(simd_sub(self.0, other.0))
}
}
}
// 32-bit signed integers
#[derive(Clone, Copy, Debug)]
pub struct I32x4(pub int32x4_t);
impl I32x4 {
#[inline]
pub fn new(a: i32, b: i32, c: i32, d: i32) -> I32x4 {
unsafe { I32x4(mem::transmute([a, b, c, d])) }
}
#[inline]
pub fn splat(x: i32) -> I32x4 {
I32x4::new(x, x, x, x)
}
#[inline]
pub fn as_u8x16(self) -> U8x16 {
unsafe { U8x16(*mem::transmute::<&int32x4_t, &uint8x16_t>(&self.0)) }
}
#[inline]
pub fn min(self, other: I32x4) -> I32x4 {
unsafe { I32x4(simd_fmin(self.0, other.0)) }
}
// Packed comparisons
#[inline]
pub fn packed_eq(self, other: I32x4) -> U32x4 {
unsafe { U32x4(simd_eq(self.0, other.0)) }
}
#[inline]
pub fn packed_le(self, other: I32x4) -> U32x4 {
unsafe { U32x4(simd_le(self.0, other.0)) }
}
// Concatenations
#[inline]
pub fn concat_xy_xy(self, other: I32x4) -> I32x4 {
unsafe { I32x4(simd_shuffle4(self.0, other.0, [0, 1, 4, 5])) }
}
// Conversions
/// Converts these packed integers to floats.
#[inline]
pub fn to_f32x4(self) -> F32x4 {
unsafe { F32x4(simd_cast(self.0)) }
}
}
impl Default for I32x4 {
#[inline]
fn default() -> I32x4 {
I32x4::new(0, 0, 0, 0)
}
}
impl Index<usize> for I32x4 {
type Output = i32;
#[inline]
fn index(&self, index: usize) -> &i32 {
unsafe {
let ptr = &self.0 as *const int32x4_t as *const i32;
mem::transmute::<*const i32, &i32>(ptr.offset(index as isize))
}
}
}
impl IndexMut<usize> for I32x4 {
#[inline]
fn index_mut(&mut self, index: usize) -> &mut i32 {
unsafe {
let ptr = &mut self.0 as *mut int32x4_t as *mut i32;
mem::transmute::<*mut i32, &mut i32>(ptr.offset(index as isize))
}
}
}
impl Add<I32x4> for I32x4 {
type Output = I32x4;
#[inline]
fn add(self, other: I32x4) -> I32x4 {
unsafe { I32x4(simd_add(self.0, other.0)) }
}
}
impl Sub<I32x4> for I32x4 {
type Output = I32x4;
#[inline]
fn sub(self, other: I32x4) -> I32x4 {
unsafe { I32x4(simd_sub(self.0, other.0)) }
}
}
impl Mul<I32x4> for I32x4 {
type Output = I32x4;
#[inline]
fn mul(self, other: I32x4) -> I32x4 {
unsafe { I32x4(simd_mul(self.0, other.0)) }
}
}
impl PartialEq for I32x4 {
#[inline]
fn eq(&self, other: &I32x4) -> bool {
self.packed_eq(*other).is_all_ones()
}
}
// 32-bit unsigned integers
#[derive(Clone, Copy)]
pub struct U32x4(pub uint32x4_t);
impl U32x4 {
#[inline]
pub fn is_all_ones(&self) -> bool {
unsafe { aarch64::vminvq_u32(self.0) == !0 }
}
#[inline]
pub fn is_all_zeroes(&self) -> bool {
unsafe { aarch64::vmaxvq_u32(self.0) == 0 }
}
}
impl Index<usize> for U32x4 {
type Output = u32;
#[inline]
fn index(&self, index: usize) -> &u32 {
unsafe {
let ptr = &self.0 as *const uint32x4_t as *const u32;
mem::transmute::<*const u32, &u32>(ptr.offset(index as isize))
}
}
}
// 8-bit unsigned integers
#[derive(Clone, Copy)]
pub struct U8x16(pub uint8x16_t);
impl U8x16 {
#[inline]
pub fn as_i32x4(self) -> I32x4 {
unsafe {
I32x4(*mem::transmute::<&uint8x16_t, &int32x4_t>(&self.0))
}
}
#[inline]
pub fn shuffle(self, indices: U8x16) -> U8x16 {
unsafe {
let table = mem::transmute::<uint8x16_t, uint8x8x2_t>(self.0);
let low = aarch64::vtbl2_u8(table, indices.extract_low());
let high = aarch64::vtbl2_u8(table, indices.extract_high());
U8x16(aarch64::vcombine_u8(low, high))
}
}
#[inline]
fn extract_low(self) -> uint8x8_t {
unsafe {
let low = simd_extract(mem::transmute::<uint8x16_t, uint64x2_t>(self.0), 0);
mem::transmute::<u64, uint8x8_t>(low)
}
}
#[inline]
fn extract_high(self) -> uint8x8_t {
unsafe {
let high = simd_extract(mem::transmute::<uint8x16_t, uint64x2_t>(self.0), 1);
mem::transmute::<u64, uint8x8_t>(high)
}
}
}
// Intrinsics
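// Note: these lane-wise generics are lowered by rustc itself through the
// unstable "platform-intrinsic" ABI, so this backend builds only on a
// nightly toolchain with the corresponding feature gate enabled.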
extern "platform-intrinsic" {
fn simd_add<T>(x: T, y: T) -> T;
fn simd_mul<T>(x: T, y: T) -> T;
fn simd_sub<T>(x: T, y: T) -> T;
fn simd_fmin<T>(x: T, y: T) -> T;
fn simd_fmax<T>(x: T, y: T) -> T;
fn simd_eq<T, U>(x: T, y: T) -> U;
fn simd_gt<T, U>(x: T, y: T) -> U;
fn simd_le<T, U>(x: T, y: T) -> U;
fn simd_lt<T, U>(x: T, y: T) -> U;
fn simd_shuffle4<T, U>(x: T, y: T, idx: [u32; 4]) -> U;
fn simd_cast<T, U>(x: T) -> U;
fn simd_insert<T, U>(x: T, index: u32, value: U) -> T;
fn simd_extract<T, U>(x: T, index: u32) -> U;
}
extern "C" {
#[link_name = "llvm.fabs.v4f32"]
fn fabs_v4f32(a: float32x4_t) -> float32x4_t;
#[link_name = "llvm.floor.v4f32"]
fn floor_v4f32(a: float32x4_t) -> float32x4_t;
#[link_name = "llvm.ceil.v4f32"]
fn ceil_v4f32(a: float32x4_t) -> float32x4_t;
#[link_name = "llvm.aarch64.neon.frecpe.v4f32"]
fn vrecpe_v4f32(a: float32x4_t) -> float32x4_t;
}
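The three backends expose the same surface, so code written against one compiles against the others. A small usage sketch, written as a hypothetical test and assuming an AArch64 build where this module is compiled in as crate::arm (names outside this file are assumptions): clamp is just max against the lower bound followed by min against the upper bound, as defined above.

#[cfg(all(test, target_arch = "aarch64"))]
mod clamp_example {
    use crate::arm::F32x4;

    #[test]
    fn clamp_to_unit_interval() {
        let v = F32x4::new(-1.0, 0.25, 0.75, 2.0);
        // max(0.0) pulls -1.0 up to 0.0, then min(1.0) pulls 2.0 down to 1.0.
        let clamped = v.clamp(F32x4::splat(0.0), F32x4::splat(1.0));
        assert_eq!(clamped, F32x4::new(0.0, 0.25, 0.75, 1.0));
    }
}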

File diff suppressed because it is too large.

File diff suppressed because it is too large.

File diff suppressed because it is too large.

simd/src/scalar/mod.rs (new file, 360 lines)

@@ -0,0 +1,360 @@
// pathfinder/simd/src/scalar.rs
//
// Copyright © 2019 The Pathfinder Project Developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::f32;
use std::fmt::{self, Debug, Formatter};
use std::mem;
use std::ops::{Add, Index, IndexMut, Mul, Sub};
mod swizzle_f32x4;
mod swizzle_i32x4;
// 32-bit floats
#[derive(Clone, Copy, Default, PartialEq)]
pub struct F32x4(pub [f32; 4]);
impl F32x4 {
#[inline]
pub fn new(a: f32, b: f32, c: f32, d: f32) -> F32x4 {
F32x4([a, b, c, d])
}
#[inline]
pub fn splat(x: f32) -> F32x4 {
F32x4([x; 4])
}
// Basic operations
#[inline]
pub fn approx_recip(self) -> F32x4 {
F32x4([1.0 / self[0], 1.0 / self[1], 1.0 / self[2], 1.0 / self[3]])
}
#[inline]
pub fn min(self, other: F32x4) -> F32x4 {
F32x4([
self[0].min(other[0]),
self[1].min(other[1]),
self[2].min(other[2]),
self[3].min(other[3]),
])
}
#[inline]
pub fn max(self, other: F32x4) -> F32x4 {
F32x4([
self[0].max(other[0]),
self[1].max(other[1]),
self[2].max(other[2]),
self[3].max(other[3]),
])
}
#[inline]
pub fn clamp(self, min: F32x4, max: F32x4) -> F32x4 {
self.max(min).min(max)
}
#[inline]
pub fn abs(self) -> F32x4 {
F32x4([self[0].abs(), self[1].abs(), self[2].abs(), self[3].abs()])
}
#[inline]
pub fn floor(self) -> F32x4 {
F32x4([self[0].floor(), self[1].floor(), self[2].floor(), self[3].floor()])
}
#[inline]
pub fn ceil(self) -> F32x4 {
F32x4([self[0].ceil(), self[1].ceil(), self[2].ceil(), self[3].ceil()])
}
// Packed comparisons
#[inline]
pub fn packed_eq(self, other: F32x4) -> U32x4 {
U32x4([
if self[0] == other[0] { !0 } else { 0 },
if self[1] == other[1] { !0 } else { 0 },
if self[2] == other[2] { !0 } else { 0 },
if self[3] == other[3] { !0 } else { 0 },
])
}
#[inline]
pub fn packed_gt(self, other: F32x4) -> U32x4 {
U32x4([
if self[0] > other[0] { !0 } else { 0 },
if self[1] > other[1] { !0 } else { 0 },
if self[2] > other[2] { !0 } else { 0 },
if self[3] > other[3] { !0 } else { 0 },
])
}
#[inline]
pub fn packed_le(self, other: F32x4) -> U32x4 {
U32x4([
if self[0] <= other[0] { !0 } else { 0 },
if self[1] <= other[1] { !0 } else { 0 },
if self[2] <= other[2] { !0 } else { 0 },
if self[3] <= other[3] { !0 } else { 0 },
])
}
#[inline]
pub fn packed_lt(self, other: F32x4) -> U32x4 {
U32x4([
if self[0] < other[0] { !0 } else { 0 },
if self[1] < other[1] { !0 } else { 0 },
if self[2] < other[2] { !0 } else { 0 },
if self[3] < other[3] { !0 } else { 0 },
])
}
// Converts these packed floats to integers.
#[inline]
pub fn to_i32x4(self) -> I32x4 {
I32x4([self[0] as i32, self[1] as i32, self[2] as i32, self[3] as i32])
}
// Concatenations
#[inline]
pub fn concat_xy_xy(self, other: F32x4) -> F32x4 {
F32x4([self[0], self[1], other[0], other[1]])
}
#[inline]
pub fn concat_xy_zw(self, other: F32x4) -> F32x4 {
F32x4([self[0], self[1], other[2], other[3]])
}
#[inline]
pub fn concat_zw_zw(self, other: F32x4) -> F32x4 {
F32x4([self[2], self[3], other[2], other[3]])
}
#[inline]
pub fn concat_wz_yx(self, other: F32x4) -> F32x4 {
F32x4([self[3], self[2], other[1], other[0]])
}
#[inline]
pub fn cross(&self, other: F32x4) -> F32x4 {
unimplemented!()
}
}
impl Index<usize> for F32x4 {
type Output = f32;
#[inline]
fn index(&self, index: usize) -> &f32 {
&self.0[index]
}
}
impl IndexMut<usize> for F32x4 {
#[inline]
fn index_mut(&mut self, index: usize) -> &mut f32 {
&mut self.0[index]
}
}
impl Debug for F32x4 {
#[inline]
fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
write!(f, "<{}, {}, {}, {}>", self[0], self[1], self[2], self[3])
}
}
impl Add<F32x4> for F32x4 {
type Output = F32x4;
#[inline]
fn add(self, other: F32x4) -> F32x4 {
F32x4([self[0] + other[0], self[1] + other[1], self[2] + other[2], self[3] + other[3]])
}
}
impl Mul<F32x4> for F32x4 {
type Output = F32x4;
#[inline]
fn mul(self, other: F32x4) -> F32x4 {
F32x4([self[0] * other[0], self[1] * other[1], self[2] * other[2], self[3] * other[3]])
}
}
impl Sub<F32x4> for F32x4 {
type Output = F32x4;
#[inline]
fn sub(self, other: F32x4) -> F32x4 {
F32x4([self[0] - other[0], self[1] - other[1], self[2] - other[2], self[3] - other[3]])
}
}
// 32-bit signed integers
#[derive(Clone, Copy, Default, Debug, PartialEq)]
pub struct I32x4([i32; 4]);
impl I32x4 {
#[inline]
pub fn new(a: i32, b: i32, c: i32, d: i32) -> I32x4 {
I32x4([a, b, c, d])
}
#[inline]
pub fn splat(x: i32) -> I32x4 {
I32x4([x; 4])
}
#[inline]
pub fn as_u8x16(self) -> U8x16 {
unsafe {
U8x16(*mem::transmute::<&[i32; 4], &[u8; 16]>(&self.0))
}
}
#[inline]
pub fn min(self, other: I32x4) -> I32x4 {
I32x4([
self[0].min(other[0]),
self[1].min(other[1]),
self[2].min(other[2]),
self[3].min(other[3]),
])
}
// Packed comparisons
#[inline]
pub fn packed_eq(self, other: I32x4) -> U32x4 {
U32x4([
if self[0] == other[0] { !0 } else { 0 },
if self[1] == other[1] { !0 } else { 0 },
if self[2] == other[2] { !0 } else { 0 },
if self[3] == other[3] { !0 } else { 0 },
])
}
#[inline]
pub fn packed_le(self, other: I32x4) -> U32x4 {
U32x4([
if self[0] <= other[0] { !0 } else { 0 },
if self[1] <= other[1] { !0 } else { 0 },
if self[2] <= other[2] { !0 } else { 0 },
if self[3] <= other[3] { !0 } else { 0 },
])
}
// Concatenations
#[inline]
pub fn concat_xy_xy(self, other: I32x4) -> I32x4 {
I32x4([self[0], self[1], other[0], other[1]])
}
// Conversions
/// Converts these packed integers to floats.
#[inline]
pub fn to_f32x4(self) -> F32x4 {
F32x4([self[0] as f32, self[1] as f32, self[2] as f32, self[3] as f32])
}
}
impl Index<usize> for I32x4 {
type Output = i32;
#[inline]
fn index(&self, index: usize) -> &i32 {
&self.0[index]
}
}
impl IndexMut<usize> for I32x4 {
#[inline]
fn index_mut(&mut self, index: usize) -> &mut i32 {
&mut self.0[index]
}
}
impl Add<I32x4> for I32x4 {
type Output = I32x4;
#[inline]
fn add(self, other: I32x4) -> I32x4 {
I32x4([self[0] + other[0], self[1] + other[1], self[2] + other[2], self[3] + other[3]])
}
}
impl Sub<I32x4> for I32x4 {
type Output = I32x4;
#[inline]
fn sub(self, other: I32x4) -> I32x4 {
I32x4([self[0] - other[0], self[1] - other[1], self[2] - other[2], self[3] - other[3]])
}
}
impl Mul<I32x4> for I32x4 {
type Output = I32x4;
#[inline]
fn mul(self, other: I32x4) -> I32x4 {
I32x4([self[0] * other[0], self[1] * other[1], self[2] * other[2], self[3] * other[3]])
}
}
// 32-bit unsigned integers
#[derive(Clone, Copy)]
pub struct U32x4(pub [u32; 4]);
impl U32x4 {
#[inline]
pub fn is_all_ones(&self) -> bool {
self[0] == !0 && self[1] == !0 && self[2] == !0 && self[3] == !0
}
#[inline]
pub fn is_all_zeroes(&self) -> bool {
self[0] == 0 && self[1] == 0 && self[2] == 0 && self[3] == 0
}
}
impl Index<usize> for U32x4 {
type Output = u32;
#[inline]
fn index(&self, index: usize) -> &u32 {
&self.0[index]
}
}
// 8-bit unsigned integers
#[derive(Clone, Copy)]
pub struct U8x16([u8; 16]);
impl U8x16 {
#[inline]
pub fn as_i32x4(self) -> I32x4 {
unsafe {
I32x4(*mem::transmute::<&[u8; 16], &[i32; 4]>(&self.0))
}
}
#[inline]
pub fn shuffle(self, indices: U8x16) -> U8x16 {
let mut result = [0; 16];
for index in 0..16 {
result[index] = self.0[(indices.0[index] & 0x0f) as usize]
}
U8x16(result)
}
}
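The scalar backend makes the semantics of the packed comparisons easy to see: each lane of the returned U32x4 is either all ones or all zeroes. A quick sketch as a hypothetical test (the crate::scalar path is an assumption based on this file's location):

#[cfg(test)]
mod mask_example {
    use crate::scalar::F32x4;

    #[test]
    fn packed_gt_yields_per_lane_masks() {
        let a = F32x4::new(1.0, 2.0, 3.0, 4.0);
        let b = F32x4::splat(2.5);
        // Only the last two lanes of `a` exceed 2.5.
        let mask = a.packed_gt(b);
        assert_eq!(mask[0], 0);
        assert_eq!(mask[1], 0);
        assert_eq!(mask[2], !0);
        assert_eq!(mask[3], !0);
        assert!(!mask.is_all_zeroes() && !mask.is_all_ones());
    }
}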

File diff suppressed because it is too large.

File diff suppressed because it is too large.

simd/src/x86/mod.rs (new file, 454 lines)

@@ -0,0 +1,454 @@
// pathfinder/simd/src/x86.rs
//
// Copyright © 2019 The Pathfinder Project Developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::arch::x86_64::{self, __m128, __m128i};
use std::cmp::PartialEq;
use std::fmt::{self, Debug, Formatter};
use std::mem;
use std::ops::{Add, BitXor, Index, IndexMut, Mul, Not, Sub};
mod swizzle_f32x4;
mod swizzle_i32x4;
// 32-bit floats
#[derive(Clone, Copy)]
pub struct F32x4(pub __m128);
impl F32x4 {
// Constructors
#[inline]
pub fn new(a: f32, b: f32, c: f32, d: f32) -> F32x4 {
unsafe {
let vector = [a, b, c, d];
F32x4(x86_64::_mm_loadu_ps(vector.as_ptr()))
}
}
#[inline]
pub fn splat(x: f32) -> F32x4 {
unsafe { F32x4(x86_64::_mm_set1_ps(x)) }
}
// Basic operations
#[inline]
pub fn approx_recip(self) -> F32x4 {
unsafe { F32x4(x86_64::_mm_rcp_ps(self.0)) }
}
#[inline]
pub fn min(self, other: F32x4) -> F32x4 {
unsafe { F32x4(x86_64::_mm_min_ps(self.0, other.0)) }
}
#[inline]
pub fn max(self, other: F32x4) -> F32x4 {
unsafe { F32x4(x86_64::_mm_max_ps(self.0, other.0)) }
}
#[inline]
pub fn clamp(self, min: F32x4, max: F32x4) -> F32x4 {
self.max(min).min(max)
}
#[inline]
pub fn abs(self) -> F32x4 {
unsafe {
let tmp = x86_64::_mm_srli_epi32(I32x4::splat(-1).0, 1);
F32x4(x86_64::_mm_and_ps(x86_64::_mm_castsi128_ps(tmp), self.0))
}
}
#[inline]
pub fn floor(self) -> F32x4 {
unsafe { F32x4(x86_64::_mm_floor_ps(self.0)) }
}
#[inline]
pub fn ceil(self) -> F32x4 {
unsafe { F32x4(x86_64::_mm_ceil_ps(self.0)) }
}
// Packed comparisons
#[inline]
pub fn packed_eq(self, other: F32x4) -> U32x4 {
unsafe {
U32x4(x86_64::_mm_castps_si128(x86_64::_mm_cmpeq_ps(
self.0, other.0,
)))
}
}
#[inline]
pub fn packed_gt(self, other: F32x4) -> U32x4 {
unsafe {
U32x4(x86_64::_mm_castps_si128(x86_64::_mm_cmpgt_ps(
self.0, other.0,
)))
}
}
#[inline]
pub fn packed_lt(self, other: F32x4) -> U32x4 {
other.packed_gt(self)
}
#[inline]
pub fn packed_le(self, other: F32x4) -> U32x4 {
!self.packed_gt(other)
}
// Conversions
/// Converts these packed floats to integers.
#[inline]
pub fn to_i32x4(self) -> I32x4 {
unsafe { I32x4(x86_64::_mm_cvtps_epi32(self.0)) }
}
// Concatenations
#[inline]
pub fn concat_xy_xy(self, other: F32x4) -> F32x4 {
unsafe {
let this = x86_64::_mm_castps_pd(self.0);
let other = x86_64::_mm_castps_pd(other.0);
let result = x86_64::_mm_unpacklo_pd(this, other);
F32x4(x86_64::_mm_castpd_ps(result))
}
}
#[inline]
pub fn concat_xy_zw(self, other: F32x4) -> F32x4 {
unsafe {
let this = x86_64::_mm_castps_pd(self.0);
let other = x86_64::_mm_castps_pd(other.0);
let result = x86_64::_mm_shuffle_pd(this, other, 0b10);
F32x4(x86_64::_mm_castpd_ps(result))
}
}
#[inline]
pub fn concat_zw_zw(self, other: F32x4) -> F32x4 {
unsafe {
let this = x86_64::_mm_castps_pd(self.0);
let other = x86_64::_mm_castps_pd(other.0);
let result = x86_64::_mm_unpackhi_pd(this, other);
F32x4(x86_64::_mm_castpd_ps(result))
}
}
#[inline]
pub fn concat_wz_yx(self, other: F32x4) -> F32x4 {
unsafe { F32x4(x86_64::_mm_shuffle_ps(self.0, other.0, 0b0001_1011)) }
}
// FIXME(pcwalton): Move to `Point3DF32`!
#[inline]
pub fn cross(&self, other: F32x4) -> F32x4 {
self.yzxw() * other.zxyw() - self.zxyw() * other.yzxw()
}
}
impl Default for F32x4 {
#[inline]
fn default() -> F32x4 {
unsafe { F32x4(x86_64::_mm_setzero_ps()) }
}
}
impl Index<usize> for F32x4 {
type Output = f32;
#[inline]
fn index(&self, index: usize) -> &f32 {
unsafe { &mem::transmute::<&__m128, &[f32; 4]>(&self.0)[index] }
}
}
impl IndexMut<usize> for F32x4 {
#[inline]
fn index_mut(&mut self, index: usize) -> &mut f32 {
unsafe { &mut mem::transmute::<&mut __m128, &mut [f32; 4]>(&mut self.0)[index] }
}
}
impl Debug for F32x4 {
#[inline]
fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
write!(f, "<{}, {}, {}, {}>", self[0], self[1], self[2], self[3])
}
}
impl PartialEq for F32x4 {
#[inline]
fn eq(&self, other: &F32x4) -> bool {
self.packed_eq(*other).is_all_ones()
}
}
impl Add<F32x4> for F32x4 {
type Output = F32x4;
#[inline]
fn add(self, other: F32x4) -> F32x4 {
unsafe { F32x4(x86_64::_mm_add_ps(self.0, other.0)) }
}
}
impl Mul<F32x4> for F32x4 {
type Output = F32x4;
#[inline]
fn mul(self, other: F32x4) -> F32x4 {
unsafe { F32x4(x86_64::_mm_mul_ps(self.0, other.0)) }
}
}
impl Sub<F32x4> for F32x4 {
type Output = F32x4;
#[inline]
fn sub(self, other: F32x4) -> F32x4 {
unsafe { F32x4(x86_64::_mm_sub_ps(self.0, other.0)) }
}
}
// 32-bit signed integers
#[derive(Clone, Copy)]
pub struct I32x4(pub __m128i);
impl I32x4 {
// Constructors
#[inline]
pub fn new(a: i32, b: i32, c: i32, d: i32) -> I32x4 {
unsafe {
let vector = [a, b, c, d];
I32x4(x86_64::_mm_loadu_si128(vector.as_ptr() as *const __m128i))
}
}
#[inline]
pub fn splat(x: i32) -> I32x4 {
unsafe { I32x4(x86_64::_mm_set1_epi32(x)) }
}
// Concatenations
#[inline]
pub fn concat_xy_xy(self, other: I32x4) -> I32x4 {
unsafe {
let this = x86_64::_mm_castsi128_pd(self.0);
let other = x86_64::_mm_castsi128_pd(other.0);
let result = x86_64::_mm_unpacklo_pd(this, other);
I32x4(x86_64::_mm_castpd_si128(result))
}
}
// Conversions
#[inline]
pub fn as_u8x16(self) -> U8x16 {
U8x16(self.0)
}
/// Converts these packed integers to floats.
#[inline]
pub fn to_f32x4(self) -> F32x4 {
unsafe { F32x4(x86_64::_mm_cvtepi32_ps(self.0)) }
}
// Basic operations
#[inline]
pub fn min(self, other: I32x4) -> I32x4 {
unsafe { I32x4(x86_64::_mm_min_epi32(self.0, other.0)) }
}
// Packed comparisons
#[inline]
pub fn packed_eq(self, other: I32x4) -> U32x4 {
unsafe { U32x4(x86_64::_mm_cmpeq_epi32(self.0, other.0)) }
}
// Comparisons
#[inline]
pub fn packed_gt(self, other: I32x4) -> U32x4 {
unsafe {
U32x4(x86_64::_mm_cmpgt_epi32(self.0, other.0))
}
}
#[inline]
pub fn packed_le(self, other: I32x4) -> U32x4 {
!self.packed_gt(other)
}
}
impl Default for I32x4 {
#[inline]
fn default() -> I32x4 {
unsafe { I32x4(x86_64::_mm_setzero_si128()) }
}
}
impl Index<usize> for I32x4 {
type Output = i32;
#[inline]
fn index(&self, index: usize) -> &i32 {
unsafe { &mem::transmute::<&__m128i, &[i32; 4]>(&self.0)[index] }
}
}
impl IndexMut<usize> for I32x4 {
#[inline]
fn index_mut(&mut self, index: usize) -> &mut i32 {
unsafe { &mut mem::transmute::<&mut __m128i, &mut [i32; 4]>(&mut self.0)[index] }
}
}
impl Add<I32x4> for I32x4 {
type Output = I32x4;
#[inline]
fn add(self, other: I32x4) -> I32x4 {
unsafe { I32x4(x86_64::_mm_add_epi32(self.0, other.0)) }
}
}
impl Sub<I32x4> for I32x4 {
type Output = I32x4;
#[inline]
fn sub(self, other: I32x4) -> I32x4 {
unsafe { I32x4(x86_64::_mm_sub_epi32(self.0, other.0)) }
}
}
impl Mul<I32x4> for I32x4 {
type Output = I32x4;
#[inline]
fn mul(self, other: I32x4) -> I32x4 {
unsafe { I32x4(x86_64::_mm_mullo_epi32(self.0, other.0)) }
}
}
impl Debug for I32x4 {
#[inline]
fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
write!(f, "<{}, {}, {}, {}>", self[0], self[1], self[2], self[3])
}
}
impl PartialEq for I32x4 {
#[inline]
fn eq(&self, other: &I32x4) -> bool {
self.packed_eq(*other).is_all_ones()
}
}
// 32-bit unsigned integers
#[derive(Clone, Copy)]
pub struct U32x4(pub __m128i);
impl U32x4 {
// Constructors
#[inline]
pub fn new(a: u32, b: u32, c: u32, d: u32) -> U32x4 {
unsafe {
let vector = [a, b, c, d];
U32x4(x86_64::_mm_loadu_si128(vector.as_ptr() as *const __m128i))
}
}
#[inline]
pub fn splat(x: u32) -> U32x4 {
unsafe { U32x4(x86_64::_mm_set1_epi32(x as i32)) }
}
// Basic operations
#[inline]
pub fn is_all_ones(self) -> bool {
unsafe { x86_64::_mm_test_all_ones(self.0) != 0 }
}
#[inline]
pub fn is_all_zeroes(self) -> bool {
unsafe { x86_64::_mm_test_all_zeros(self.0, self.0) != 0 }
}
// Packed comparisons
#[inline]
pub fn packed_eq(self, other: U32x4) -> U32x4 {
unsafe { U32x4(x86_64::_mm_cmpeq_epi32(self.0, other.0)) }
}
}
impl Debug for U32x4 {
#[inline]
fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
write!(f, "<{}, {}, {}, {}>", self[0], self[1], self[2], self[3])
}
}
impl Index<usize> for U32x4 {
type Output = u32;
#[inline]
fn index(&self, index: usize) -> &u32 {
unsafe { &mem::transmute::<&__m128i, &[u32; 4]>(&self.0)[index] }
}
}
impl PartialEq for U32x4 {
#[inline]
fn eq(&self, other: &U32x4) -> bool {
self.packed_eq(*other).is_all_ones()
}
}
impl Not for U32x4 {
type Output = U32x4;
#[inline]
fn not(self) -> U32x4 {
self ^ U32x4::splat(!0)
}
}
impl BitXor<U32x4> for U32x4 {
type Output = U32x4;
#[inline]
fn bitxor(self, other: U32x4) -> U32x4 {
unsafe {
U32x4(x86_64::_mm_xor_si128(self.0, other.0))
}
}
}
// 8-bit unsigned integers
#[derive(Clone, Copy)]
pub struct U8x16(pub __m128i);
impl U8x16 {
#[inline]
pub fn as_i32x4(self) -> I32x4 {
I32x4(self.0)
}
#[inline]
pub fn shuffle(self, indices: U8x16) -> U8x16 {
unsafe { U8x16(x86_64::_mm_shuffle_epi8(self.0, indices.0)) }
}
}
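Note that cross() above is written entirely in terms of the yzxw and zxyw swizzles that this commit moves out into swizzle_f32x4.rs, so mod.rs now depends on its own submodule for those methods. A quick sanity sketch of the identity it computes, as a hypothetical test on an x86_64 build:

#[cfg(all(test, target_arch = "x86_64"))]
mod cross_example {
    use crate::x86::F32x4;

    #[test]
    fn x_cross_y_is_z() {
        let x_axis = F32x4::new(1.0, 0.0, 0.0, 0.0);
        let y_axis = F32x4::new(0.0, 1.0, 0.0, 0.0);
        // a × b = a.yzx * b.zxy - a.zxy * b.yzx; the w lane cancels to zero.
        assert_eq!(x_axis.cross(y_axis), F32x4::new(0.0, 0.0, 1.0, 0.0));
    }
}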

simd/src/x86.rs → simd/src/x86/swizzle_f32x4.rs

@@ -1,4 +1,4 @@
-// pathfinder/simd/src/x86.rs
+// pathfinder/simd/src/x86/swizzle_f32x4.rs
 //
 // Copyright © 2019 The Pathfinder Project Developers.
 //
@@ -8,113 +8,10 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
-use std::arch::x86_64::{self, __m128, __m128i};
-use std::cmp::PartialEq;
-use std::fmt::{self, Debug, Formatter};
-use std::mem;
-use std::ops::{Add, BitXor, Index, IndexMut, Mul, Not, Sub};
-// 32-bit floats
-#[derive(Clone, Copy)]
-pub struct F32x4(pub __m128);
+use crate::x86::F32x4;
+use std::arch::x86_64;
 impl F32x4 {
[removed lines elided: the F32x4 constructors, basic operations, packed comparisons, and conversions deleted from this file are the same code now shown above in simd/src/x86/mod.rs.]
     #[inline]
     pub fn xxxx(self) -> F32x4 {
         unsafe { F32x4(x86_64::_mm_shuffle_ps(self.0, self.0, 0)) }
@@ -1394,374 +1291,4 @@ impl F32x4 {
     pub fn wwww(self) -> F32x4 {
         unsafe { F32x4(x86_64::_mm_shuffle_ps(self.0, self.0, 255)) }
     }
[removed lines elided: the concatenations, cross product, trait impls, and the I32x4, U32x4, and U8x16 definitions deleted here now live in simd/src/x86/mod.rs above, and the I32x4 swizzles in simd/src/x86/swizzle_i32x4.rs below.]
 }
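The bare immediates in these swizzles (0 for xxxx and 255 for wwww here, and 68/108/198/78 in swizzle_i32x4.rs below) are _mm_shuffle_ps lane selectors: two bits per result lane, lowest bits first. A small helper sketch showing the encoding (this helper is mine, not part of the crate):

// Result lane i takes source lane s_i; lane 0's selector sits in the low bits.
const fn shuffle_imm(s0: u32, s1: u32, s2: u32, s3: u32) -> i32 {
    ((s3 << 6) | (s2 << 4) | (s1 << 2) | s0) as i32
}
// shuffle_imm(0, 0, 0, 0) == 0   -> xxxx
// shuffle_imm(3, 3, 3, 3) == 255 -> wwww
// shuffle_imm(2, 3, 0, 1) == 78  -> zwxy (see the I32x4 swizzles below)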

simd/src/x86/swizzle_i32x4.rs (new file, 47 lines)

@@ -0,0 +1,47 @@
// pathfinder/simd/src/x86/swizzle_i32x4.rs
//
// Copyright © 2019 The Pathfinder Project Developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use crate::x86::I32x4;
use std::arch::x86_64;
// TODO(pcwalton): Add the remaining swizzles.
impl I32x4 {
#[inline]
pub fn xyxy(self) -> I32x4 {
unsafe {
let this = x86_64::_mm_castsi128_ps(self.0);
I32x4(x86_64::_mm_castps_si128(x86_64::_mm_shuffle_ps(this, this, 68)))
}
}
#[inline]
pub fn xwzy(self) -> I32x4 {
unsafe {
let this = x86_64::_mm_castsi128_ps(self.0);
I32x4(x86_64::_mm_castps_si128(x86_64::_mm_shuffle_ps(this, this, 108)))
}
}
#[inline]
pub fn zyxw(self) -> I32x4 {
unsafe {
let this = x86_64::_mm_castsi128_ps(self.0);
I32x4(x86_64::_mm_castps_si128(x86_64::_mm_shuffle_ps(this, this, 198)))
}
}
#[inline]
pub fn zwxy(self) -> I32x4 {
unsafe {
let this = x86_64::_mm_castsi128_ps(self.0);
I32x4(x86_64::_mm_castps_si128(x86_64::_mm_shuffle_ps(this, this, 78)))
}
}
}
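A small usage sketch of the new module, as a hypothetical test on an x86_64 build: a swizzle's name reads left to right as the lanes of its result.

#[cfg(all(test, target_arch = "x86_64"))]
mod swizzle_example {
    use crate::x86::I32x4;

    #[test]
    fn zwxy_swaps_the_two_halves() {
        let v = I32x4::new(1, 2, 3, 4);
        assert_eq!(v.zwxy(), I32x4::new(3, 4, 1, 2));
        assert_eq!(v.xyxy(), I32x4::new(1, 2, 1, 2));
    }
}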