From bd6d015e6ff7ccc68fa50662e5b714aa4d5e9d45 Mon Sep 17 00:00:00 2001 From: Berkus Decker Date: Sun, 10 Jan 2021 23:08:03 +0200 Subject: [PATCH] Fix arm simd shuffling arguments According to packed_simd docs, the syntax for simd_shuffle4() is as follows: "The indices must be in range [0, M * N) where M is the number of input vectors (1 or 2) and N is the number of lanes of the input vectors. The indices i in range [0, N) refer to the i-th element of vec0, while the indices in range [N, 2*N) refer to the i - N-th element of vec1." I did not find implementation or documentation for simd_shuffle4() but I believe packed_simd implements exactly the same interface. Plus, implementing this change has fixed font-kit glyph output on an Apple M1 Mac (64-bit arm). --- simd/src/arm/mod.rs | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/simd/src/arm/mod.rs b/simd/src/arm/mod.rs index 606914e4..931a64a5 100644 --- a/simd/src/arm/mod.rs +++ b/simd/src/arm/mod.rs @@ -129,7 +129,7 @@ impl F32x2 { #[inline] pub fn concat_xy_xy(self, other: F32x2) -> F32x4 { - unsafe { F32x4(simd_shuffle4(self.0, other.0, [0, 1, 0, 1])) } + unsafe { F32x4(simd_shuffle4(self.0, other.0, [0, 1, 2, 3])) } } } @@ -314,17 +314,17 @@ impl F32x4 { #[inline] pub fn concat_xy_xy(self, other: F32x4) -> F32x4 { - unsafe { F32x4(simd_shuffle4(self.0, other.0, [0, 1, 0, 1])) } - } - - #[inline] - pub fn concat_xy_zw(self, other: F32x4) -> F32x4 { unsafe { F32x4(simd_shuffle4(self.0, other.0, [0, 1, 2, 3])) } } + #[inline] + pub fn concat_xy_zw(self, other: F32x4) -> F32x4 { + unsafe { F32x4(simd_shuffle4(self.0, other.0, [0, 1, 6, 7])) } + } + #[inline] pub fn concat_zw_zw(self, other: F32x4) -> F32x4 { - unsafe { F32x4(simd_shuffle4(self.0, other.0, [2, 3, 2, 3])) } + unsafe { F32x4(simd_shuffle4(self.0, other.0, [2, 3, 6, 7])) } } // Conversions @@ -461,7 +461,7 @@ impl I32x2 { #[inline] pub fn concat_xy_xy(self, other: I32x2) -> I32x4 { - unsafe { 
I32x4(simd_shuffle4(self.0, other.0, [0, 1, 0, 1])) } + unsafe { I32x4(simd_shuffle4(self.0, other.0, [0, 1, 2, 3])) } } // Conversions @@ -471,7 +471,7 @@ impl I32x2 { pub fn to_f32x2(self) -> F32x2 { unsafe { F32x2(simd_cast(self.0)) } } - + #[inline] pub fn to_i32x4(self) -> I32x4 { self.concat_xy_xy(I32x2::default())