From b2432b3a7f8fd964b6e44b398e6c8b323a2f8b9c Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Fri, 4 Jan 2019 18:51:01 -0800 Subject: [PATCH] Throw even more SIMD at `add_fill()` --- utils/tile-svg/src/main.rs | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/utils/tile-svg/src/main.rs b/utils/tile-svg/src/main.rs index 03bdd6a1..24edb334 100644 --- a/utils/tile-svg/src/main.rs +++ b/utils/tile-svg/src/main.rs @@ -31,7 +31,9 @@ use pathfinder_path_utils::stroke::{StrokeStyle, StrokeToFillIter}; use rayon::ThreadPoolBuilder; use rayon::iter::{IntoParallelRefIterator, ParallelIterator}; use simdeez::Simd; +use simdeez::overloads::I32x4_41; use simdeez::sse41::Sse41; +use std::arch::x86_64; use std::cmp::Ordering; use std::fmt::{self, Debug, Formatter}; use std::fs::File; @@ -1306,7 +1308,6 @@ impl BuiltObject { } // TODO(pcwalton): SIMD-ify `tile_x` and `tile_y`. - // FIXME(pcwalton): Use a line segment. fn add_fill(&mut self, segment: &LineSegmentF32, tile_x: i16, tile_y: i16) { let tile_origin = Point2DF32::new((tile_x as i32 * TILE_WIDTH as i32) as f32, (tile_y as i32 * TILE_HEIGHT as i32) as f32); @@ -1328,7 +1329,6 @@ impl BuiltObject { } */ - self.fills.push(FillObjectPrimitive { px, subpx, tile_x, tile_y }); self.solid_tiles.set(tile_index as usize, false); @@ -2047,25 +2047,20 @@ impl LineSegmentF32 { } } - // FIXME(pcwalton): Use `pshufb`! fn to_line_segment_u4(&self) -> LineSegmentU4 { unsafe { let values = Sse41::cvtps_epi32(Sse41::fastfloor_ps(self.0)); - LineSegmentU4(values[0] as u16 | - ((values[1] as u16) << 4) | - ((values[2] as u16) << 8) | - ((values[3] as u16) << 12)) + let mask = Sse41::set1_epi32(0x0c040800); + let values_0213 = Sse41::shuffle_epi8(values, mask)[0] as u32; + LineSegmentU4((values_0213 | (values_0213 >> 12)) as u16) } } - // FIXME(pcwalton): Use `pshufb`! fn to_line_segment_u8(&self) -> LineSegmentU8 { unsafe { let values = Sse41::cvtps_epi32(Sse41::fastfloor_ps(self.0)); - LineSegmentU8(values[0] as u32 | - ((values[1] as u32) << 8) | - ((values[2] as u32) << 16) | - ((values[3] as u32) << 24)) + let mask = Sse41::set1_epi32(0x0c080400); + LineSegmentU8(Sse41::shuffle_epi8(values, mask)[0] as u32) } } @@ -2130,6 +2125,20 @@ fn quadratic_segment_is_tiny(segment: &QuadraticBezierSegment) -> bool { } +// SIMD extensions + +trait SimdExt: Simd { + // TODO(pcwalton): Default scalar implementation. + unsafe fn shuffle_epi8(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32; +} + +impl SimdExt for Sse41 { + #[inline(always)] + unsafe fn shuffle_epi8(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32 { + I32x4_41(x86_64::_mm_shuffle_epi8(a.0, b.0)) + } +} + // Trivial utilities fn lerp(a: f32, b: f32, t: f32) -> f32 {