Throw even more SIMD at `add_fill()`
This commit is contained in:
parent
c5ad95ffd5
commit
b2432b3a7f
|
@ -31,7 +31,9 @@ use pathfinder_path_utils::stroke::{StrokeStyle, StrokeToFillIter};
|
||||||
use rayon::ThreadPoolBuilder;
|
use rayon::ThreadPoolBuilder;
|
||||||
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
|
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
|
||||||
use simdeez::Simd;
|
use simdeez::Simd;
|
||||||
|
use simdeez::overloads::I32x4_41;
|
||||||
use simdeez::sse41::Sse41;
|
use simdeez::sse41::Sse41;
|
||||||
|
use std::arch::x86_64;
|
||||||
use std::cmp::Ordering;
|
use std::cmp::Ordering;
|
||||||
use std::fmt::{self, Debug, Formatter};
|
use std::fmt::{self, Debug, Formatter};
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
@ -1306,7 +1308,6 @@ impl BuiltObject {
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO(pcwalton): SIMD-ify `tile_x` and `tile_y`.
|
// TODO(pcwalton): SIMD-ify `tile_x` and `tile_y`.
|
||||||
// FIXME(pcwalton): Use a line segment.
|
|
||||||
fn add_fill(&mut self, segment: &LineSegmentF32, tile_x: i16, tile_y: i16) {
|
fn add_fill(&mut self, segment: &LineSegmentF32, tile_x: i16, tile_y: i16) {
|
||||||
let tile_origin = Point2DF32::new((tile_x as i32 * TILE_WIDTH as i32) as f32,
|
let tile_origin = Point2DF32::new((tile_x as i32 * TILE_WIDTH as i32) as f32,
|
||||||
(tile_y as i32 * TILE_HEIGHT as i32) as f32);
|
(tile_y as i32 * TILE_HEIGHT as i32) as f32);
|
||||||
|
@ -1328,7 +1329,6 @@ impl BuiltObject {
|
||||||
}
|
}
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
self.fills.push(FillObjectPrimitive { px, subpx, tile_x, tile_y });
|
self.fills.push(FillObjectPrimitive { px, subpx, tile_x, tile_y });
|
||||||
|
|
||||||
self.solid_tiles.set(tile_index as usize, false);
|
self.solid_tiles.set(tile_index as usize, false);
|
||||||
|
@ -2047,25 +2047,20 @@ impl LineSegmentF32 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME(pcwalton): Use `pshufb`!
|
|
||||||
// Packs the four coordinates of this segment into a `LineSegmentU4`, one nibble per coordinate.
// The lanes of `self.0` are floored (`fastfloor_ps`) and converted to 32-bit integers
// (`cvtps_epi32`) before packing.
// NOTE(review): both the shift/OR form and the shuffle form only produce a clean nibble layout
// when every converted lane is in 0..=15 — presumably guaranteed by the caller; confirm.
fn to_line_segment_u4(&self) -> LineSegmentU4 {
|
fn to_line_segment_u4(&self) -> LineSegmentU4 {
|
||||||
unsafe {
|
unsafe {
|
||||||
let values = Sse41::cvtps_epi32(Sse41::fastfloor_ps(self.0));
|
let values = Sse41::cvtps_epi32(Sse41::fastfloor_ps(self.0));
|
||||||
LineSegmentU4(values[0] as u16 |
|
// Shuffle control, read low byte first: 0x00, 0x08, 0x04, 0x0c. These are the low bytes of
// lanes 0, 2, 1 and 3, so lane 0 of the shuffled vector holds v0 | v2 << 8 | v1 << 16 | v3 << 24.
let mask = Sse41::set1_epi32(0x0c040800);
|
||||||
((values[1] as u16) << 4) |
|
let values_0213 = Sse41::shuffle_epi8(values, mask)[0] as u32;
|
||||||
((values[2] as u16) << 8) |
|
// Shifting right by 12 drops v1 to bit 4 and v3 to bit 12; OR-ing with the unshifted word
// (v0 at bit 0, v2 at bit 8) and truncating to 16 bits yields v0 | v1 << 4 | v2 << 8 | v3 << 12.
LineSegmentU4((values_0213 | (values_0213 >> 12)) as u16)
|
||||||
((values[3] as u16) << 12))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// FIXME(pcwalton): Use `pshufb`!
|
|
||||||
// Packs the four coordinates of this segment into a `LineSegmentU8`, one byte per coordinate.
// The lanes of `self.0` are floored (`fastfloor_ps`) and converted to 32-bit integers
// (`cvtps_epi32`) before packing.
// NOTE(review): the shuffle form keeps only the low byte of each lane, so it matches the
// shift/OR form only when every converted lane is in 0..=255 — presumably guaranteed by the
// caller; confirm.
fn to_line_segment_u8(&self) -> LineSegmentU8 {
|
fn to_line_segment_u8(&self) -> LineSegmentU8 {
|
||||||
unsafe {
|
unsafe {
|
||||||
let values = Sse41::cvtps_epi32(Sse41::fastfloor_ps(self.0));
|
let values = Sse41::cvtps_epi32(Sse41::fastfloor_ps(self.0));
|
||||||
LineSegmentU8(values[0] as u32 |
|
// Shuffle control, read low byte first: 0x00, 0x04, 0x08, 0x0c. These are the low bytes of
// lanes 0, 1, 2 and 3 in order, so lane 0 of the result is v0 | v1 << 8 | v2 << 16 | v3 << 24.
let mask = Sse41::set1_epi32(0x0c080400);
|
||||||
((values[1] as u32) << 8) |
|
LineSegmentU8(Sse41::shuffle_epi8(values, mask)[0] as u32)
|
||||||
((values[2] as u32) << 16) |
|
|
||||||
((values[3] as u32) << 24))
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2130,6 +2125,20 @@ fn quadratic_segment_is_tiny(segment: &QuadraticBezierSegment<f32>) -> bool {
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SIMD extensions
|
||||||
|
|
||||||
|
// Extension trait layering extra operations on top of `Simd`. Every implementor must also
// implement `Simd`, whose `Vi32` vector type is used in the signatures below.
trait SimdExt: Simd {
|
||||||
|
// TODO(pcwalton): Default scalar implementation.
|
||||||
|
// Byte-level shuffle of `a` under the control vector `b`. No default body is provided, so
// each backend has to supply its own; the `Sse41` implementation forwards to
// `_mm_shuffle_epi8` (`pshufb`).
unsafe fn shuffle_epi8(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32;
|
||||||
|
}
|
||||||
|
|
||||||
|
// `SimdExt` implementation for the SSE4.1 backend.
impl SimdExt for Sse41 {
|
||||||
|
#[inline(always)]
|
||||||
|
// Unwraps the raw vectors (`a.0`, `b.0`), runs the `_mm_shuffle_epi8` intrinsic (`pshufb`) on
// them, and rewraps the result as `I32x4_41`. Per the intrinsic's documentation, each output
// byte is picked from `a` using the low four bits of the matching byte of `b`, or zeroed when
// that control byte has its top bit set.
// NOTE(review): `_mm_shuffle_epi8` is an SSSE3 instruction; presumably this is only reached on
// CPUs that support it — confirm at the call sites.
unsafe fn shuffle_epi8(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32 {
|
||||||
|
I32x4_41(x86_64::_mm_shuffle_epi8(a.0, b.0))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Trivial utilities
|
// Trivial utilities
|
||||||
|
|
||||||
fn lerp(a: f32, b: f32, t: f32) -> f32 {
|
fn lerp(a: f32, b: f32, t: f32) -> f32 {
|
||||||
|
|
Loading…
Reference in New Issue