Use SIMD a bit more in the new tiling code.

2020-05-16 10:38:34 -07:00 · 2020-05-16 10:38:34 -07:00 · 13f9fdc96c
parent 28c4bc194d
commit 13f9fdc96c
4 changed files with 107 additions and 5 deletions
--- a/renderer/src/tiler.rs
+++ b/renderer/src/tiler.rs
@ -19,6 +19,7 @@ use pathfinder_content::segment::Segment;
 use pathfinder_geometry::line_segment::LineSegment2F;
 use pathfinder_geometry::rect::RectF;
 use pathfinder_geometry::vector::{Vector2F, Vector2I, vec2f, vec2i};
+use pathfinder_simd::default::{F32x2, U32x2};

 const FLATTENING_TOLERANCE: f32 = 0.25;

@ -157,12 +158,17 @@ fn process_line_segment(line_segment: LineSegment2F,
    let from_tile_coords = Vector2I(tile_line_segment.xy());
    let to_tile_coords = Vector2I(tile_line_segment.zw());

+    // Compute `vector_is_negative = vec2i(vector.x < 0 ? -1 : 0, vector.y < 0 ? -1 : 0)`.
    let vector = line_segment.vector();
-    let step = vec2f(vector.x().signum(), vector.y().signum()).to_i32();
+    let vector_is_negative = vector.0.packed_lt(F32x2::default());

-    let first_tile_crossing =
-        (from_tile_coords + vec2i(if step.x() <= 0 { 0 } else { 1 },
-                                  if step.y() <= 0 { 0 } else { 1 })).to_f32() * tile_size;
+    // Compute `step = vec2i(vector.x < 0 ? -1 : 1, vector.y < 0 ? -1 : 1)`.
+    let step = Vector2I((vector_is_negative | U32x2::splat(1)).to_i32x2());
+
+    // Compute `first_tile_crossing = (from_tile_coords + vec2i(vector.x < 0 ? 0 : 1,
+    // vector.y < 0 ? 0 : 1)) * tile_size`. A zero component counts as non-negative.
+    let first_tile_crossing = (from_tile_coords +
+        Vector2I((!vector_is_negative & U32x2::splat(1)).to_i32x2())).to_f32() * tile_size;

    let mut t_max = (first_tile_crossing - line_segment.from()) / vector;
    let t_delta = (tile_size / vector).abs();
--- a/simd/src/arm/mod.rs
+++ b/simd/src/arm/mod.rs
@ -13,7 +13,7 @@ use std::arch::aarch64::{uint32x2_t, uint32x4_t};
 use std::f32;
 use std::fmt::{self, Debug, Formatter};
 use std::mem;
-use std::ops::{Add, BitAnd, BitOr, Div, Index, IndexMut, Mul, Shr, Sub};
+use std::ops::{Add, BitAnd, BitOr, Div, Index, IndexMut, Mul, Not, Shr, Sub};

 mod swizzle_f32x4;
 mod swizzle_i32x4;
@ -723,6 +723,16 @@ impl Shr<I32x4> for I32x4 {
 pub struct U32x2(pub uint32x2_t);

 impl U32x2 {
+    #[inline]
+    pub fn new(x: u32, y: u32) -> U32x2 {
+        unsafe { U32x2(mem::transmute([x, y])) }
+    }
+
+    #[inline]
+    pub fn splat(x: u32) -> U32x2 {
+        U32x2::new(x, x)
+    }
+
    /// Returns true if both booleans in this vector are true.
    ///
    /// The result is *undefined* if both values in this vector are not booleans. A boolean is a
@ -740,6 +750,11 @@ impl U32x2 {
    pub fn all_false(&self) -> bool {
        unsafe { aarch64::vmaxv_u32(self.0) == 0 }
    }
+
+    #[inline]
+    pub fn to_i32x2(self) -> I32x2 {
+        unsafe { I32x2(simd_cast(self.0)) }
+    }
 }

 impl Index<usize> for U32x2 {
@ -754,6 +769,32 @@ impl Index<usize> for U32x2 {
    }
 }

+impl Not for U32x2 {
+    type Output = U32x2;
+    #[inline]
+    fn not(self) -> U32x2 {
+        // FIXME(pcwalton): Is there a better way to do this?
+        unsafe { U32x2(simd_xor(self.0, U32x2::splat(!0).0)) }
+    }
+}
+
+impl BitAnd<U32x2> for U32x2 {
+    type Output = U32x2;
+    #[inline]
+    fn bitand(self, other: U32x2) -> U32x2 {
+        unsafe { U32x2(simd_and(self.0, other.0)) }
+    }
+}
+
+impl BitOr<U32x2> for U32x2 {
+    type Output = U32x2;
+    #[inline]
+    fn bitor(self, other: U32x2) -> U32x2 {
+        unsafe { U32x2(simd_or(self.0, other.0)) }
+    }
+}
+
+
 // Four 32-bit unsigned integers

 #[derive(Clone, Copy)]
@ -803,6 +844,7 @@ extern "platform-intrinsic" {

    fn simd_and<T>(x: T, y: T) -> T;
    fn simd_or<T>(x: T, y: T) -> T;
+    fn simd_xor<T>(x: T, y: T) -> T;

    fn simd_fmin<T>(x: T, y: T) -> T;
    fn simd_fmax<T>(x: T, y: T) -> T;
--- a/simd/src/scalar/mod.rs
+++ b/simd/src/scalar/mod.rs
@ -808,6 +808,16 @@ impl Shr<I32x4> for I32x4 {
 pub struct U32x2(pub [u32; 2]);

 impl U32x2 {
+    #[inline]
+    pub fn new(x: u32, y: u32) -> U32x2 {
+        U32x2([x, y])
+    }
+
+    #[inline]
+    pub fn splat(x: u32) -> U32x2 {
+        U32x2::new(x, x)
+    }
+
    /// Returns true if both booleans in this vector are true.
    ///
    /// The result is *undefined* if both values in this vector are not booleans. A boolean is a
@ -825,6 +835,11 @@ impl U32x2 {
    pub fn all_false(&self) -> bool {
        self[0] == 0 && self[1] == 0
    }
+
+    #[inline]
+    pub fn to_i32x2(self) -> I32x2 {
+        I32x2::new(self[0] as i32, self[1] as i32)
+    }
 }

 impl Index<usize> for U32x2 {
--- a/simd/src/x86/mod.rs
+++ b/simd/src/x86/mod.rs
@ -817,6 +817,16 @@ impl PartialEq for I32x4 {
 pub struct U32x2(pub u64);

 impl U32x2 {
+    #[inline]
+    pub fn new(x: u32, y: u32) -> U32x2 {
+        U32x2(x as u64 | ((y as u64) << 32))
+    }
+
+    #[inline]
+    pub fn splat(x: u32) -> U32x2 {
+        U32x2::new(x, x)
+    }
+
    /// Returns true if both booleans in this vector are true.
    ///
    /// The result is *undefined* if both values in this vector are not booleans. A boolean is a
@ -834,6 +844,35 @@ impl U32x2 {
    pub fn all_false(self) -> bool {
        self.0 == 0
    }
+
+    #[inline]
+    pub fn to_i32x2(self) -> I32x2 {
+        I32x2(self.0)
+    }
+}
+
+impl Not for U32x2 {
+    type Output = U32x2;
+    #[inline]
+    fn not(self) -> U32x2 {
+        U32x2(!self.0)
+    }
+}
+
+impl BitAnd<U32x2> for U32x2 {
+    type Output = U32x2;
+    #[inline]
+    fn bitand(self, other: U32x2) -> U32x2 {
+        U32x2(self.0 & other.0)
+    }
+}
+
+impl BitOr<U32x2> for U32x2 {
+    type Output = U32x2;
+    #[inline]
+    fn bitor(self, other: U32x2) -> U32x2 {
+        U32x2(self.0 | other.0)
+    }
 }

 // Four 32-bit unsigned integers