Use SIMD a bit more in the new tiling code.

This commit is contained in:
Patrick Walton 2020-05-16 10:38:34 -07:00
parent 28c4bc194d
commit 13f9fdc96c
4 changed files with 107 additions and 5 deletions

View File

@ -19,6 +19,7 @@ use pathfinder_content::segment::Segment;
use pathfinder_geometry::line_segment::LineSegment2F;
use pathfinder_geometry::rect::RectF;
use pathfinder_geometry::vector::{Vector2F, Vector2I, vec2f, vec2i};
use pathfinder_simd::default::{F32x2, U32x2};
const FLATTENING_TOLERANCE: f32 = 0.25;
@ -157,12 +158,17 @@ fn process_line_segment(line_segment: LineSegment2F,
let from_tile_coords = Vector2I(tile_line_segment.xy());
let to_tile_coords = Vector2I(tile_line_segment.zw());
// Compute `vector_is_negative = vec2i(vector.x < 0 ? -1 : 0, vector.y < 0 ? -1 : 0)`.
let vector = line_segment.vector();
let step = vec2f(vector.x().signum(), vector.y().signum()).to_i32();
let vector_is_negative = vector.0.packed_lt(F32x2::default());
let first_tile_crossing =
(from_tile_coords + vec2i(if step.x() <= 0 { 0 } else { 1 },
if step.y() <= 0 { 0 } else { 1 })).to_f32() * tile_size;
// Compute `step = vec2i(vector.x < 0 ? -1 : 1, vector.y < 0 ? -1 : 1)`.
let step = Vector2I((vector_is_negative | U32x2::splat(1)).to_i32x2());
// Compute `first_tile_crossing = (from_tile_coords + vec2i(vector.x < 0 ? 0 : 1,
// vector.y < 0 ? 0 : 1)) * tile_size`.
let first_tile_crossing = (from_tile_coords +
Vector2I((!vector_is_negative & U32x2::splat(1)).to_i32x2())).to_f32() * tile_size;
let mut t_max = (first_tile_crossing - line_segment.from()) / vector;
let t_delta = (tile_size / vector).abs();

View File

@ -13,7 +13,7 @@ use std::arch::aarch64::{uint32x2_t, uint32x4_t};
use std::f32;
use std::fmt::{self, Debug, Formatter};
use std::mem;
use std::ops::{Add, BitAnd, BitOr, Div, Index, IndexMut, Mul, Shr, Sub};
use std::ops::{Add, BitAnd, BitOr, Div, Index, IndexMut, Mul, Not, Shr, Sub};
mod swizzle_f32x4;
mod swizzle_i32x4;
@ -723,6 +723,16 @@ impl Shr<I32x4> for I32x4 {
pub struct U32x2(pub uint32x2_t);
impl U32x2 {
/// Builds a two-lane vector from the array `[x, y]`.
#[inline]
pub fn new(x: u32, y: u32) -> U32x2 {
    let lanes = [x, y];
    // SAFETY: `mem::transmute` only compiles when the source and destination have the same
    // size. NOTE(review): presumably every bit pattern of two `u32`s is a valid value of the
    // wrapped vector type -- confirm.
    unsafe { U32x2(mem::transmute(lanes)) }
}
/// Builds a vector whose two lanes both hold `x`.
#[inline]
pub fn splat(x: u32) -> U32x2 {
    let lane = x;
    U32x2::new(lane, lane)
}
/// Returns true if both booleans in this vector are true.
///
/// The result is *undefined* if both values in this vector are not booleans. A boolean is a
@ -740,6 +750,11 @@ impl U32x2 {
pub fn all_false(&self) -> bool {
    // Reduce the vector with `vmaxv_u32` and compare the reduced value against zero.
    // NOTE(review): this relies on the reduction being the unsigned maximum across lanes, so
    // that it is zero exactly when every lane is zero -- confirm against the intrinsic docs.
    let reduced = unsafe { aarch64::vmaxv_u32(self.0) };
    reduced == 0
}
/// Converts this vector into an `I32x2` by passing the wrapped value through `simd_cast`.
#[inline]
pub fn to_i32x2(self) -> I32x2 {
    let lanes = self.0;
    unsafe { I32x2(simd_cast(lanes)) }
}
}
impl Index<usize> for U32x2 {
@ -754,6 +769,32 @@ impl Index<usize> for U32x2 {
}
}
impl Not for U32x2 {
    type Output = U32x2;

    /// Returns the bitwise complement of both lanes.
    #[inline]
    fn not(self) -> U32x2 {
        // FIXME(pcwalton): Is there a better way to do this?
        // XOR-ing with a mask that has every bit set flips every bit of the input.
        let all_ones = U32x2::splat(!0);
        unsafe { U32x2(simd_xor(self.0, all_ones.0)) }
    }
}
impl BitAnd<U32x2> for U32x2 {
    type Output = U32x2;

    /// Combines the two vectors with `simd_and`.
    #[inline]
    fn bitand(self, other: U32x2) -> U32x2 {
        let (lhs, rhs) = (self.0, other.0);
        unsafe { U32x2(simd_and(lhs, rhs)) }
    }
}
impl BitOr<U32x2> for U32x2 {
    type Output = U32x2;

    /// Combines the two vectors with `simd_or`.
    #[inline]
    fn bitor(self, other: U32x2) -> U32x2 {
        let (lhs, rhs) = (self.0, other.0);
        unsafe { U32x2(simd_or(lhs, rhs)) }
    }
}
// Four 32-bit unsigned integers
#[derive(Clone, Copy)]
@ -803,6 +844,7 @@ extern "platform-intrinsic" {
fn simd_and<T>(x: T, y: T) -> T;
fn simd_or<T>(x: T, y: T) -> T;
fn simd_xor<T>(x: T, y: T) -> T;
fn simd_fmin<T>(x: T, y: T) -> T;
fn simd_fmax<T>(x: T, y: T) -> T;

View File

@ -808,6 +808,16 @@ impl Shr<I32x4> for I32x4 {
pub struct U32x2(pub [u32; 2]);
impl U32x2 {
/// Builds a two-lane vector with `x` stored at index 0 and `y` at index 1.
#[inline]
pub fn new(x: u32, y: u32) -> U32x2 {
    let lanes = [x, y];
    U32x2(lanes)
}
/// Builds a vector whose two lanes both hold `x`.
#[inline]
pub fn splat(x: u32) -> U32x2 {
    let lane = x;
    U32x2::new(lane, lane)
}
/// Returns true if both booleans in this vector are true.
///
/// The result is *undefined* if both values in this vector are not booleans. A boolean is a
@ -825,6 +835,11 @@ impl U32x2 {
pub fn all_false(&self) -> bool {
    // Bail out as soon as the first lane is non-zero; otherwise the second lane decides.
    if self[0] != 0 {
        return false;
    }
    self[1] == 0
}
/// Converts this vector into an `I32x2`, casting each lane with `as i32`.
#[inline]
pub fn to_i32x2(self) -> I32x2 {
    // `u32 as i32` keeps the bit pattern, so values above `i32::MAX` become negative.
    let first = self[0] as i32;
    let second = self[1] as i32;
    I32x2::new(first, second)
}
}
impl Index<usize> for U32x2 {

View File

@ -817,6 +817,16 @@ impl PartialEq for I32x4 {
pub struct U32x2(pub u64);
impl U32x2 {
/// Packs two lanes into one `u64`: `x` fills the low 32 bits and `y` the high 32 bits.
#[inline]
pub fn new(x: u32, y: u32) -> U32x2 {
    let low_half = u64::from(x);
    let high_half = u64::from(y) << 32;
    U32x2(low_half | high_half)
}
/// Builds a vector whose two lanes both hold `x`.
#[inline]
pub fn splat(x: u32) -> U32x2 {
    let lane = x;
    U32x2::new(lane, lane)
}
/// Returns true if both booleans in this vector are true.
///
/// The result is *undefined* if both values in this vector are not booleans. A boolean is a
@ -834,6 +844,35 @@ impl U32x2 {
pub fn all_false(self) -> bool {
self.0 == 0
}
#[inline]
pub fn to_i32x2(self) -> I32x2 {
I32x2(self.0)
}
}
impl Not for U32x2 {
type Output = U32x2;
#[inline]
fn not(self) -> U32x2 {
U32x2(!self.0)
}
}
impl BitAnd<U32x2> for U32x2 {
    type Output = U32x2;

    /// Returns the bitwise AND of the two packed 64-bit values.
    #[inline]
    fn bitand(self, other: U32x2) -> U32x2 {
        let (lhs, rhs) = (self.0, other.0);
        U32x2(lhs & rhs)
    }
}
impl BitOr<U32x2> for U32x2 {
    type Output = U32x2;

    /// Returns the bitwise OR of the two packed 64-bit values.
    #[inline]
    fn bitor(self, other: U32x2) -> U32x2 {
        let (lhs, rhs) = (self.0, other.0);
        U32x2(lhs | rhs)
    }
}
// Four 32-bit unsigned integers