Remove simdeez

This commit is contained in:
Patrick Walton 2019-01-12 17:13:58 -08:00
parent 37e6e71251
commit bbf193f00f
9 changed files with 403 additions and 295 deletions

8
Cargo.lock generated
View File

@ -435,7 +435,6 @@ dependencies = [
"lyon_path 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)",
"serde 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)",
"serde_derive 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)",
"simdeez 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
@ -626,11 +625,6 @@ dependencies = [
"syn 0.15.24 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "simdeez"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "simplecss"
version = "0.1.0"
@ -734,7 +728,6 @@ dependencies = [
"quickcheck 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
"rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)",
"rayon 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
"simdeez 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
"svgtypes 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
"usvg 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
]
@ -904,7 +897,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
"checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27"
"checksum serde 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)" = "0e732ed5a5592c17d961555e3b552985baf98d50ce418b7b655f31f6ba7eb1b7"
"checksum serde_derive 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)" = "b4d6115a3ca25c224e409185325afc16a0d5aaaabc15c42b09587d6f1ba39a5b"
"checksum simdeez 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "53d1e4a8ee9c44fa7c2d6464b679bd62c6b156edb865f084eb51af7b34efaa63"
"checksum simplecss 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "135685097a85a64067df36e28a243e94a94f76d829087ce0be34eeb014260c0e"
"checksum siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac"
"checksum slab 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5f9776d6b986f77b35c6cf846c11ad986ff128fe0b2b63a3628e3755e8d3102d"

View File

@ -12,4 +12,3 @@ lyon_geom = "0.12"
lyon_path = "0.12"
serde = "1.0"
serde_derive = "1.0"
simdeez = "0.4"

View File

@ -15,11 +15,6 @@
#[macro_use]
extern crate bitflags;
use simdeez::sse41::Sse41;
// TODO(pcwalton): Make this configurable.
pub type SimdImpl = Sse41;
pub mod clip;
pub mod cubic_to_quadratic;
pub mod line_segment;
@ -28,6 +23,7 @@ pub mod orientation;
pub mod point;
pub mod segment;
pub mod segments;
pub mod simd;
pub mod stroke;
pub mod transform;
pub mod util;

View File

@ -10,62 +10,40 @@
//! Line segment types, optimized with SIMD.
use crate::SimdImpl;
use crate::point::Point2DF32;
use crate::simd::F32x4;
use crate::util;
use simdeez::Simd;
use std::ops::Sub;
#[derive(Clone, Copy, Debug)]
pub struct LineSegmentF32(pub <SimdImpl as Simd>::Vf32);
#[derive(Clone, Copy, Debug, PartialEq, Default)]
pub struct LineSegmentF32(pub F32x4);
impl LineSegmentF32 {
#[inline]
pub fn new(from: &Point2DF32, to: &Point2DF32) -> LineSegmentF32 {
unsafe {
LineSegmentF32(SimdImpl::castpd_ps(SimdImpl::unpacklo_pd(
SimdImpl::castps_pd(from.0),
SimdImpl::castps_pd(to.0),
)))
}
LineSegmentF32(F32x4::new(from.x(), from.y(), to.x(), to.y()))
}
#[inline]
pub fn from(&self) -> Point2DF32 {
unsafe {
Point2DF32(SimdImpl::castpd_ps(SimdImpl::unpacklo_pd(
SimdImpl::castps_pd(self.0),
SimdImpl::setzero_pd(),
)))
}
Point2DF32(self.0)
}
#[inline]
pub fn to(&self) -> Point2DF32 {
unsafe {
Point2DF32(SimdImpl::castpd_ps(SimdImpl::unpackhi_pd(
SimdImpl::castps_pd(self.0),
SimdImpl::setzero_pd(),
)))
}
Point2DF32(self.0.swap_halves())
}
#[inline]
pub fn set_from(&mut self, point: &Point2DF32) {
unsafe {
let (mut this, point) = (SimdImpl::castps_pd(self.0), SimdImpl::castps_pd(point.0));
this[0] = point[0];
self.0 = SimdImpl::castpd_ps(this);
}
self.0[0] = point.x();
self.0[1] = point.y();
}
#[inline]
pub fn set_to(&mut self, point: &Point2DF32) {
unsafe {
let (mut this, point) = (SimdImpl::castps_pd(self.0), SimdImpl::castps_pd(point.0));
this[1] = point[0];
self.0 = SimdImpl::castpd_ps(this);
}
self.0[2] = point.x();
self.0[3] = point.y();
}
#[allow(clippy::wrong_self_convention)]
@ -92,34 +70,17 @@ impl LineSegmentF32 {
#[inline]
pub fn scale(&self, factor: f32) -> LineSegmentF32 {
unsafe { LineSegmentF32(SimdImpl::mul_ps(self.0, SimdImpl::set1_ps(factor))) }
LineSegmentF32(self.0 * F32x4::splat(factor))
}
#[inline]
pub fn split(&self, t: f32) -> (LineSegmentF32, LineSegmentF32) {
debug_assert!(t >= 0.0 && t <= 1.0);
unsafe {
let from_from = SimdImpl::castpd_ps(SimdImpl::unpacklo_pd(
SimdImpl::castps_pd(self.0),
SimdImpl::castps_pd(self.0),
));
let to_to = SimdImpl::castpd_ps(SimdImpl::unpackhi_pd(
SimdImpl::castps_pd(self.0),
SimdImpl::castps_pd(self.0),
));
let (from_from, to_to) = (self.0.splat_low_half(), self.0.splat_high_half());
let d_d = to_to - from_from;
let mid_mid = from_from + d_d * SimdImpl::set1_ps(t);
(
LineSegmentF32(SimdImpl::castpd_ps(SimdImpl::unpacklo_pd(
SimdImpl::castps_pd(from_from),
SimdImpl::castps_pd(mid_mid),
))),
LineSegmentF32(SimdImpl::castpd_ps(SimdImpl::unpackhi_pd(
SimdImpl::castps_pd(mid_mid),
SimdImpl::castps_pd(to_to),
))),
)
}
let mid_mid = from_from + d_d * F32x4::splat(t);
(LineSegmentF32(F32x4::new(from_from[0], from_from[1], mid_mid[0], mid_mid[1])),
LineSegmentF32(F32x4::new(mid_mid[0], mid_mid[1], to_to[0], to_to[1])))
}
// Returns the upper segment first, followed by the lower segment.
@ -150,7 +111,7 @@ impl LineSegmentF32 {
#[inline]
pub fn reversed(&self) -> LineSegmentF32 {
unsafe { LineSegmentF32(SimdImpl::shuffle_ps(self.0, self.0, 0b0100_1110)) }
LineSegmentF32(self.0.swap_halves())
}
#[inline]
@ -193,35 +154,11 @@ impl LineSegmentF32 {
}
}
impl PartialEq for LineSegmentF32 {
#[inline]
fn eq(&self, other: &LineSegmentF32) -> bool {
unsafe {
let results = SimdImpl::castps_epi32(SimdImpl::cmpeq_ps(self.0, other.0));
// FIXME(pcwalton): Is there a better way to do this?
results[0] == -1 && results[1] == -1 && results[2] == -1 && results[3] == -1
}
}
}
impl Default for LineSegmentF32 {
#[inline]
fn default() -> LineSegmentF32 {
unsafe { LineSegmentF32(SimdImpl::setzero_ps()) }
}
}
impl Sub<Point2DF32> for LineSegmentF32 {
type Output = LineSegmentF32;
#[inline]
fn sub(self, point: Point2DF32) -> LineSegmentF32 {
unsafe {
let point_point = SimdImpl::castpd_ps(SimdImpl::unpacklo_pd(
SimdImpl::castps_pd(point.0),
SimdImpl::castps_pd(point.0),
));
LineSegmentF32(self.0 - point_point)
}
LineSegmentF32(self.0 - point.0.splat_low_half())
}
}

View File

@ -10,28 +10,22 @@
//! A SIMD-optimized point type.
use crate::SimdImpl;
use crate::simd::F32x4;
use euclid::Point2D;
use simdeez::Simd;
use std::ops::{Add, Mul, Sub};
#[derive(Clone, Copy, Debug)]
pub struct Point2DF32(pub <SimdImpl as Simd>::Vf32);
#[derive(Clone, Copy, Debug, Default)]
pub struct Point2DF32(pub F32x4);
impl Point2DF32 {
#[inline]
pub fn new(x: f32, y: f32) -> Point2DF32 {
unsafe {
let mut data = SimdImpl::setzero_ps();
data[0] = x;
data[1] = y;
Point2DF32(data)
}
Point2DF32(F32x4::new(x, y, 0.0, 0.0))
}
#[inline]
pub fn splat(value: f32) -> Point2DF32 {
unsafe { Point2DF32(SimdImpl::set1_ps(value)) }
Point2DF32(F32x4::splat(value))
}
#[inline]
@ -56,29 +50,20 @@ impl Point2DF32 {
#[inline]
pub fn min(&self, other: Point2DF32) -> Point2DF32 {
unsafe { Point2DF32(SimdImpl::min_ps(self.0, other.0)) }
Point2DF32(self.0.min(other.0))
}
#[inline]
pub fn max(&self, other: Point2DF32) -> Point2DF32 {
unsafe { Point2DF32(SimdImpl::max_ps(self.0, other.0)) }
Point2DF32(self.0.max(other.0))
}
}
impl PartialEq for Point2DF32 {
#[inline]
fn eq(&self, other: &Point2DF32) -> bool {
unsafe {
let results = SimdImpl::castps_epi32(SimdImpl::cmpeq_ps(self.0, other.0));
results[0] == -1 && results[1] == -1
}
}
}
impl Default for Point2DF32 {
#[inline]
fn default() -> Point2DF32 {
unsafe { Point2DF32(SimdImpl::setzero_ps()) }
let results = self.0.packed_eq(other.0);
results[0] != 0 && results[1] != 0
}
}

View File

@ -10,10 +10,9 @@
//! Line or curve segments, optimized with SIMD.
use crate::SimdImpl;
use crate::line_segment::LineSegmentF32;
use crate::point::Point2DF32;
use simdeez::Simd;
use crate::simd::F32x4;
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct Segment {
@ -160,12 +159,9 @@ pub struct CubicSegment<'s>(&'s Segment);
impl<'s> CubicSegment<'s> {
#[inline]
pub fn flatten_once(self, tolerance: f32) -> Option<Segment> {
let s2inv;
unsafe {
let (baseline, ctrl) = (self.0.baseline.0, self.0.ctrl.0);
let from_from = SimdImpl::shuffle_ps(baseline, baseline, 0b0100_0100);
let v0102 = SimdImpl::sub_ps(ctrl, from_from);
let from_from = baseline.splat_low_half();
let v0102 = ctrl - from_from;
// v01.x v01.y v02.x v02.y
// * v01.x v01.y v01.y v01.x
@ -175,15 +171,14 @@ impl<'s> CubicSegment<'s> {
// +-------+ +-----+
// + -
// v01 len^2 determinant
let products = SimdImpl::mul_ps(v0102, SimdImpl::shuffle_ps(v0102, v0102, 0b0001_0100));
let products = v0102 * F32x4::new(v0102[0], v0102[1], v0102[1], v0102[0]);
let det = products[2] - products[3];
if det == 0.0 {
return None;
}
s2inv = (products[0] + products[1]).sqrt() / det;
}
let s2inv = (products[0] + products[1]).sqrt() / det;
let t = 2.0 * ((tolerance / 3.0) * s2inv.abs()).sqrt();
if t >= 1.0 - EPSILON || t == 0.0 {
@ -197,71 +192,40 @@ impl<'s> CubicSegment<'s> {
#[inline]
pub fn split(self, t: f32) -> (Segment, Segment) {
unsafe {
let tttt = SimdImpl::set1_ps(t);
let tttt = F32x4::splat(t);
let p0p3 = self.0.baseline.0;
let p1p2 = self.0.ctrl.0;
let p0p1 = assemble(&p0p3, &p1p2, 0, 0);
let p0p1 = F32x4::new(p0p3[0], p0p3[1], p1p2[0], p1p2[1]);
// p01 = lerp(p0, p1, t), p12 = lerp(p1, p2, t), p23 = lerp(p2, p3, t)
let p01p12 = SimdImpl::add_ps(p0p1, SimdImpl::mul_ps(tttt, SimdImpl::sub_ps(p1p2, p0p1)));
let pxxp23 = SimdImpl::add_ps(p1p2, SimdImpl::mul_ps(tttt, SimdImpl::sub_ps(p0p3, p1p2)));
let p12p23 = assemble(&p01p12, &pxxp23, 1, 1);
let p01p12 = p0p1 + tttt * (p1p2 - p0p1);
let pxxp23 = p1p2 + tttt * (p0p3 - p1p2);
let p12p23 = F32x4::new(p01p12[2], p01p12[3], pxxp23[2], pxxp23[3]);
// p012 = lerp(p01, p12, t), p123 = lerp(p12, p23, t)
let p012p123 =
SimdImpl::add_ps(p01p12, SimdImpl::mul_ps(tttt, SimdImpl::sub_ps(p12p23, p01p12)));
let p123 = pluck(&p012p123, 1);
let p012p123 = p01p12 + tttt * (p12p23 - p01p12);
let p123 = p012p123.splat_high_half();
// p0123 = lerp(p012, p123, t)
let p0123 = SimdImpl::add_ps(p012p123, SimdImpl::mul_ps(tttt, SimdImpl::sub_ps(p123, p012p123)));
let p0123 = p012p123 + tttt * (p123 - p012p123);
let baseline0 = assemble(&p0p3, &p0123, 0, 0);
let ctrl0 = assemble(&p01p12, &p012p123, 0, 0);
let baseline1 = assemble(&p0123, &p0p3, 0, 1);
let ctrl1 = assemble(&p012p123, &p12p23, 1, 1);
let baseline0 = F32x4::new(p0p3[0], p0p3[1], p0123[0], p0123[1]);
let ctrl0 = F32x4::new(p01p12[0], p01p12[1], p012p123[0], p012p123[1]);
let baseline1 = F32x4::new(p0123[0], p0123[1], p0p3[2], p0p3[3]);
let ctrl1 = F32x4::new(p012p123[2], p012p123[3], p12p23[2], p12p23[3]);
// FIXME(pcwalton): Set flags appropriately!
return (
Segment {
(Segment {
baseline: LineSegmentF32(baseline0),
ctrl: LineSegmentF32(ctrl0),
kind: SegmentKind::Cubic,
flags: self.0.flags & SegmentFlags::FIRST_IN_SUBPATH,
},
Segment {
}, Segment {
baseline: LineSegmentF32(baseline1),
ctrl: LineSegmentF32(ctrl1),
kind: SegmentKind::Cubic,
flags: self.0.flags & SegmentFlags::CLOSES_SUBPATH,
},
);
}
// Constructs a new 4-element vector from two pairs of adjacent lanes in two input vectors.
unsafe fn assemble(
a_data: &<SimdImpl as Simd>::Vf32,
b_data: &<SimdImpl as Simd>::Vf32,
a_index: usize,
b_index: usize,
) -> <SimdImpl as Simd>::Vf32 {
let (a_data, b_data) = (SimdImpl::castps_pd(*a_data), SimdImpl::castps_pd(*b_data));
let mut result = SimdImpl::setzero_pd();
result[0] = a_data[a_index];
result[1] = b_data[b_index];
SimdImpl::castpd_ps(result)
}
// Constructs a new 2-element vector from a pair of adjacent lanes in an input vector.
unsafe fn pluck(data: &<SimdImpl as Simd>::Vf32, index: usize) -> <SimdImpl as Simd>::Vf32 {
let data = SimdImpl::castps_pd(*data);
let mut result = SimdImpl::setzero_pd();
result[0] = data[index];
SimdImpl::castpd_ps(result)
}
})
}
#[inline]
@ -272,15 +236,15 @@ impl<'s> CubicSegment<'s> {
#[inline]
pub fn y_extrema(self) -> (Option<f32>, Option<f32>) {
let (t0, t1);
unsafe {
let mut p0p1p2p3 = SimdImpl::setzero_ps();
p0p1p2p3[0] = self.0.baseline.from_y();
p0p1p2p3[1] = self.0.ctrl.from_y();
p0p1p2p3[2] = self.0.ctrl.to_y();
p0p1p2p3[3] = self.0.baseline.to_y();
let pxp0p1p2 = SimdImpl::shuffle_ps(p0p1p2p3, p0p1p2p3, 0b1001_0000);
let pxv0v1v2 = SimdImpl::sub_ps(p0p1p2p3, pxp0p1p2);
let p0p1p2p3 = F32x4::new(self.0.baseline.from_y(),
self.0.ctrl.from_y(),
self.0.ctrl.to_y(),
self.0.baseline.to_y());
let pxp0p1p2 = F32x4::new(self.0.baseline.from_y(),
self.0.baseline.from_y(),
self.0.ctrl.from_y(),
self.0.ctrl.to_y());
let pxv0v1v2 = p0p1p2p3 - pxp0p1p2;
let (v0, v1, v2) = (pxv0v1v2[1], pxv0v1v2[2], pxv0v1v2[3]);
let (v0_to_v1, v2_to_v1) = (v0 - v1, v2 - v1);
@ -289,7 +253,6 @@ impl<'s> CubicSegment<'s> {
t0 = (v0_to_v1 + discrim) * denom;
t1 = (v0_to_v1 - discrim) * denom;
}
return match (
t0 > EPSILON && t0 < 1.0 - EPSILON,

270
geometry/src/simd.rs Normal file
View File

@ -0,0 +1,270 @@
// pathfinder/geometry/src/simd.rs
//
// Copyright © 2019 The Pathfinder Project Developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.
// Public SIMD vector aliases. Each one currently resolves unconditionally to the
// implementation in the private `x86` module below, so the rest of the crate can name
// the vector types without referring to a specific backend module.

/// Four packed `f32` lanes.
pub type F32x4 = x86::F32x4;
/// Four packed `i32` lanes.
pub type I32x4 = x86::I32x4;
/// Four packed `u32` lanes.
pub type U32x4 = x86::U32x4;
/// Sixteen packed `u8` lanes.
pub type U8x16 = x86::U8x16;
mod x86 {
    //! 128-bit vector types backed by the x86-64 SSE family of intrinsics.
    //!
    //! Every type is a `Copy` newtype over `__m128` or `__m128i`. Lane 0 is always the lowest
    //! element of the register. Unless a method says otherwise it only uses SSE/SSE2
    //! instructions, which belong to the x86-64 baseline.

    use std::arch::x86_64::{self, __m128, __m128i};
    use std::cmp::PartialEq;
    use std::fmt::{self, Debug, Formatter};
    use std::mem;
    use std::ops::{Add, Mul, Sub, Index, IndexMut};

    // 32-bit floats

    /// Four `f32` lanes held in a single SSE register.
    #[derive(Clone, Copy)]
    pub struct F32x4(pub __m128);

    impl F32x4 {
        /// Builds a vector whose lanes 0 through 3 are `a`, `b`, `c` and `d`.
        #[inline]
        pub fn new(a: f32, b: f32, c: f32, d: f32) -> F32x4 {
            // SAFETY: `_mm_setr_ps` is an SSE intrinsic; SSE is part of the x86-64 baseline.
            unsafe { F32x4(x86_64::_mm_setr_ps(a, b, c, d)) }
        }

        /// Builds a vector with all four lanes set to `x`.
        #[inline]
        pub fn splat(x: f32) -> F32x4 {
            // SAFETY: SSE intrinsic, available on every x86-64 CPU.
            unsafe { F32x4(x86_64::_mm_set1_ps(x)) }
        }

        /// Lane-wise minimum of `self` and `other`.
        #[inline]
        pub fn min(self, other: F32x4) -> F32x4 {
            // SAFETY: SSE intrinsic, available on every x86-64 CPU.
            unsafe { F32x4(x86_64::_mm_min_ps(self.0, other.0)) }
        }

        /// Lane-wise maximum of `self` and `other`.
        #[inline]
        pub fn max(self, other: F32x4) -> F32x4 {
            // SAFETY: SSE intrinsic, available on every x86-64 CPU.
            unsafe { F32x4(x86_64::_mm_max_ps(self.0, other.0)) }
        }

        /// Lane-wise equality test.
        ///
        /// Each result lane is `0xffff_ffff` where the inputs compare equal and `0` otherwise.
        /// A NaN lane never compares equal, not even to itself.
        #[inline]
        pub fn packed_eq(self, other: F32x4) -> U32x4 {
            // SAFETY: SSE/SSE2 intrinsics, available on every x86-64 CPU.
            unsafe {
                let mask = x86_64::_mm_cmpeq_ps(self.0, other.0);
                U32x4(x86_64::_mm_castps_si128(mask))
            }
        }

        /// Exchanges the low and high pairs of lanes: `(a, b, c, d)` becomes `(c, d, a, b)`.
        #[inline]
        pub fn swap_halves(self) -> F32x4 {
            // SAFETY: SSE intrinsic, available on every x86-64 CPU.
            unsafe { F32x4(x86_64::_mm_shuffle_ps(self.0, self.0, 0b0100_1110)) }
        }

        /// Duplicates the low pair of lanes: `(a, b, c, d)` becomes `(a, b, a, b)`.
        #[inline]
        pub fn splat_low_half(self) -> F32x4 {
            // SAFETY: SSE intrinsic, available on every x86-64 CPU.
            unsafe { F32x4(x86_64::_mm_shuffle_ps(self.0, self.0, 0b0100_0100)) }
        }

        /// Duplicates the high pair of lanes: `(a, b, c, d)` becomes `(c, d, c, d)`.
        #[inline]
        pub fn splat_high_half(self) -> F32x4 {
            // SAFETY: SSE intrinsic, available on every x86-64 CPU.
            unsafe { F32x4(x86_64::_mm_shuffle_ps(self.0, self.0, 0b1110_1110)) }
        }

        /// Interleaves the lanes of `self` and `other`.
        ///
        /// Returns `((s0, o0, s1, o1), (s2, o2, s3, o3))`.
        #[inline]
        pub fn interleave(self, other: F32x4) -> (F32x4, F32x4) {
            // SAFETY: SSE intrinsics, available on every x86-64 CPU.
            unsafe {
                let low = x86_64::_mm_unpacklo_ps(self.0, other.0);
                let high = x86_64::_mm_unpackhi_ps(self.0, other.0);
                (F32x4(low), F32x4(high))
            }
        }

        /// Converts every lane to `i32`.
        ///
        /// The conversion rounds according to the current SSE rounding mode (round to nearest,
        /// ties to even, unless the program changed it); it does not truncate like an `as` cast.
        #[inline]
        pub fn to_i32x4(self) -> I32x4 {
            // SAFETY: SSE2 intrinsic, available on every x86-64 CPU.
            unsafe { I32x4(x86_64::_mm_cvtps_epi32(self.0)) }
        }
    }

    impl Default for F32x4 {
        /// Returns the all-zero vector.
        #[inline]
        fn default() -> F32x4 {
            // SAFETY: SSE intrinsic, available on every x86-64 CPU.
            unsafe { F32x4(x86_64::_mm_setzero_ps()) }
        }
    }

    impl Index<usize> for F32x4 {
        type Output = f32;

        /// Borrows lane `index`; panics if `index >= 4`.
        #[inline]
        fn index(&self, index: usize) -> &f32 {
            // SAFETY: `__m128` is 16 bytes of plain `f32` data and is at least as aligned as
            // `[f32; 4]`, so reinterpreting the reference is valid. The array index itself is
            // bounds-checked.
            unsafe { &mem::transmute::<&__m128, &[f32; 4]>(&self.0)[index] }
        }
    }

    impl IndexMut<usize> for F32x4 {
        /// Mutably borrows lane `index`; panics if `index >= 4`.
        #[inline]
        fn index_mut(&mut self, index: usize) -> &mut f32 {
            // SAFETY: same layout argument as `Index::index`; every bit pattern written through
            // the array view is a valid `__m128`.
            unsafe { &mut mem::transmute::<&mut __m128, &mut [f32; 4]>(&mut self.0)[index] }
        }
    }

    impl Debug for F32x4 {
        #[inline]
        fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
            write!(f, "<{}, {}, {}, {}>", self[0], self[1], self[2], self[3])
        }
    }

    impl PartialEq for F32x4 {
        /// Two vectors are equal when all four lanes compare equal, so any NaN lane makes the
        /// vectors unequal.
        #[inline]
        fn eq(&self, other: &F32x4) -> bool {
            self.packed_eq(*other).is_all_ones()
        }
    }

    impl Add<F32x4> for F32x4 {
        type Output = F32x4;

        /// Lane-wise addition.
        #[inline]
        fn add(self, other: F32x4) -> F32x4 {
            // SAFETY: SSE intrinsic, available on every x86-64 CPU.
            unsafe { F32x4(x86_64::_mm_add_ps(self.0, other.0)) }
        }
    }

    impl Mul<F32x4> for F32x4 {
        type Output = F32x4;

        /// Lane-wise multiplication.
        #[inline]
        fn mul(self, other: F32x4) -> F32x4 {
            // SAFETY: SSE intrinsic, available on every x86-64 CPU.
            unsafe { F32x4(x86_64::_mm_mul_ps(self.0, other.0)) }
        }
    }

    impl Sub<F32x4> for F32x4 {
        type Output = F32x4;

        /// Lane-wise subtraction.
        #[inline]
        fn sub(self, other: F32x4) -> F32x4 {
            // SAFETY: SSE intrinsic, available on every x86-64 CPU.
            unsafe { F32x4(x86_64::_mm_sub_ps(self.0, other.0)) }
        }
    }

    // 32-bit signed integers

    /// Four `i32` lanes held in a single SSE register.
    #[derive(Clone, Copy)]
    pub struct I32x4(pub __m128i);

    impl I32x4 {
        /// Builds a vector whose lanes 0 through 3 are `a`, `b`, `c` and `d`.
        #[inline]
        pub fn new(a: i32, b: i32, c: i32, d: i32) -> I32x4 {
            // SAFETY: SSE2 intrinsic, available on every x86-64 CPU.
            unsafe { I32x4(x86_64::_mm_setr_epi32(a, b, c, d)) }
        }

        /// Builds a vector with all four lanes set to `x`.
        #[inline]
        pub fn splat(x: i32) -> I32x4 {
            // SAFETY: SSE2 intrinsic, available on every x86-64 CPU.
            unsafe { I32x4(x86_64::_mm_set1_epi32(x)) }
        }

        /// Reinterprets the same 128 bits as sixteen bytes; no data is changed.
        #[inline]
        pub fn as_u8x16(self) -> U8x16 {
            U8x16(self.0)
        }

        /// Lane-wise signed minimum of `self` and `other`.
        #[inline]
        pub fn min(self, other: I32x4) -> I32x4 {
            // SAFETY / NOTE(review): `_mm_min_epi32` needs SSE4.1, which is NOT part of the
            // x86-64 baseline. This is only sound when the binary is built for, or only ever
            // run on, CPUs with SSE4.1.
            unsafe { I32x4(x86_64::_mm_min_epi32(self.0, other.0)) }
        }
    }

    impl Index<usize> for I32x4 {
        type Output = i32;

        /// Borrows lane `index`; panics if `index >= 4`.
        #[inline]
        fn index(&self, index: usize) -> &i32 {
            // SAFETY: `__m128i` is 16 bytes of plain integer data and is at least as aligned as
            // `[i32; 4]`; the array index is bounds-checked.
            unsafe { &mem::transmute::<&__m128i, &[i32; 4]>(&self.0)[index] }
        }
    }

    impl Sub<I32x4> for I32x4 {
        type Output = I32x4;

        /// Lane-wise wrapping subtraction.
        #[inline]
        fn sub(self, other: I32x4) -> I32x4 {
            // SAFETY: SSE2 intrinsic, available on every x86-64 CPU.
            unsafe { I32x4(x86_64::_mm_sub_epi32(self.0, other.0)) }
        }
    }

    // 32-bit unsigned integers

    /// Four `u32` lanes held in a single SSE register; used for comparison masks.
    #[derive(Clone, Copy)]
    pub struct U32x4(pub __m128i);

    impl U32x4 {
        /// Returns `true` when every one of the 128 bits is set.
        ///
        /// Implemented with SSE2 only: each lane is compared against `0xffff_ffff` and the
        /// per-byte sign bits of that comparison are gathered. The previous SSE4.1
        /// `_mm_test_all_ones` made every `F32x4` equality check depend on a CPU feature that
        /// is outside the x86-64 baseline.
        #[inline]
        fn is_all_ones(&self) -> bool {
            // SAFETY: SSE2 intrinsics, available on every x86-64 CPU.
            unsafe {
                let all_ones = x86_64::_mm_set1_epi32(-1);
                let lanes_full = x86_64::_mm_cmpeq_epi32(self.0, all_ones);
                // One mask bit per byte; 16 set bits means all four lanes matched.
                x86_64::_mm_movemask_epi8(lanes_full) == 0xffff
            }
        }
    }

    impl Index<usize> for U32x4 {
        type Output = u32;

        /// Borrows lane `index`; panics if `index >= 4`.
        #[inline]
        fn index(&self, index: usize) -> &u32 {
            // SAFETY: `__m128i` is 16 bytes of plain integer data and is at least as aligned as
            // `[u32; 4]`; the array index is bounds-checked.
            unsafe { &mem::transmute::<&__m128i, &[u32; 4]>(&self.0)[index] }
        }
    }

    // 8-bit unsigned integers

    /// Sixteen `u8` lanes held in a single SSE register.
    #[derive(Clone, Copy)]
    pub struct U8x16(pub __m128i);

    impl U8x16 {
        /// Reinterprets the same 128 bits as four `i32` lanes; no data is changed.
        #[inline]
        pub fn as_i32x4(self) -> I32x4 {
            I32x4(self.0)
        }

        /// Byte shuffle: output byte `i` is `self[indices[i] & 0x0f]`, or zero when the top bit
        /// of `indices[i]` is set.
        #[inline]
        pub fn shuffle(self, indices: U8x16) -> U8x16 {
            // SAFETY / NOTE(review): `_mm_shuffle_epi8` needs SSSE3, which is NOT part of the
            // x86-64 baseline. This is only sound when the binary is built for, or only ever
            // run on, CPUs with SSSE3.
            unsafe { U8x16(x86_64::_mm_shuffle_epi8(self.0, indices.0)) }
        }
    }
}

View File

@ -16,7 +16,6 @@ jemallocator = "0.1"
lyon_geom = "0.12"
lyon_path = "0.12"
rayon = "1.0"
simdeez = "0.4"
svgtypes = "0.3"
usvg = "0.4"

View File

@ -30,14 +30,11 @@ use lyon_path::iterator::PathIter;
use pathfinder_geometry::line_segment::{LineSegmentF32, LineSegmentU4, LineSegmentU8};
use pathfinder_geometry::point::Point2DF32;
use pathfinder_geometry::segment::{Segment, SegmentFlags, SegmentKind};
use pathfinder_geometry::simd::{F32x4, I32x4};
use pathfinder_geometry::stroke::{StrokeStyle, StrokeToFillIter};
use pathfinder_geometry::util;
use rayon::ThreadPoolBuilder;
use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
use simdeez::Simd;
use simdeez::overloads::I32x4_41;
use simdeez::sse41::Sse41;
use std::arch::x86_64;
use std::cmp::Ordering;
use std::fmt::{self, Debug, Formatter};
use std::fs::File;
@ -1193,32 +1190,26 @@ impl BuiltObject {
// TODO(pcwalton): SIMD-ify `tile_x` and `tile_y`.
fn add_fill(&mut self, segment: &LineSegmentF32, tile_x: i16, tile_y: i16) {
//println!("add_fill({:?} ({}, {}))", segment, tile_x, tile_y);
let (px, subpx);
unsafe {
let mut segment = Sse41::cvtps_epi32(Sse41::mul_ps(segment.0, Sse41::set1_ps(256.0)));
let mut segment = (segment.0 * F32x4::splat(256.0)).to_i32x4();
let mut tile_origin = Sse41::setzero_epi32();
tile_origin[0] = (tile_x as i32) * (TILE_WIDTH as i32) * 256;
tile_origin[1] = (tile_y as i32) * (TILE_HEIGHT as i32) * 256;
tile_origin = Sse41::shuffle_epi32(tile_origin, 0b0100_0100);
let tile_origin_x = (tile_x as i32) * (TILE_WIDTH as i32) * 256;
let tile_origin_y = (tile_y as i32) * (TILE_HEIGHT as i32) * 256;
let tile_origin = I32x4::new(tile_origin_x, tile_origin_y, tile_origin_x, tile_origin_y);
segment = Sse41::sub_epi32(segment, tile_origin);
segment = segment - tile_origin;
/*
println!("... before min: {} {} {} {}",
segment[0], segment[1], segment[2], segment[3]);
*/
//segment = Sse41::max_epi32(segment, Sse41::setzero_epi32());
segment = Sse41::min_epi32(segment, Sse41::set1_epi32(0x0fff));
segment = segment.min(I32x4::splat(0x0fff));
//println!("... after min: {} {} {} {}", segment[0], segment[1], segment[2], segment[3]);
let mut shuffle_mask = Sse41::setzero_epi32();
shuffle_mask[0] = 0x0c08_0400;
shuffle_mask[1] = 0x0d05_0901;
segment = Sse41::shuffle_epi8(segment, shuffle_mask);
let shuffle_mask = I32x4::new(0x0c08_0400, 0x0d05_0901, 0, 0);
segment = segment.as_u8x16().shuffle(shuffle_mask.as_u8x16()).as_i32x4();
px = LineSegmentU4((segment[1] | (segment[1] >> 12)) as u16);
subpx = LineSegmentU8(segment[0] as u32);
}
let px = LineSegmentU4((segment[1] | (segment[1] >> 12)) as u16);
let subpx = LineSegmentU8(segment[0] as u32);
let tile_index = self.tile_coords_to_index(tile_x, tile_y);
@ -1930,87 +1921,63 @@ impl PartialOrd<ActiveEdge> for ActiveEdge {
#[derive(Clone, Copy)]
struct Transform2DF32 {
// Row-major order.
matrix: <Sse41 as Simd>::Vf32,
matrix: F32x4,
vector: Point2DF32,
}
impl Default for Transform2DF32 {
fn default() -> Transform2DF32 {
unsafe {
let mut matrix = <Sse41 as Simd>::setzero_ps();
matrix[0] = 1.0;
matrix[3] = 1.0;
Transform2DF32 { matrix, vector: Point2DF32::default() }
}
Self::from_scale(&Point2DF32::splat(1.0))
}
}
impl Transform2DF32 {
fn from_scale(scale: &Point2DF32) -> Transform2DF32 {
unsafe {
let mut matrix = Sse41::setzero_ps();
matrix[0] = scale.x();
matrix[3] = scale.y();
Transform2DF32 { matrix, vector: Point2DF32::default() }
Transform2DF32 {
matrix: F32x4::new(scale.x(), 0.0, 0.0, scale.y()),
vector: Point2DF32::default(),
}
}
fn row_major(m11: f32, m12: f32, m21: f32, m22: f32, m31: f32, m32: f32) -> Transform2DF32 {
unsafe {
let mut matrix = Sse41::setzero_ps();
matrix[0] = m11;
matrix[1] = m12;
matrix[2] = m21;
matrix[3] = m22;
Transform2DF32 { matrix, vector: Point2DF32::new(m31, m32) }
Transform2DF32 {
matrix: F32x4::new(m11, m12, m21, m22),
vector: Point2DF32::new(m31, m32),
}
}
fn m11(&self) -> f32 { self.matrix[0] }
fn m12(&self) -> f32 { self.matrix[1] }
fn m21(&self) -> f32 { self.matrix[2] }
fn m22(&self) -> f32 { self.matrix[3] }
fn transform_point(&self, point: &Point2DF32) -> Point2DF32 {
unsafe {
let xxyy = Sse41::shuffle_ps(point.0, point.0, 0b0101_0000);
let x11_x12_y21_y22 = Sse41::mul_ps(xxyy, self.matrix);
let y21_y22 = Sse41::shuffle_ps(x11_x12_y21_y22, x11_x12_y21_y22, 0b0000_1110);
Point2DF32(Sse41::add_ps(Sse41::add_ps(x11_x12_y21_y22, y21_y22), self.vector.0))
}
let xxyy = F32x4::new(point.x(), point.x(), point.y(), point.y());
let x11_x12_y21_y22 = xxyy * self.matrix;
let y21_y22 = x11_x12_y21_y22.splat_high_half();
Point2DF32(x11_x12_y21_y22 + y21_y22 + self.vector.0)
}
fn post_mul(&self, other: &Transform2DF32) -> Transform2DF32 {
unsafe {
// Here `a` is self and `b` is `other`.
let a11a21a11a21 = Sse41::shuffle_ps(self.matrix, self.matrix, 0b1000_1000);
let b11b11b12b12 = Sse41::shuffle_ps(other.matrix, other.matrix, 0b0101_0000);
let lhs = Sse41::mul_ps(a11a21a11a21, b11b11b12b12);
let a11a21a11a21 = F32x4::new(self.m11(), self.m21(), self.m11(), self.m21());
let b11b11b12b12 = F32x4::new(other.m11(), other.m11(), other.m12(), other.m12());
let lhs = a11a21a11a21 * b11b11b12b12;
let a12a22a12a22 = Sse41::shuffle_ps(self.matrix, self.matrix, 0b1101_1101);
let b21b21b22b22 = Sse41::shuffle_ps(other.matrix, other.matrix, 0b1111_1010);
let rhs = Sse41::mul_ps(a12a22a12a22, b21b21b22b22);
let a12a22a12a22 = F32x4::new(self.m12(), self.m22(), self.m12(), self.m22());
let b21b21b22b22 = F32x4::new(other.m21(), other.m21(), other.m22(), other.m22());
let rhs = a12a22a12a22 * b21b21b22b22;
let matrix = Sse41::add_ps(lhs, rhs);
let matrix = lhs + rhs;
let vector = other.transform_point(&self.vector) + other.vector;
Transform2DF32 { matrix, vector }
}
}
fn pre_mul(&self, other: &Transform2DF32) -> Transform2DF32 {
other.post_mul(self)
}
}
// SIMD extensions
trait SimdExt: Simd {
// TODO(pcwalton): Default scalar implementation.
unsafe fn shuffle_epi8(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32;
}
impl SimdExt for Sse41 {
#[inline(always)]
unsafe fn shuffle_epi8(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32 {
I32x4_41(x86_64::_mm_shuffle_epi8(a.0, b.0))
}
}
// Testing
#[cfg(test)]