Remove simdeez
This commit is contained in:
parent
37e6e71251
commit
bbf193f00f
|
@ -435,7 +435,6 @@ dependencies = [
|
|||
"lyon_path 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"serde_derive 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"simdeez 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
|
@ -626,11 +625,6 @@ dependencies = [
|
|||
"syn 0.15.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "simdeez"
|
||||
version = "0.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
|
||||
[[package]]
|
||||
name = "simplecss"
|
||||
version = "0.1.0"
|
||||
|
@ -734,7 +728,6 @@ dependencies = [
|
|||
"quickcheck 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"rayon 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"simdeez 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"svgtypes 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
"usvg 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||
]
|
||||
|
@ -904,7 +897,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
|||
"checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27"
|
||||
"checksum serde 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)" = "0e732ed5a5592c17d961555e3b552985baf98d50ce418b7b655f31f6ba7eb1b7"
|
||||
"checksum serde_derive 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)" = "b4d6115a3ca25c224e409185325afc16a0d5aaaabc15c42b09587d6f1ba39a5b"
|
||||
"checksum simdeez 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "53d1e4a8ee9c44fa7c2d6464b679bd62c6b156edb865f084eb51af7b34efaa63"
|
||||
"checksum simplecss 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "135685097a85a64067df36e28a243e94a94f76d829087ce0be34eeb014260c0e"
|
||||
"checksum siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac"
|
||||
"checksum slab 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5f9776d6b986f77b35c6cf846c11ad986ff128fe0b2b63a3628e3755e8d3102d"
|
||||
|
|
|
@ -12,4 +12,3 @@ lyon_geom = "0.12"
|
|||
lyon_path = "0.12"
|
||||
serde = "1.0"
|
||||
serde_derive = "1.0"
|
||||
simdeez = "0.4"
|
||||
|
|
|
@ -15,11 +15,6 @@
|
|||
#[macro_use]
|
||||
extern crate bitflags;
|
||||
|
||||
use simdeez::sse41::Sse41;
|
||||
|
||||
// TODO(pcwalton): Make this configurable.
|
||||
pub type SimdImpl = Sse41;
|
||||
|
||||
pub mod clip;
|
||||
pub mod cubic_to_quadratic;
|
||||
pub mod line_segment;
|
||||
|
@ -28,6 +23,7 @@ pub mod orientation;
|
|||
pub mod point;
|
||||
pub mod segment;
|
||||
pub mod segments;
|
||||
pub mod simd;
|
||||
pub mod stroke;
|
||||
pub mod transform;
|
||||
pub mod util;
|
||||
|
|
|
@ -10,62 +10,40 @@
|
|||
|
||||
//! Line segment types, optimized with SIMD.
|
||||
|
||||
use crate::SimdImpl;
|
||||
use crate::point::Point2DF32;
|
||||
use crate::simd::F32x4;
|
||||
use crate::util;
|
||||
use simdeez::Simd;
|
||||
use std::ops::Sub;
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct LineSegmentF32(pub <SimdImpl as Simd>::Vf32);
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Default)]
|
||||
pub struct LineSegmentF32(pub F32x4);
|
||||
|
||||
impl LineSegmentF32 {
|
||||
#[inline]
|
||||
pub fn new(from: &Point2DF32, to: &Point2DF32) -> LineSegmentF32 {
|
||||
unsafe {
|
||||
LineSegmentF32(SimdImpl::castpd_ps(SimdImpl::unpacklo_pd(
|
||||
SimdImpl::castps_pd(from.0),
|
||||
SimdImpl::castps_pd(to.0),
|
||||
)))
|
||||
}
|
||||
LineSegmentF32(F32x4::new(from.x(), from.y(), to.x(), to.y()))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn from(&self) -> Point2DF32 {
|
||||
unsafe {
|
||||
Point2DF32(SimdImpl::castpd_ps(SimdImpl::unpacklo_pd(
|
||||
SimdImpl::castps_pd(self.0),
|
||||
SimdImpl::setzero_pd(),
|
||||
)))
|
||||
}
|
||||
Point2DF32(self.0)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn to(&self) -> Point2DF32 {
|
||||
unsafe {
|
||||
Point2DF32(SimdImpl::castpd_ps(SimdImpl::unpackhi_pd(
|
||||
SimdImpl::castps_pd(self.0),
|
||||
SimdImpl::setzero_pd(),
|
||||
)))
|
||||
}
|
||||
Point2DF32(self.0.swap_halves())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn set_from(&mut self, point: &Point2DF32) {
|
||||
unsafe {
|
||||
let (mut this, point) = (SimdImpl::castps_pd(self.0), SimdImpl::castps_pd(point.0));
|
||||
this[0] = point[0];
|
||||
self.0 = SimdImpl::castpd_ps(this);
|
||||
}
|
||||
self.0[0] = point.x();
|
||||
self.0[1] = point.y();
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn set_to(&mut self, point: &Point2DF32) {
|
||||
unsafe {
|
||||
let (mut this, point) = (SimdImpl::castps_pd(self.0), SimdImpl::castps_pd(point.0));
|
||||
this[1] = point[0];
|
||||
self.0 = SimdImpl::castpd_ps(this);
|
||||
}
|
||||
self.0[2] = point.x();
|
||||
self.0[3] = point.y();
|
||||
}
|
||||
|
||||
#[allow(clippy::wrong_self_convention)]
|
||||
|
@ -92,34 +70,17 @@ impl LineSegmentF32 {
|
|||
|
||||
#[inline]
|
||||
pub fn scale(&self, factor: f32) -> LineSegmentF32 {
|
||||
unsafe { LineSegmentF32(SimdImpl::mul_ps(self.0, SimdImpl::set1_ps(factor))) }
|
||||
LineSegmentF32(self.0 * F32x4::splat(factor))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn split(&self, t: f32) -> (LineSegmentF32, LineSegmentF32) {
|
||||
debug_assert!(t >= 0.0 && t <= 1.0);
|
||||
unsafe {
|
||||
let from_from = SimdImpl::castpd_ps(SimdImpl::unpacklo_pd(
|
||||
SimdImpl::castps_pd(self.0),
|
||||
SimdImpl::castps_pd(self.0),
|
||||
));
|
||||
let to_to = SimdImpl::castpd_ps(SimdImpl::unpackhi_pd(
|
||||
SimdImpl::castps_pd(self.0),
|
||||
SimdImpl::castps_pd(self.0),
|
||||
));
|
||||
let (from_from, to_to) = (self.0.splat_low_half(), self.0.splat_high_half());
|
||||
let d_d = to_to - from_from;
|
||||
let mid_mid = from_from + d_d * SimdImpl::set1_ps(t);
|
||||
(
|
||||
LineSegmentF32(SimdImpl::castpd_ps(SimdImpl::unpacklo_pd(
|
||||
SimdImpl::castps_pd(from_from),
|
||||
SimdImpl::castps_pd(mid_mid),
|
||||
))),
|
||||
LineSegmentF32(SimdImpl::castpd_ps(SimdImpl::unpackhi_pd(
|
||||
SimdImpl::castps_pd(mid_mid),
|
||||
SimdImpl::castps_pd(to_to),
|
||||
))),
|
||||
)
|
||||
}
|
||||
let mid_mid = from_from + d_d * F32x4::splat(t);
|
||||
(LineSegmentF32(F32x4::new(from_from[0], from_from[1], mid_mid[0], mid_mid[1])),
|
||||
LineSegmentF32(F32x4::new(mid_mid[0], mid_mid[1], to_to[0], to_to[1])))
|
||||
}
|
||||
|
||||
// Returns the upper segment first, followed by the lower segment.
|
||||
|
@ -150,7 +111,7 @@ impl LineSegmentF32 {
|
|||
|
||||
#[inline]
|
||||
pub fn reversed(&self) -> LineSegmentF32 {
|
||||
unsafe { LineSegmentF32(SimdImpl::shuffle_ps(self.0, self.0, 0b0100_1110)) }
|
||||
LineSegmentF32(self.0.swap_halves())
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
@ -193,35 +154,11 @@ impl LineSegmentF32 {
|
|||
}
|
||||
}
|
||||
|
||||
impl PartialEq for LineSegmentF32 {
|
||||
#[inline]
|
||||
fn eq(&self, other: &LineSegmentF32) -> bool {
|
||||
unsafe {
|
||||
let results = SimdImpl::castps_epi32(SimdImpl::cmpeq_ps(self.0, other.0));
|
||||
// FIXME(pcwalton): Is there a better way to do this?
|
||||
results[0] == -1 && results[1] == -1 && results[2] == -1 && results[3] == -1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for LineSegmentF32 {
|
||||
#[inline]
|
||||
fn default() -> LineSegmentF32 {
|
||||
unsafe { LineSegmentF32(SimdImpl::setzero_ps()) }
|
||||
}
|
||||
}
|
||||
|
||||
impl Sub<Point2DF32> for LineSegmentF32 {
|
||||
type Output = LineSegmentF32;
|
||||
#[inline]
|
||||
fn sub(self, point: Point2DF32) -> LineSegmentF32 {
|
||||
unsafe {
|
||||
let point_point = SimdImpl::castpd_ps(SimdImpl::unpacklo_pd(
|
||||
SimdImpl::castps_pd(point.0),
|
||||
SimdImpl::castps_pd(point.0),
|
||||
));
|
||||
LineSegmentF32(self.0 - point_point)
|
||||
}
|
||||
LineSegmentF32(self.0 - point.0.splat_low_half())
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -10,28 +10,22 @@
|
|||
|
||||
//! A SIMD-optimized point type.
|
||||
|
||||
use crate::SimdImpl;
|
||||
use crate::simd::F32x4;
|
||||
use euclid::Point2D;
|
||||
use simdeez::Simd;
|
||||
use std::ops::{Add, Mul, Sub};
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct Point2DF32(pub <SimdImpl as Simd>::Vf32);
|
||||
#[derive(Clone, Copy, Debug, Default)]
|
||||
pub struct Point2DF32(pub F32x4);
|
||||
|
||||
impl Point2DF32 {
|
||||
#[inline]
|
||||
pub fn new(x: f32, y: f32) -> Point2DF32 {
|
||||
unsafe {
|
||||
let mut data = SimdImpl::setzero_ps();
|
||||
data[0] = x;
|
||||
data[1] = y;
|
||||
Point2DF32(data)
|
||||
}
|
||||
Point2DF32(F32x4::new(x, y, 0.0, 0.0))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn splat(value: f32) -> Point2DF32 {
|
||||
unsafe { Point2DF32(SimdImpl::set1_ps(value)) }
|
||||
Point2DF32(F32x4::splat(value))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
@ -56,29 +50,20 @@ impl Point2DF32 {
|
|||
|
||||
#[inline]
|
||||
pub fn min(&self, other: Point2DF32) -> Point2DF32 {
|
||||
unsafe { Point2DF32(SimdImpl::min_ps(self.0, other.0)) }
|
||||
Point2DF32(self.0.min(other.0))
|
||||
}
|
||||
|
||||
#[inline]
|
||||
pub fn max(&self, other: Point2DF32) -> Point2DF32 {
|
||||
unsafe { Point2DF32(SimdImpl::max_ps(self.0, other.0)) }
|
||||
Point2DF32(self.0.max(other.0))
|
||||
}
|
||||
}
|
||||
|
||||
impl PartialEq for Point2DF32 {
|
||||
#[inline]
|
||||
fn eq(&self, other: &Point2DF32) -> bool {
|
||||
unsafe {
|
||||
let results = SimdImpl::castps_epi32(SimdImpl::cmpeq_ps(self.0, other.0));
|
||||
results[0] == -1 && results[1] == -1
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Point2DF32 {
|
||||
#[inline]
|
||||
fn default() -> Point2DF32 {
|
||||
unsafe { Point2DF32(SimdImpl::setzero_ps()) }
|
||||
let results = self.0.packed_eq(other.0);
|
||||
results[0] != 0 && results[1] != 0
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -10,10 +10,9 @@
|
|||
|
||||
//! Line or curve segments, optimized with SIMD.
|
||||
|
||||
use crate::SimdImpl;
|
||||
use crate::line_segment::LineSegmentF32;
|
||||
use crate::point::Point2DF32;
|
||||
use simdeez::Simd;
|
||||
use crate::simd::F32x4;
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq)]
|
||||
pub struct Segment {
|
||||
|
@ -160,12 +159,9 @@ pub struct CubicSegment<'s>(&'s Segment);
|
|||
impl<'s> CubicSegment<'s> {
|
||||
#[inline]
|
||||
pub fn flatten_once(self, tolerance: f32) -> Option<Segment> {
|
||||
let s2inv;
|
||||
unsafe {
|
||||
let (baseline, ctrl) = (self.0.baseline.0, self.0.ctrl.0);
|
||||
let from_from = SimdImpl::shuffle_ps(baseline, baseline, 0b0100_0100);
|
||||
|
||||
let v0102 = SimdImpl::sub_ps(ctrl, from_from);
|
||||
let from_from = baseline.splat_low_half();
|
||||
let v0102 = ctrl - from_from;
|
||||
|
||||
// v01.x v01.y v02.x v02.y
|
||||
// * v01.x v01.y v01.y v01.x
|
||||
|
@ -175,15 +171,14 @@ impl<'s> CubicSegment<'s> {
|
|||
// +-------+ +-----+
|
||||
// + -
|
||||
// v01 len^2 determinant
|
||||
let products = SimdImpl::mul_ps(v0102, SimdImpl::shuffle_ps(v0102, v0102, 0b0001_0100));
|
||||
let products = v0102 * F32x4::new(v0102[0], v0102[1], v0102[1], v0102[0]);
|
||||
|
||||
let det = products[2] - products[3];
|
||||
if det == 0.0 {
|
||||
return None;
|
||||
}
|
||||
|
||||
s2inv = (products[0] + products[1]).sqrt() / det;
|
||||
}
|
||||
let s2inv = (products[0] + products[1]).sqrt() / det;
|
||||
|
||||
let t = 2.0 * ((tolerance / 3.0) * s2inv.abs()).sqrt();
|
||||
if t >= 1.0 - EPSILON || t == 0.0 {
|
||||
|
@ -197,71 +192,40 @@ impl<'s> CubicSegment<'s> {
|
|||
|
||||
#[inline]
|
||||
pub fn split(self, t: f32) -> (Segment, Segment) {
|
||||
unsafe {
|
||||
let tttt = SimdImpl::set1_ps(t);
|
||||
let tttt = F32x4::splat(t);
|
||||
|
||||
let p0p3 = self.0.baseline.0;
|
||||
let p1p2 = self.0.ctrl.0;
|
||||
let p0p1 = assemble(&p0p3, &p1p2, 0, 0);
|
||||
let p0p1 = F32x4::new(p0p3[0], p0p3[1], p1p2[0], p1p2[1]);
|
||||
|
||||
// p01 = lerp(p0, p1, t), p12 = lerp(p1, p2, t), p23 = lerp(p2, p3, t)
|
||||
let p01p12 = SimdImpl::add_ps(p0p1, SimdImpl::mul_ps(tttt, SimdImpl::sub_ps(p1p2, p0p1)));
|
||||
let pxxp23 = SimdImpl::add_ps(p1p2, SimdImpl::mul_ps(tttt, SimdImpl::sub_ps(p0p3, p1p2)));
|
||||
|
||||
let p12p23 = assemble(&p01p12, &pxxp23, 1, 1);
|
||||
let p01p12 = p0p1 + tttt * (p1p2 - p0p1);
|
||||
let pxxp23 = p1p2 + tttt * (p0p3 - p1p2);
|
||||
let p12p23 = F32x4::new(p01p12[2], p01p12[3], pxxp23[2], pxxp23[3]);
|
||||
|
||||
// p012 = lerp(p01, p12, t), p123 = lerp(p12, p23, t)
|
||||
let p012p123 =
|
||||
SimdImpl::add_ps(p01p12, SimdImpl::mul_ps(tttt, SimdImpl::sub_ps(p12p23, p01p12)));
|
||||
|
||||
let p123 = pluck(&p012p123, 1);
|
||||
let p012p123 = p01p12 + tttt * (p12p23 - p01p12);
|
||||
let p123 = p012p123.splat_high_half();
|
||||
|
||||
// p0123 = lerp(p012, p123, t)
|
||||
let p0123 = SimdImpl::add_ps(p012p123, SimdImpl::mul_ps(tttt, SimdImpl::sub_ps(p123, p012p123)));
|
||||
let p0123 = p012p123 + tttt * (p123 - p012p123);
|
||||
|
||||
let baseline0 = assemble(&p0p3, &p0123, 0, 0);
|
||||
let ctrl0 = assemble(&p01p12, &p012p123, 0, 0);
|
||||
let baseline1 = assemble(&p0123, &p0p3, 0, 1);
|
||||
let ctrl1 = assemble(&p012p123, &p12p23, 1, 1);
|
||||
let baseline0 = F32x4::new(p0p3[0], p0p3[1], p0123[0], p0123[1]);
|
||||
let ctrl0 = F32x4::new(p01p12[0], p01p12[1], p012p123[0], p012p123[1]);
|
||||
let baseline1 = F32x4::new(p0123[0], p0123[1], p0p3[2], p0p3[3]);
|
||||
let ctrl1 = F32x4::new(p012p123[2], p012p123[3], p12p23[2], p12p23[3]);
|
||||
|
||||
// FIXME(pcwalton): Set flags appropriately!
|
||||
return (
|
||||
Segment {
|
||||
(Segment {
|
||||
baseline: LineSegmentF32(baseline0),
|
||||
ctrl: LineSegmentF32(ctrl0),
|
||||
kind: SegmentKind::Cubic,
|
||||
flags: self.0.flags & SegmentFlags::FIRST_IN_SUBPATH,
|
||||
},
|
||||
Segment {
|
||||
}, Segment {
|
||||
baseline: LineSegmentF32(baseline1),
|
||||
ctrl: LineSegmentF32(ctrl1),
|
||||
kind: SegmentKind::Cubic,
|
||||
flags: self.0.flags & SegmentFlags::CLOSES_SUBPATH,
|
||||
},
|
||||
);
|
||||
}
|
||||
|
||||
// Constructs a new 4-element vector from two pairs of adjacent lanes in two input vectors.
|
||||
unsafe fn assemble(
|
||||
a_data: &<SimdImpl as Simd>::Vf32,
|
||||
b_data: &<SimdImpl as Simd>::Vf32,
|
||||
a_index: usize,
|
||||
b_index: usize,
|
||||
) -> <SimdImpl as Simd>::Vf32 {
|
||||
let (a_data, b_data) = (SimdImpl::castps_pd(*a_data), SimdImpl::castps_pd(*b_data));
|
||||
let mut result = SimdImpl::setzero_pd();
|
||||
result[0] = a_data[a_index];
|
||||
result[1] = b_data[b_index];
|
||||
SimdImpl::castpd_ps(result)
|
||||
}
|
||||
|
||||
// Constructs a new 2-element vector from a pair of adjacent lanes in an input vector.
|
||||
unsafe fn pluck(data: &<SimdImpl as Simd>::Vf32, index: usize) -> <SimdImpl as Simd>::Vf32 {
|
||||
let data = SimdImpl::castps_pd(*data);
|
||||
let mut result = SimdImpl::setzero_pd();
|
||||
result[0] = data[index];
|
||||
SimdImpl::castpd_ps(result)
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
@ -272,15 +236,15 @@ impl<'s> CubicSegment<'s> {
|
|||
#[inline]
|
||||
pub fn y_extrema(self) -> (Option<f32>, Option<f32>) {
|
||||
let (t0, t1);
|
||||
unsafe {
|
||||
let mut p0p1p2p3 = SimdImpl::setzero_ps();
|
||||
p0p1p2p3[0] = self.0.baseline.from_y();
|
||||
p0p1p2p3[1] = self.0.ctrl.from_y();
|
||||
p0p1p2p3[2] = self.0.ctrl.to_y();
|
||||
p0p1p2p3[3] = self.0.baseline.to_y();
|
||||
|
||||
let pxp0p1p2 = SimdImpl::shuffle_ps(p0p1p2p3, p0p1p2p3, 0b1001_0000);
|
||||
let pxv0v1v2 = SimdImpl::sub_ps(p0p1p2p3, pxp0p1p2);
|
||||
let p0p1p2p3 = F32x4::new(self.0.baseline.from_y(),
|
||||
self.0.ctrl.from_y(),
|
||||
self.0.ctrl.to_y(),
|
||||
self.0.baseline.to_y());
|
||||
let pxp0p1p2 = F32x4::new(self.0.baseline.from_y(),
|
||||
self.0.baseline.from_y(),
|
||||
self.0.ctrl.from_y(),
|
||||
self.0.ctrl.to_y());
|
||||
let pxv0v1v2 = p0p1p2p3 - pxp0p1p2;
|
||||
let (v0, v1, v2) = (pxv0v1v2[1], pxv0v1v2[2], pxv0v1v2[3]);
|
||||
|
||||
let (v0_to_v1, v2_to_v1) = (v0 - v1, v2 - v1);
|
||||
|
@ -289,7 +253,6 @@ impl<'s> CubicSegment<'s> {
|
|||
|
||||
t0 = (v0_to_v1 + discrim) * denom;
|
||||
t1 = (v0_to_v1 - discrim) * denom;
|
||||
}
|
||||
|
||||
return match (
|
||||
t0 > EPSILON && t0 < 1.0 - EPSILON,
|
||||
|
|
|
@ -0,0 +1,270 @@
|
|||
// pathfinder/geometry/src/simd.rs
|
||||
//
|
||||
// Copyright © 2019 The Pathfinder Project Developers.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||
// option. This file may not be copied, modified, or distributed
|
||||
// except according to those terms.
|
||||
|
||||
pub type F32x4 = x86::F32x4;
|
||||
pub type I32x4 = x86::I32x4;
|
||||
pub type U32x4 = x86::U32x4;
|
||||
pub type U8x16 = x86::U8x16;
|
||||
|
||||
/// SSE-backed implementations of the 128-bit vector types re-exported by this file.
///
/// Each type is a thin `Copy` wrapper around a raw `__m128`/`__m128i` register. Most operations
/// only need SSE/SSE2, which every x86-64 CPU provides. The exceptions are `I32x4::min` and
/// `U32x4::is_all_ones` (and therefore `F32x4 == F32x4`), which call SSE4.1 intrinsics, and
/// `U8x16::shuffle`, which calls an SSSE3 intrinsic. No runtime feature detection is performed
/// here, so those operations must only be executed on CPUs that support the extension.
mod x86 {
    use std::arch::x86_64::{self, __m128, __m128i};
    use std::cmp::PartialEq;
    use std::fmt::{self, Debug, Formatter};
    use std::ops::{Add, Index, IndexMut, Mul, Sub};

    /// Writes `lanes` as `<a, b, …>`, the textual form shared by every vector type in this module.
    fn fmt_lanes<T: fmt::Display>(f: &mut Formatter, lanes: &[T]) -> Result<(), fmt::Error> {
        f.write_str("<")?;
        for (position, lane) in lanes.iter().enumerate() {
            if position != 0 {
                f.write_str(", ")?;
            }
            write!(f, "{}", lane)?;
        }
        f.write_str(">")
    }

    // 32-bit floats

    /// Four packed `f32` lanes; lane 0 is the lowest 32 bits of the register.
    #[derive(Clone, Copy)]
    pub struct F32x4(pub __m128);

    impl F32x4 {
        /// Builds a vector whose lanes 0 through 3 are `a`, `b`, `c` and `d`, in that order.
        #[inline]
        pub fn new(a: f32, b: f32, c: f32, d: f32) -> F32x4 {
            // SAFETY: `_mm_setr_ps` only requires SSE, part of the x86-64 baseline.
            unsafe { F32x4(x86_64::_mm_setr_ps(a, b, c, d)) }
        }

        /// Builds a vector with `x` in all four lanes.
        #[inline]
        pub fn splat(x: f32) -> F32x4 {
            // SAFETY: SSE-only intrinsic; SSE is part of the x86-64 baseline.
            unsafe { F32x4(x86_64::_mm_set1_ps(x)) }
        }

        /// Lane-wise minimum of `self` and `other` (`_mm_min_ps`).
        #[inline]
        pub fn min(self, other: F32x4) -> F32x4 {
            // SAFETY: SSE-only intrinsic; SSE is part of the x86-64 baseline.
            unsafe { F32x4(x86_64::_mm_min_ps(self.0, other.0)) }
        }

        /// Lane-wise maximum of `self` and `other` (`_mm_max_ps`).
        #[inline]
        pub fn max(self, other: F32x4) -> F32x4 {
            // SAFETY: SSE-only intrinsic; SSE is part of the x86-64 baseline.
            unsafe { F32x4(x86_64::_mm_max_ps(self.0, other.0)) }
        }

        /// Lane-wise equality test: each result lane is all ones where the inputs compare equal
        /// and zero otherwise.
        #[inline]
        pub fn packed_eq(self, other: F32x4) -> U32x4 {
            // SAFETY: SSE/SSE2-only intrinsics; both are part of the x86-64 baseline.
            unsafe { U32x4(x86_64::_mm_castps_si128(x86_64::_mm_cmpeq_ps(self.0, other.0))) }
        }

        /// Returns lanes `[2, 3, 0, 1]`, i.e. exchanges the low and high 64-bit halves.
        #[inline]
        pub fn swap_halves(self) -> F32x4 {
            // SAFETY: SSE-only intrinsic; SSE is part of the x86-64 baseline.
            unsafe { F32x4(x86_64::_mm_shuffle_ps(self.0, self.0, 0b0100_1110)) }
        }

        /// Returns lanes `[0, 1, 0, 1]`, i.e. the low half copied into both halves.
        #[inline]
        pub fn splat_low_half(self) -> F32x4 {
            // SAFETY: SSE-only intrinsic; SSE is part of the x86-64 baseline.
            unsafe { F32x4(x86_64::_mm_shuffle_ps(self.0, self.0, 0b0100_0100)) }
        }

        /// Returns lanes `[2, 3, 2, 3]`, i.e. the high half copied into both halves.
        #[inline]
        pub fn splat_high_half(self) -> F32x4 {
            // SAFETY: SSE-only intrinsic; SSE is part of the x86-64 baseline.
            unsafe { F32x4(x86_64::_mm_shuffle_ps(self.0, self.0, 0b1110_1110)) }
        }

        /// Interleaves the lanes of `self` (`a`) and `other` (`b`), returning
        /// `([a0, b0, a1, b1], [a2, b2, a3, b3])`.
        #[inline]
        pub fn interleave(self, other: F32x4) -> (F32x4, F32x4) {
            // SAFETY: SSE-only intrinsics; SSE is part of the x86-64 baseline.
            unsafe {
                let low = x86_64::_mm_unpacklo_ps(self.0, other.0);
                let high = x86_64::_mm_unpackhi_ps(self.0, other.0);
                (F32x4(low), F32x4(high))
            }
        }

        /// Converts every lane to a 32-bit signed integer with `_mm_cvtps_epi32`, which rounds
        /// according to the current SSE rounding mode (round-to-nearest-even unless changed)
        /// rather than truncating.
        #[inline]
        pub fn to_i32x4(self) -> I32x4 {
            // SAFETY: SSE2-only intrinsic; SSE2 is part of the x86-64 baseline.
            unsafe { I32x4(x86_64::_mm_cvtps_epi32(self.0)) }
        }
    }

    impl Default for F32x4 {
        /// Returns the all-zero vector.
        #[inline]
        fn default() -> F32x4 {
            // SAFETY: SSE-only intrinsic; SSE is part of the x86-64 baseline.
            unsafe { F32x4(x86_64::_mm_setzero_ps()) }
        }
    }

    impl Index<usize> for F32x4 {
        type Output = f32;

        /// Borrows lane `index`; panics if `index >= 4`.
        #[inline]
        fn index(&self, index: usize) -> &f32 {
            // SAFETY: `__m128` is 16 bytes holding four `f32` lanes and is at least as aligned
            // as `[f32; 4]`, so the register may be viewed as an array for the borrow's duration.
            let lanes = unsafe { &*(&self.0 as *const __m128 as *const [f32; 4]) };
            &lanes[index]
        }
    }

    impl IndexMut<usize> for F32x4 {
        /// Mutably borrows lane `index`; panics if `index >= 4`.
        #[inline]
        fn index_mut(&mut self, index: usize) -> &mut f32 {
            // SAFETY: same layout argument as `Index`; the exclusive borrow of `self` guarantees
            // the array view is unique.
            let lanes = unsafe { &mut *(&mut self.0 as *mut __m128 as *mut [f32; 4]) };
            &mut lanes[index]
        }
    }

    impl Debug for F32x4 {
        #[inline]
        fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
            fmt_lanes(f, &[self[0], self[1], self[2], self[3]])
        }
    }

    impl PartialEq for F32x4 {
        /// Two vectors are equal when all four lanes compare equal. A NaN lane never compares
        /// equal, matching scalar `f32` semantics. Relies on `U32x4::is_all_ones` (SSE4.1).
        #[inline]
        fn eq(&self, other: &F32x4) -> bool {
            self.packed_eq(*other).is_all_ones()
        }
    }

    impl Add<F32x4> for F32x4 {
        type Output = F32x4;

        #[inline]
        fn add(self, other: F32x4) -> F32x4 {
            // SAFETY: SSE-only intrinsic; SSE is part of the x86-64 baseline.
            unsafe { F32x4(x86_64::_mm_add_ps(self.0, other.0)) }
        }
    }

    impl Mul<F32x4> for F32x4 {
        type Output = F32x4;

        #[inline]
        fn mul(self, other: F32x4) -> F32x4 {
            // SAFETY: SSE-only intrinsic; SSE is part of the x86-64 baseline.
            unsafe { F32x4(x86_64::_mm_mul_ps(self.0, other.0)) }
        }
    }

    impl Sub<F32x4> for F32x4 {
        type Output = F32x4;

        #[inline]
        fn sub(self, other: F32x4) -> F32x4 {
            // SAFETY: SSE-only intrinsic; SSE is part of the x86-64 baseline.
            unsafe { F32x4(x86_64::_mm_sub_ps(self.0, other.0)) }
        }
    }

    // 32-bit signed integers

    /// Four packed `i32` lanes; lane 0 is the lowest 32 bits of the register.
    #[derive(Clone, Copy)]
    pub struct I32x4(pub __m128i);

    impl I32x4 {
        /// Builds a vector whose lanes 0 through 3 are `a`, `b`, `c` and `d`, in that order.
        #[inline]
        pub fn new(a: i32, b: i32, c: i32, d: i32) -> I32x4 {
            // SAFETY: `_mm_setr_epi32` only requires SSE2, part of the x86-64 baseline.
            unsafe { I32x4(x86_64::_mm_setr_epi32(a, b, c, d)) }
        }

        /// Builds a vector with `x` in all four lanes.
        #[inline]
        pub fn splat(x: i32) -> I32x4 {
            // SAFETY: SSE2-only intrinsic; SSE2 is part of the x86-64 baseline.
            unsafe { I32x4(x86_64::_mm_set1_epi32(x)) }
        }

        /// Reinterprets the same 128 bits as sixteen bytes; no conversion is performed.
        #[inline]
        pub fn as_u8x16(self) -> U8x16 {
            U8x16(self.0)
        }

        /// Lane-wise signed minimum of `self` and `other`.
        #[inline]
        pub fn min(self, other: I32x4) -> I32x4 {
            // SAFETY: `_mm_min_epi32` is an SSE4.1 instruction; sound only on CPUs with SSE4.1,
            // which this module assumes (see the module documentation).
            unsafe { I32x4(x86_64::_mm_min_epi32(self.0, other.0)) }
        }
    }

    impl Index<usize> for I32x4 {
        type Output = i32;

        /// Borrows lane `index`; panics if `index >= 4`.
        #[inline]
        fn index(&self, index: usize) -> &i32 {
            // SAFETY: `__m128i` is 16 bytes of plain integer data and is at least as aligned as
            // `[i32; 4]`, so the register may be viewed as an array for the borrow's duration.
            let lanes = unsafe { &*(&self.0 as *const __m128i as *const [i32; 4]) };
            &lanes[index]
        }
    }

    impl Debug for I32x4 {
        #[inline]
        fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
            fmt_lanes(f, &[self[0], self[1], self[2], self[3]])
        }
    }

    impl Sub<I32x4> for I32x4 {
        type Output = I32x4;

        #[inline]
        fn sub(self, other: I32x4) -> I32x4 {
            // SAFETY: SSE2-only intrinsic; SSE2 is part of the x86-64 baseline.
            unsafe { I32x4(x86_64::_mm_sub_epi32(self.0, other.0)) }
        }
    }

    // 32-bit unsigned integers

    /// Four packed `u32` lanes; produced here as the mask result of `F32x4::packed_eq`.
    #[derive(Clone, Copy)]
    pub struct U32x4(pub __m128i);

    impl U32x4 {
        /// Returns `true` when every one of the 128 bits is set.
        #[inline]
        fn is_all_ones(&self) -> bool {
            // SAFETY: `_mm_test_all_ones` is an SSE4.1 operation; sound only on CPUs with
            // SSE4.1, which this module assumes (see the module documentation).
            unsafe { x86_64::_mm_test_all_ones(self.0) != 0 }
        }
    }

    impl Index<usize> for U32x4 {
        type Output = u32;

        /// Borrows lane `index`; panics if `index >= 4`.
        #[inline]
        fn index(&self, index: usize) -> &u32 {
            // SAFETY: `__m128i` is 16 bytes of plain integer data and is at least as aligned as
            // `[u32; 4]`, so the register may be viewed as an array for the borrow's duration.
            let lanes = unsafe { &*(&self.0 as *const __m128i as *const [u32; 4]) };
            &lanes[index]
        }
    }

    impl Debug for U32x4 {
        #[inline]
        fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
            fmt_lanes(f, &[self[0], self[1], self[2], self[3]])
        }
    }

    // 8-bit unsigned integers

    /// Sixteen packed `u8` lanes; byte 0 is the lowest 8 bits of the register.
    #[derive(Clone, Copy)]
    pub struct U8x16(pub __m128i);

    impl U8x16 {
        /// Reinterprets the same 128 bits as four `i32` lanes; no conversion is performed.
        #[inline]
        pub fn as_i32x4(self) -> I32x4 {
            I32x4(self.0)
        }

        /// Byte shuffle (`_mm_shuffle_epi8`): output byte `i` is the byte of `self` selected by
        /// the low four bits of `indices` byte `i`, or zero when that index byte has its top bit
        /// set.
        #[inline]
        pub fn shuffle(self, indices: U8x16) -> U8x16 {
            // SAFETY: `_mm_shuffle_epi8` is an SSSE3 instruction; sound only on CPUs with SSSE3,
            // which this module assumes (see the module documentation).
            unsafe { U8x16(x86_64::_mm_shuffle_epi8(self.0, indices.0)) }
        }
    }

    impl Debug for U8x16 {
        #[inline]
        fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
            // SAFETY: `__m128i` is 16 bytes of plain integer data and `[u8; 16]` has alignment 1,
            // so the register may be viewed as a byte array for the borrow's duration.
            let bytes = unsafe { &*(&self.0 as *const __m128i as *const [u8; 16]) };
            fmt_lanes(f, &bytes[..])
        }
    }
}
|
|
@ -16,7 +16,6 @@ jemallocator = "0.1"
|
|||
lyon_geom = "0.12"
|
||||
lyon_path = "0.12"
|
||||
rayon = "1.0"
|
||||
simdeez = "0.4"
|
||||
svgtypes = "0.3"
|
||||
usvg = "0.4"
|
||||
|
||||
|
|
|
@ -30,14 +30,11 @@ use lyon_path::iterator::PathIter;
|
|||
use pathfinder_geometry::line_segment::{LineSegmentF32, LineSegmentU4, LineSegmentU8};
|
||||
use pathfinder_geometry::point::Point2DF32;
|
||||
use pathfinder_geometry::segment::{Segment, SegmentFlags, SegmentKind};
|
||||
use pathfinder_geometry::simd::{F32x4, I32x4};
|
||||
use pathfinder_geometry::stroke::{StrokeStyle, StrokeToFillIter};
|
||||
use pathfinder_geometry::util;
|
||||
use rayon::ThreadPoolBuilder;
|
||||
use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
|
||||
use simdeez::Simd;
|
||||
use simdeez::overloads::I32x4_41;
|
||||
use simdeez::sse41::Sse41;
|
||||
use std::arch::x86_64;
|
||||
use std::cmp::Ordering;
|
||||
use std::fmt::{self, Debug, Formatter};
|
||||
use std::fs::File;
|
||||
|
@ -1193,32 +1190,26 @@ impl BuiltObject {
|
|||
// TODO(pcwalton): SIMD-ify `tile_x` and `tile_y`.
|
||||
fn add_fill(&mut self, segment: &LineSegmentF32, tile_x: i16, tile_y: i16) {
|
||||
//println!("add_fill({:?} ({}, {}))", segment, tile_x, tile_y);
|
||||
let (px, subpx);
|
||||
unsafe {
|
||||
let mut segment = Sse41::cvtps_epi32(Sse41::mul_ps(segment.0, Sse41::set1_ps(256.0)));
|
||||
let mut segment = (segment.0 * F32x4::splat(256.0)).to_i32x4();
|
||||
|
||||
let mut tile_origin = Sse41::setzero_epi32();
|
||||
tile_origin[0] = (tile_x as i32) * (TILE_WIDTH as i32) * 256;
|
||||
tile_origin[1] = (tile_y as i32) * (TILE_HEIGHT as i32) * 256;
|
||||
tile_origin = Sse41::shuffle_epi32(tile_origin, 0b0100_0100);
|
||||
let tile_origin_x = (tile_x as i32) * (TILE_WIDTH as i32) * 256;
|
||||
let tile_origin_y = (tile_y as i32) * (TILE_HEIGHT as i32) * 256;
|
||||
let tile_origin = I32x4::new(tile_origin_x, tile_origin_y, tile_origin_x, tile_origin_y);
|
||||
|
||||
segment = Sse41::sub_epi32(segment, tile_origin);
|
||||
segment = segment - tile_origin;
|
||||
/*
|
||||
println!("... before min: {} {} {} {}",
|
||||
segment[0], segment[1], segment[2], segment[3]);
|
||||
*/
|
||||
//segment = Sse41::max_epi32(segment, Sse41::setzero_epi32());
|
||||
segment = Sse41::min_epi32(segment, Sse41::set1_epi32(0x0fff));
|
||||
segment = segment.min(I32x4::splat(0x0fff));
|
||||
//println!("... after min: {} {} {} {}", segment[0], segment[1], segment[2], segment[3]);
|
||||
|
||||
let mut shuffle_mask = Sse41::setzero_epi32();
|
||||
shuffle_mask[0] = 0x0c08_0400;
|
||||
shuffle_mask[1] = 0x0d05_0901;
|
||||
segment = Sse41::shuffle_epi8(segment, shuffle_mask);
|
||||
let shuffle_mask = I32x4::new(0x0c08_0400, 0x0d05_0901, 0, 0);
|
||||
segment = segment.as_u8x16().shuffle(shuffle_mask.as_u8x16()).as_i32x4();
|
||||
|
||||
px = LineSegmentU4((segment[1] | (segment[1] >> 12)) as u16);
|
||||
subpx = LineSegmentU8(segment[0] as u32);
|
||||
}
|
||||
let px = LineSegmentU4((segment[1] | (segment[1] >> 12)) as u16);
|
||||
let subpx = LineSegmentU8(segment[0] as u32);
|
||||
|
||||
let tile_index = self.tile_coords_to_index(tile_x, tile_y);
|
||||
|
||||
|
@ -1930,87 +1921,63 @@ impl PartialOrd<ActiveEdge> for ActiveEdge {
|
|||
#[derive(Clone, Copy)]
|
||||
struct Transform2DF32 {
|
||||
// Row-major order.
|
||||
matrix: <Sse41 as Simd>::Vf32,
|
||||
matrix: F32x4,
|
||||
vector: Point2DF32,
|
||||
}
|
||||
|
||||
impl Default for Transform2DF32 {
|
||||
fn default() -> Transform2DF32 {
|
||||
unsafe {
|
||||
let mut matrix = <Sse41 as Simd>::setzero_ps();
|
||||
matrix[0] = 1.0;
|
||||
matrix[3] = 1.0;
|
||||
Transform2DF32 { matrix, vector: Point2DF32::default() }
|
||||
}
|
||||
Self::from_scale(&Point2DF32::splat(1.0))
|
||||
}
|
||||
}
|
||||
|
||||
impl Transform2DF32 {
|
||||
fn from_scale(scale: &Point2DF32) -> Transform2DF32 {
|
||||
unsafe {
|
||||
let mut matrix = Sse41::setzero_ps();
|
||||
matrix[0] = scale.x();
|
||||
matrix[3] = scale.y();
|
||||
Transform2DF32 { matrix, vector: Point2DF32::default() }
|
||||
Transform2DF32 {
|
||||
matrix: F32x4::new(scale.x(), 0.0, 0.0, scale.y()),
|
||||
vector: Point2DF32::default(),
|
||||
}
|
||||
}
|
||||
|
||||
fn row_major(m11: f32, m12: f32, m21: f32, m22: f32, m31: f32, m32: f32) -> Transform2DF32 {
|
||||
unsafe {
|
||||
let mut matrix = Sse41::setzero_ps();
|
||||
matrix[0] = m11;
|
||||
matrix[1] = m12;
|
||||
matrix[2] = m21;
|
||||
matrix[3] = m22;
|
||||
Transform2DF32 { matrix, vector: Point2DF32::new(m31, m32) }
|
||||
Transform2DF32 {
|
||||
matrix: F32x4::new(m11, m12, m21, m22),
|
||||
vector: Point2DF32::new(m31, m32),
|
||||
}
|
||||
}
|
||||
|
||||
fn m11(&self) -> f32 { self.matrix[0] }
|
||||
fn m12(&self) -> f32 { self.matrix[1] }
|
||||
fn m21(&self) -> f32 { self.matrix[2] }
|
||||
fn m22(&self) -> f32 { self.matrix[3] }
|
||||
|
||||
fn transform_point(&self, point: &Point2DF32) -> Point2DF32 {
|
||||
unsafe {
|
||||
let xxyy = Sse41::shuffle_ps(point.0, point.0, 0b0101_0000);
|
||||
let x11_x12_y21_y22 = Sse41::mul_ps(xxyy, self.matrix);
|
||||
let y21_y22 = Sse41::shuffle_ps(x11_x12_y21_y22, x11_x12_y21_y22, 0b0000_1110);
|
||||
Point2DF32(Sse41::add_ps(Sse41::add_ps(x11_x12_y21_y22, y21_y22), self.vector.0))
|
||||
}
|
||||
let xxyy = F32x4::new(point.x(), point.x(), point.y(), point.y());
|
||||
let x11_x12_y21_y22 = xxyy * self.matrix;
|
||||
let y21_y22 = x11_x12_y21_y22.splat_high_half();
|
||||
Point2DF32(x11_x12_y21_y22 + y21_y22 + self.vector.0)
|
||||
}
|
||||
|
||||
fn post_mul(&self, other: &Transform2DF32) -> Transform2DF32 {
|
||||
unsafe {
|
||||
// Here `a` is self and `b` is `other`.
|
||||
let a11a21a11a21 = Sse41::shuffle_ps(self.matrix, self.matrix, 0b1000_1000);
|
||||
let b11b11b12b12 = Sse41::shuffle_ps(other.matrix, other.matrix, 0b0101_0000);
|
||||
let lhs = Sse41::mul_ps(a11a21a11a21, b11b11b12b12);
|
||||
let a11a21a11a21 = F32x4::new(self.m11(), self.m21(), self.m11(), self.m21());
|
||||
let b11b11b12b12 = F32x4::new(other.m11(), other.m11(), other.m12(), other.m12());
|
||||
let lhs = a11a21a11a21 * b11b11b12b12;
|
||||
|
||||
let a12a22a12a22 = Sse41::shuffle_ps(self.matrix, self.matrix, 0b1101_1101);
|
||||
let b21b21b22b22 = Sse41::shuffle_ps(other.matrix, other.matrix, 0b1111_1010);
|
||||
let rhs = Sse41::mul_ps(a12a22a12a22, b21b21b22b22);
|
||||
let a12a22a12a22 = F32x4::new(self.m12(), self.m22(), self.m12(), self.m22());
|
||||
let b21b21b22b22 = F32x4::new(other.m21(), other.m21(), other.m22(), other.m22());
|
||||
let rhs = a12a22a12a22 * b21b21b22b22;
|
||||
|
||||
let matrix = Sse41::add_ps(lhs, rhs);
|
||||
let matrix = lhs + rhs;
|
||||
let vector = other.transform_point(&self.vector) + other.vector;
|
||||
Transform2DF32 { matrix, vector }
|
||||
}
|
||||
}
|
||||
|
||||
fn pre_mul(&self, other: &Transform2DF32) -> Transform2DF32 {
|
||||
other.post_mul(self)
|
||||
}
|
||||
}
|
||||
|
||||
// SIMD extensions
|
||||
|
||||
trait SimdExt: Simd {
|
||||
// TODO(pcwalton): Default scalar implementation.
|
||||
unsafe fn shuffle_epi8(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32;
|
||||
}
|
||||
|
||||
impl SimdExt for Sse41 {
|
||||
#[inline(always)]
|
||||
unsafe fn shuffle_epi8(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32 {
|
||||
I32x4_41(x86_64::_mm_shuffle_epi8(a.0, b.0))
|
||||
}
|
||||
}
|
||||
|
||||
// Testing
|
||||
|
||||
#[cfg(test)]
|
||||
|
|
Loading…
Reference in New Issue