Remove simdeez
This commit is contained in:
parent
37e6e71251
commit
bbf193f00f
|
@ -435,7 +435,6 @@ dependencies = [
|
||||||
"lyon_path 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"lyon_path 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"serde 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)",
|
"serde 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"serde_derive 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)",
|
"serde_derive 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"simdeez 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
@ -626,11 +625,6 @@ dependencies = [
|
||||||
"syn 0.15.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
"syn 0.15.24 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "simdeez"
|
|
||||||
version = "0.4.2"
|
|
||||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "simplecss"
|
name = "simplecss"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
|
@ -734,7 +728,6 @@ dependencies = [
|
||||||
"quickcheck 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
"quickcheck 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
"rand 0.5.5 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"rayon 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
"rayon 1.0.3 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"simdeez 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)",
|
|
||||||
"svgtypes 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"svgtypes 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
"usvg 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
"usvg 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
|
||||||
]
|
]
|
||||||
|
@ -904,7 +897,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
"checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27"
|
"checksum scopeguard 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "94258f53601af11e6a49f722422f6e3425c52b06245a5cf9bc09908b174f5e27"
|
||||||
"checksum serde 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)" = "0e732ed5a5592c17d961555e3b552985baf98d50ce418b7b655f31f6ba7eb1b7"
|
"checksum serde 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)" = "0e732ed5a5592c17d961555e3b552985baf98d50ce418b7b655f31f6ba7eb1b7"
|
||||||
"checksum serde_derive 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)" = "b4d6115a3ca25c224e409185325afc16a0d5aaaabc15c42b09587d6f1ba39a5b"
|
"checksum serde_derive 1.0.84 (registry+https://github.com/rust-lang/crates.io-index)" = "b4d6115a3ca25c224e409185325afc16a0d5aaaabc15c42b09587d6f1ba39a5b"
|
||||||
"checksum simdeez 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)" = "53d1e4a8ee9c44fa7c2d6464b679bd62c6b156edb865f084eb51af7b34efaa63"
|
|
||||||
"checksum simplecss 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "135685097a85a64067df36e28a243e94a94f76d829087ce0be34eeb014260c0e"
|
"checksum simplecss 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "135685097a85a64067df36e28a243e94a94f76d829087ce0be34eeb014260c0e"
|
||||||
"checksum siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac"
|
"checksum siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "0b8de496cf83d4ed58b6be86c3a275b8602f6ffe98d3024a869e124147a9a3ac"
|
||||||
"checksum slab 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5f9776d6b986f77b35c6cf846c11ad986ff128fe0b2b63a3628e3755e8d3102d"
|
"checksum slab 0.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5f9776d6b986f77b35c6cf846c11ad986ff128fe0b2b63a3628e3755e8d3102d"
|
||||||
|
|
|
@ -12,4 +12,3 @@ lyon_geom = "0.12"
|
||||||
lyon_path = "0.12"
|
lyon_path = "0.12"
|
||||||
serde = "1.0"
|
serde = "1.0"
|
||||||
serde_derive = "1.0"
|
serde_derive = "1.0"
|
||||||
simdeez = "0.4"
|
|
||||||
|
|
|
@ -15,11 +15,6 @@
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
extern crate bitflags;
|
extern crate bitflags;
|
||||||
|
|
||||||
use simdeez::sse41::Sse41;
|
|
||||||
|
|
||||||
// TODO(pcwalton): Make this configurable.
|
|
||||||
pub type SimdImpl = Sse41;
|
|
||||||
|
|
||||||
pub mod clip;
|
pub mod clip;
|
||||||
pub mod cubic_to_quadratic;
|
pub mod cubic_to_quadratic;
|
||||||
pub mod line_segment;
|
pub mod line_segment;
|
||||||
|
@ -28,6 +23,7 @@ pub mod orientation;
|
||||||
pub mod point;
|
pub mod point;
|
||||||
pub mod segment;
|
pub mod segment;
|
||||||
pub mod segments;
|
pub mod segments;
|
||||||
|
pub mod simd;
|
||||||
pub mod stroke;
|
pub mod stroke;
|
||||||
pub mod transform;
|
pub mod transform;
|
||||||
pub mod util;
|
pub mod util;
|
||||||
|
|
|
@ -10,62 +10,40 @@
|
||||||
|
|
||||||
//! Line segment types, optimized with SIMD.
|
//! Line segment types, optimized with SIMD.
|
||||||
|
|
||||||
use crate::SimdImpl;
|
|
||||||
use crate::point::Point2DF32;
|
use crate::point::Point2DF32;
|
||||||
|
use crate::simd::F32x4;
|
||||||
use crate::util;
|
use crate::util;
|
||||||
use simdeez::Simd;
|
|
||||||
use std::ops::Sub;
|
use std::ops::Sub;
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug)]
|
#[derive(Clone, Copy, Debug, PartialEq, Default)]
|
||||||
pub struct LineSegmentF32(pub <SimdImpl as Simd>::Vf32);
|
pub struct LineSegmentF32(pub F32x4);
|
||||||
|
|
||||||
impl LineSegmentF32 {
|
impl LineSegmentF32 {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn new(from: &Point2DF32, to: &Point2DF32) -> LineSegmentF32 {
|
pub fn new(from: &Point2DF32, to: &Point2DF32) -> LineSegmentF32 {
|
||||||
unsafe {
|
LineSegmentF32(F32x4::new(from.x(), from.y(), to.x(), to.y()))
|
||||||
LineSegmentF32(SimdImpl::castpd_ps(SimdImpl::unpacklo_pd(
|
|
||||||
SimdImpl::castps_pd(from.0),
|
|
||||||
SimdImpl::castps_pd(to.0),
|
|
||||||
)))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn from(&self) -> Point2DF32 {
|
pub fn from(&self) -> Point2DF32 {
|
||||||
unsafe {
|
Point2DF32(self.0)
|
||||||
Point2DF32(SimdImpl::castpd_ps(SimdImpl::unpacklo_pd(
|
|
||||||
SimdImpl::castps_pd(self.0),
|
|
||||||
SimdImpl::setzero_pd(),
|
|
||||||
)))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn to(&self) -> Point2DF32 {
|
pub fn to(&self) -> Point2DF32 {
|
||||||
unsafe {
|
Point2DF32(self.0.swap_halves())
|
||||||
Point2DF32(SimdImpl::castpd_ps(SimdImpl::unpackhi_pd(
|
|
||||||
SimdImpl::castps_pd(self.0),
|
|
||||||
SimdImpl::setzero_pd(),
|
|
||||||
)))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn set_from(&mut self, point: &Point2DF32) {
|
pub fn set_from(&mut self, point: &Point2DF32) {
|
||||||
unsafe {
|
self.0[0] = point.x();
|
||||||
let (mut this, point) = (SimdImpl::castps_pd(self.0), SimdImpl::castps_pd(point.0));
|
self.0[1] = point.y();
|
||||||
this[0] = point[0];
|
|
||||||
self.0 = SimdImpl::castpd_ps(this);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn set_to(&mut self, point: &Point2DF32) {
|
pub fn set_to(&mut self, point: &Point2DF32) {
|
||||||
unsafe {
|
self.0[2] = point.x();
|
||||||
let (mut this, point) = (SimdImpl::castps_pd(self.0), SimdImpl::castps_pd(point.0));
|
self.0[3] = point.y();
|
||||||
this[1] = point[0];
|
|
||||||
self.0 = SimdImpl::castpd_ps(this);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[allow(clippy::wrong_self_convention)]
|
#[allow(clippy::wrong_self_convention)]
|
||||||
|
@ -92,34 +70,17 @@ impl LineSegmentF32 {
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn scale(&self, factor: f32) -> LineSegmentF32 {
|
pub fn scale(&self, factor: f32) -> LineSegmentF32 {
|
||||||
unsafe { LineSegmentF32(SimdImpl::mul_ps(self.0, SimdImpl::set1_ps(factor))) }
|
LineSegmentF32(self.0 * F32x4::splat(factor))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn split(&self, t: f32) -> (LineSegmentF32, LineSegmentF32) {
|
pub fn split(&self, t: f32) -> (LineSegmentF32, LineSegmentF32) {
|
||||||
debug_assert!(t >= 0.0 && t <= 1.0);
|
debug_assert!(t >= 0.0 && t <= 1.0);
|
||||||
unsafe {
|
let (from_from, to_to) = (self.0.splat_low_half(), self.0.splat_high_half());
|
||||||
let from_from = SimdImpl::castpd_ps(SimdImpl::unpacklo_pd(
|
|
||||||
SimdImpl::castps_pd(self.0),
|
|
||||||
SimdImpl::castps_pd(self.0),
|
|
||||||
));
|
|
||||||
let to_to = SimdImpl::castpd_ps(SimdImpl::unpackhi_pd(
|
|
||||||
SimdImpl::castps_pd(self.0),
|
|
||||||
SimdImpl::castps_pd(self.0),
|
|
||||||
));
|
|
||||||
let d_d = to_to - from_from;
|
let d_d = to_to - from_from;
|
||||||
let mid_mid = from_from + d_d * SimdImpl::set1_ps(t);
|
let mid_mid = from_from + d_d * F32x4::splat(t);
|
||||||
(
|
(LineSegmentF32(F32x4::new(from_from[0], from_from[1], mid_mid[0], mid_mid[1])),
|
||||||
LineSegmentF32(SimdImpl::castpd_ps(SimdImpl::unpacklo_pd(
|
LineSegmentF32(F32x4::new(mid_mid[0], mid_mid[1], to_to[0], to_to[1])))
|
||||||
SimdImpl::castps_pd(from_from),
|
|
||||||
SimdImpl::castps_pd(mid_mid),
|
|
||||||
))),
|
|
||||||
LineSegmentF32(SimdImpl::castpd_ps(SimdImpl::unpackhi_pd(
|
|
||||||
SimdImpl::castps_pd(mid_mid),
|
|
||||||
SimdImpl::castps_pd(to_to),
|
|
||||||
))),
|
|
||||||
)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the upper segment first, followed by the lower segment.
|
// Returns the upper segment first, followed by the lower segment.
|
||||||
|
@ -150,7 +111,7 @@ impl LineSegmentF32 {
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn reversed(&self) -> LineSegmentF32 {
|
pub fn reversed(&self) -> LineSegmentF32 {
|
||||||
unsafe { LineSegmentF32(SimdImpl::shuffle_ps(self.0, self.0, 0b0100_1110)) }
|
LineSegmentF32(self.0.swap_halves())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
@ -193,35 +154,11 @@ impl LineSegmentF32 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PartialEq for LineSegmentF32 {
|
|
||||||
#[inline]
|
|
||||||
fn eq(&self, other: &LineSegmentF32) -> bool {
|
|
||||||
unsafe {
|
|
||||||
let results = SimdImpl::castps_epi32(SimdImpl::cmpeq_ps(self.0, other.0));
|
|
||||||
// FIXME(pcwalton): Is there a better way to do this?
|
|
||||||
results[0] == -1 && results[1] == -1 && results[2] == -1 && results[3] == -1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Default for LineSegmentF32 {
|
|
||||||
#[inline]
|
|
||||||
fn default() -> LineSegmentF32 {
|
|
||||||
unsafe { LineSegmentF32(SimdImpl::setzero_ps()) }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Sub<Point2DF32> for LineSegmentF32 {
|
impl Sub<Point2DF32> for LineSegmentF32 {
|
||||||
type Output = LineSegmentF32;
|
type Output = LineSegmentF32;
|
||||||
#[inline]
|
#[inline]
|
||||||
fn sub(self, point: Point2DF32) -> LineSegmentF32 {
|
fn sub(self, point: Point2DF32) -> LineSegmentF32 {
|
||||||
unsafe {
|
LineSegmentF32(self.0 - point.0.splat_low_half())
|
||||||
let point_point = SimdImpl::castpd_ps(SimdImpl::unpacklo_pd(
|
|
||||||
SimdImpl::castps_pd(point.0),
|
|
||||||
SimdImpl::castps_pd(point.0),
|
|
||||||
));
|
|
||||||
LineSegmentF32(self.0 - point_point)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -10,28 +10,22 @@
|
||||||
|
|
||||||
//! A SIMD-optimized point type.
|
//! A SIMD-optimized point type.
|
||||||
|
|
||||||
use crate::SimdImpl;
|
use crate::simd::F32x4;
|
||||||
use euclid::Point2D;
|
use euclid::Point2D;
|
||||||
use simdeez::Simd;
|
|
||||||
use std::ops::{Add, Mul, Sub};
|
use std::ops::{Add, Mul, Sub};
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug)]
|
#[derive(Clone, Copy, Debug, Default)]
|
||||||
pub struct Point2DF32(pub <SimdImpl as Simd>::Vf32);
|
pub struct Point2DF32(pub F32x4);
|
||||||
|
|
||||||
impl Point2DF32 {
|
impl Point2DF32 {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn new(x: f32, y: f32) -> Point2DF32 {
|
pub fn new(x: f32, y: f32) -> Point2DF32 {
|
||||||
unsafe {
|
Point2DF32(F32x4::new(x, y, 0.0, 0.0))
|
||||||
let mut data = SimdImpl::setzero_ps();
|
|
||||||
data[0] = x;
|
|
||||||
data[1] = y;
|
|
||||||
Point2DF32(data)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn splat(value: f32) -> Point2DF32 {
|
pub fn splat(value: f32) -> Point2DF32 {
|
||||||
unsafe { Point2DF32(SimdImpl::set1_ps(value)) }
|
Point2DF32(F32x4::splat(value))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
@ -56,29 +50,20 @@ impl Point2DF32 {
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn min(&self, other: Point2DF32) -> Point2DF32 {
|
pub fn min(&self, other: Point2DF32) -> Point2DF32 {
|
||||||
unsafe { Point2DF32(SimdImpl::min_ps(self.0, other.0)) }
|
Point2DF32(self.0.min(other.0))
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn max(&self, other: Point2DF32) -> Point2DF32 {
|
pub fn max(&self, other: Point2DF32) -> Point2DF32 {
|
||||||
unsafe { Point2DF32(SimdImpl::max_ps(self.0, other.0)) }
|
Point2DF32(self.0.max(other.0))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl PartialEq for Point2DF32 {
|
impl PartialEq for Point2DF32 {
|
||||||
#[inline]
|
#[inline]
|
||||||
fn eq(&self, other: &Point2DF32) -> bool {
|
fn eq(&self, other: &Point2DF32) -> bool {
|
||||||
unsafe {
|
let results = self.0.packed_eq(other.0);
|
||||||
let results = SimdImpl::castps_epi32(SimdImpl::cmpeq_ps(self.0, other.0));
|
results[0] != 0 && results[1] != 0
|
||||||
results[0] == -1 && results[1] == -1
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Default for Point2DF32 {
|
|
||||||
#[inline]
|
|
||||||
fn default() -> Point2DF32 {
|
|
||||||
unsafe { Point2DF32(SimdImpl::setzero_ps()) }
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -10,10 +10,9 @@
|
||||||
|
|
||||||
//! Line or curve segments, optimized with SIMD.
|
//! Line or curve segments, optimized with SIMD.
|
||||||
|
|
||||||
use crate::SimdImpl;
|
|
||||||
use crate::line_segment::LineSegmentF32;
|
use crate::line_segment::LineSegmentF32;
|
||||||
use crate::point::Point2DF32;
|
use crate::point::Point2DF32;
|
||||||
use simdeez::Simd;
|
use crate::simd::F32x4;
|
||||||
|
|
||||||
#[derive(Clone, Copy, Debug, PartialEq)]
|
#[derive(Clone, Copy, Debug, PartialEq)]
|
||||||
pub struct Segment {
|
pub struct Segment {
|
||||||
|
@ -160,12 +159,9 @@ pub struct CubicSegment<'s>(&'s Segment);
|
||||||
impl<'s> CubicSegment<'s> {
|
impl<'s> CubicSegment<'s> {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn flatten_once(self, tolerance: f32) -> Option<Segment> {
|
pub fn flatten_once(self, tolerance: f32) -> Option<Segment> {
|
||||||
let s2inv;
|
|
||||||
unsafe {
|
|
||||||
let (baseline, ctrl) = (self.0.baseline.0, self.0.ctrl.0);
|
let (baseline, ctrl) = (self.0.baseline.0, self.0.ctrl.0);
|
||||||
let from_from = SimdImpl::shuffle_ps(baseline, baseline, 0b0100_0100);
|
let from_from = baseline.splat_low_half();
|
||||||
|
let v0102 = ctrl - from_from;
|
||||||
let v0102 = SimdImpl::sub_ps(ctrl, from_from);
|
|
||||||
|
|
||||||
// v01.x v01.y v02.x v02.y
|
// v01.x v01.y v02.x v02.y
|
||||||
// * v01.x v01.y v01.y v01.x
|
// * v01.x v01.y v01.y v01.x
|
||||||
|
@ -175,15 +171,14 @@ impl<'s> CubicSegment<'s> {
|
||||||
// +-------+ +-----+
|
// +-------+ +-----+
|
||||||
// + -
|
// + -
|
||||||
// v01 len^2 determinant
|
// v01 len^2 determinant
|
||||||
let products = SimdImpl::mul_ps(v0102, SimdImpl::shuffle_ps(v0102, v0102, 0b0001_0100));
|
let products = v0102 * F32x4::new(v0102[0], v0102[1], v0102[1], v0102[0]);
|
||||||
|
|
||||||
let det = products[2] - products[3];
|
let det = products[2] - products[3];
|
||||||
if det == 0.0 {
|
if det == 0.0 {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
s2inv = (products[0] + products[1]).sqrt() / det;
|
let s2inv = (products[0] + products[1]).sqrt() / det;
|
||||||
}
|
|
||||||
|
|
||||||
let t = 2.0 * ((tolerance / 3.0) * s2inv.abs()).sqrt();
|
let t = 2.0 * ((tolerance / 3.0) * s2inv.abs()).sqrt();
|
||||||
if t >= 1.0 - EPSILON || t == 0.0 {
|
if t >= 1.0 - EPSILON || t == 0.0 {
|
||||||
|
@ -197,71 +192,40 @@ impl<'s> CubicSegment<'s> {
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn split(self, t: f32) -> (Segment, Segment) {
|
pub fn split(self, t: f32) -> (Segment, Segment) {
|
||||||
unsafe {
|
let tttt = F32x4::splat(t);
|
||||||
let tttt = SimdImpl::set1_ps(t);
|
|
||||||
|
|
||||||
let p0p3 = self.0.baseline.0;
|
let p0p3 = self.0.baseline.0;
|
||||||
let p1p2 = self.0.ctrl.0;
|
let p1p2 = self.0.ctrl.0;
|
||||||
let p0p1 = assemble(&p0p3, &p1p2, 0, 0);
|
let p0p1 = F32x4::new(p0p3[0], p0p3[1], p1p2[0], p1p2[1]);
|
||||||
|
|
||||||
// p01 = lerp(p0, p1, t), p12 = lerp(p1, p2, t), p23 = lerp(p2, p3, t)
|
// p01 = lerp(p0, p1, t), p12 = lerp(p1, p2, t), p23 = lerp(p2, p3, t)
|
||||||
let p01p12 = SimdImpl::add_ps(p0p1, SimdImpl::mul_ps(tttt, SimdImpl::sub_ps(p1p2, p0p1)));
|
let p01p12 = p0p1 + tttt * (p1p2 - p0p1);
|
||||||
let pxxp23 = SimdImpl::add_ps(p1p2, SimdImpl::mul_ps(tttt, SimdImpl::sub_ps(p0p3, p1p2)));
|
let pxxp23 = p1p2 + tttt * (p0p3 - p1p2);
|
||||||
|
let p12p23 = F32x4::new(p01p12[2], p01p12[3], pxxp23[2], pxxp23[3]);
|
||||||
let p12p23 = assemble(&p01p12, &pxxp23, 1, 1);
|
|
||||||
|
|
||||||
// p012 = lerp(p01, p12, t), p123 = lerp(p12, p23, t)
|
// p012 = lerp(p01, p12, t), p123 = lerp(p12, p23, t)
|
||||||
let p012p123 =
|
let p012p123 = p01p12 + tttt * (p12p23 - p01p12);
|
||||||
SimdImpl::add_ps(p01p12, SimdImpl::mul_ps(tttt, SimdImpl::sub_ps(p12p23, p01p12)));
|
let p123 = p012p123.splat_high_half();
|
||||||
|
|
||||||
let p123 = pluck(&p012p123, 1);
|
|
||||||
|
|
||||||
// p0123 = lerp(p012, p123, t)
|
// p0123 = lerp(p012, p123, t)
|
||||||
let p0123 = SimdImpl::add_ps(p012p123, SimdImpl::mul_ps(tttt, SimdImpl::sub_ps(p123, p012p123)));
|
let p0123 = p012p123 + tttt * (p123 - p012p123);
|
||||||
|
|
||||||
let baseline0 = assemble(&p0p3, &p0123, 0, 0);
|
let baseline0 = F32x4::new(p0p3[0], p0p3[1], p0123[0], p0123[1]);
|
||||||
let ctrl0 = assemble(&p01p12, &p012p123, 0, 0);
|
let ctrl0 = F32x4::new(p01p12[0], p01p12[1], p012p123[0], p012p123[1]);
|
||||||
let baseline1 = assemble(&p0123, &p0p3, 0, 1);
|
let baseline1 = F32x4::new(p0123[0], p0123[1], p0p3[2], p0p3[3]);
|
||||||
let ctrl1 = assemble(&p012p123, &p12p23, 1, 1);
|
let ctrl1 = F32x4::new(p012p123[2], p012p123[3], p12p23[2], p12p23[3]);
|
||||||
|
|
||||||
// FIXME(pcwalton): Set flags appropriately!
|
(Segment {
|
||||||
return (
|
|
||||||
Segment {
|
|
||||||
baseline: LineSegmentF32(baseline0),
|
baseline: LineSegmentF32(baseline0),
|
||||||
ctrl: LineSegmentF32(ctrl0),
|
ctrl: LineSegmentF32(ctrl0),
|
||||||
kind: SegmentKind::Cubic,
|
kind: SegmentKind::Cubic,
|
||||||
flags: self.0.flags & SegmentFlags::FIRST_IN_SUBPATH,
|
flags: self.0.flags & SegmentFlags::FIRST_IN_SUBPATH,
|
||||||
},
|
}, Segment {
|
||||||
Segment {
|
|
||||||
baseline: LineSegmentF32(baseline1),
|
baseline: LineSegmentF32(baseline1),
|
||||||
ctrl: LineSegmentF32(ctrl1),
|
ctrl: LineSegmentF32(ctrl1),
|
||||||
kind: SegmentKind::Cubic,
|
kind: SegmentKind::Cubic,
|
||||||
flags: self.0.flags & SegmentFlags::CLOSES_SUBPATH,
|
flags: self.0.flags & SegmentFlags::CLOSES_SUBPATH,
|
||||||
},
|
})
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Constructs a new 4-element vector from two pairs of adjacent lanes in two input vectors.
|
|
||||||
unsafe fn assemble(
|
|
||||||
a_data: &<SimdImpl as Simd>::Vf32,
|
|
||||||
b_data: &<SimdImpl as Simd>::Vf32,
|
|
||||||
a_index: usize,
|
|
||||||
b_index: usize,
|
|
||||||
) -> <SimdImpl as Simd>::Vf32 {
|
|
||||||
let (a_data, b_data) = (SimdImpl::castps_pd(*a_data), SimdImpl::castps_pd(*b_data));
|
|
||||||
let mut result = SimdImpl::setzero_pd();
|
|
||||||
result[0] = a_data[a_index];
|
|
||||||
result[1] = b_data[b_index];
|
|
||||||
SimdImpl::castpd_ps(result)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Constructs a new 2-element vector from a pair of adjacent lanes in an input vector.
|
|
||||||
unsafe fn pluck(data: &<SimdImpl as Simd>::Vf32, index: usize) -> <SimdImpl as Simd>::Vf32 {
|
|
||||||
let data = SimdImpl::castps_pd(*data);
|
|
||||||
let mut result = SimdImpl::setzero_pd();
|
|
||||||
result[0] = data[index];
|
|
||||||
SimdImpl::castpd_ps(result)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[inline]
|
#[inline]
|
||||||
|
@ -272,15 +236,15 @@ impl<'s> CubicSegment<'s> {
|
||||||
#[inline]
|
#[inline]
|
||||||
pub fn y_extrema(self) -> (Option<f32>, Option<f32>) {
|
pub fn y_extrema(self) -> (Option<f32>, Option<f32>) {
|
||||||
let (t0, t1);
|
let (t0, t1);
|
||||||
unsafe {
|
let p0p1p2p3 = F32x4::new(self.0.baseline.from_y(),
|
||||||
let mut p0p1p2p3 = SimdImpl::setzero_ps();
|
self.0.ctrl.from_y(),
|
||||||
p0p1p2p3[0] = self.0.baseline.from_y();
|
self.0.ctrl.to_y(),
|
||||||
p0p1p2p3[1] = self.0.ctrl.from_y();
|
self.0.baseline.to_y());
|
||||||
p0p1p2p3[2] = self.0.ctrl.to_y();
|
let pxp0p1p2 = F32x4::new(self.0.baseline.from_y(),
|
||||||
p0p1p2p3[3] = self.0.baseline.to_y();
|
self.0.baseline.from_y(),
|
||||||
|
self.0.ctrl.from_y(),
|
||||||
let pxp0p1p2 = SimdImpl::shuffle_ps(p0p1p2p3, p0p1p2p3, 0b1001_0000);
|
self.0.ctrl.to_y());
|
||||||
let pxv0v1v2 = SimdImpl::sub_ps(p0p1p2p3, pxp0p1p2);
|
let pxv0v1v2 = p0p1p2p3 - pxp0p1p2;
|
||||||
let (v0, v1, v2) = (pxv0v1v2[1], pxv0v1v2[2], pxv0v1v2[3]);
|
let (v0, v1, v2) = (pxv0v1v2[1], pxv0v1v2[2], pxv0v1v2[3]);
|
||||||
|
|
||||||
let (v0_to_v1, v2_to_v1) = (v0 - v1, v2 - v1);
|
let (v0_to_v1, v2_to_v1) = (v0 - v1, v2 - v1);
|
||||||
|
@ -289,7 +253,6 @@ impl<'s> CubicSegment<'s> {
|
||||||
|
|
||||||
t0 = (v0_to_v1 + discrim) * denom;
|
t0 = (v0_to_v1 + discrim) * denom;
|
||||||
t1 = (v0_to_v1 - discrim) * denom;
|
t1 = (v0_to_v1 - discrim) * denom;
|
||||||
}
|
|
||||||
|
|
||||||
return match (
|
return match (
|
||||||
t0 > EPSILON && t0 < 1.0 - EPSILON,
|
t0 > EPSILON && t0 < 1.0 - EPSILON,
|
||||||
|
|
|
@ -0,0 +1,270 @@
|
||||||
|
// pathfinder/geometry/src/simd.rs
|
||||||
|
//
|
||||||
|
// Copyright © 2019 The Pathfinder Project Developers.
|
||||||
|
//
|
||||||
|
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
|
||||||
|
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
|
||||||
|
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
|
||||||
|
// option. This file may not be copied, modified, or distributed
|
||||||
|
// except according to those terms.
|
||||||
|
|
||||||
|
/// Four packed `f32` lanes.
pub type F32x4 = x86::F32x4;
/// Four packed `i32` lanes.
pub type I32x4 = x86::I32x4;
/// Four packed `u32` lanes (produced by lane-wise comparisons).
pub type U32x4 = x86::U32x4;
/// Sixteen packed `u8` lanes.
pub type U8x16 = x86::U8x16;

/// x86-64 implementation of the SIMD vector types, built on `std::arch::x86_64`.
///
/// Only SSE/SSE2 (the x86-64 baseline) is assumed unconditionally. Operations that have a
/// faster SSSE3/SSE4.1 instruction use it only when that feature is known to be available and
/// otherwise fall back to an equivalent baseline sequence.
mod x86 {
    use std::arch::x86_64::{self, __m128, __m128i};
    use std::cmp::PartialEq;
    use std::fmt::{self, Debug, Formatter};
    use std::mem;
    use std::ops::{Add, Index, IndexMut, Mul, Sub};

    // 32-bit floats

    /// Four `f32` lanes stored in one 128-bit register.
    #[derive(Clone, Copy)]
    pub struct F32x4(pub __m128);

    impl F32x4 {
        /// Builds a vector whose lanes 0..=3 are `a`, `b`, `c`, `d` in that order.
        #[inline]
        pub fn new(a: f32, b: f32, c: f32, d: f32) -> F32x4 {
            // SAFETY: SSE is part of the x86-64 baseline.
            unsafe { F32x4(x86_64::_mm_setr_ps(a, b, c, d)) }
        }

        /// Builds a vector with all four lanes set to `x`.
        #[inline]
        pub fn splat(x: f32) -> F32x4 {
            // SAFETY: SSE is part of the x86-64 baseline.
            unsafe { F32x4(x86_64::_mm_set1_ps(x)) }
        }

        /// Lane-wise minimum (`minps`).
        #[inline]
        pub fn min(self, other: F32x4) -> F32x4 {
            // SAFETY: SSE is part of the x86-64 baseline.
            unsafe { F32x4(x86_64::_mm_min_ps(self.0, other.0)) }
        }

        /// Lane-wise maximum (`maxps`).
        #[inline]
        pub fn max(self, other: F32x4) -> F32x4 {
            // SAFETY: SSE is part of the x86-64 baseline.
            unsafe { F32x4(x86_64::_mm_max_ps(self.0, other.0)) }
        }

        /// Lane-wise equality test. Each result lane is all ones (`0xffff_ffff`) where the
        /// inputs compare equal and zero otherwise.
        #[inline]
        pub fn packed_eq(self, other: F32x4) -> U32x4 {
            // SAFETY: SSE/SSE2 are part of the x86-64 baseline.
            unsafe {
                U32x4(x86_64::_mm_castps_si128(x86_64::_mm_cmpeq_ps(
                    self.0, other.0,
                )))
            }
        }

        /// Returns lanes `[2, 3, 0, 1]`, i.e. the low and high 64-bit halves exchanged.
        #[inline]
        pub fn swap_halves(self) -> F32x4 {
            // SAFETY: SSE is part of the x86-64 baseline.
            unsafe { F32x4(x86_64::_mm_shuffle_ps(self.0, self.0, 0b0100_1110)) }
        }

        /// Returns lanes `[0, 1, 0, 1]`, i.e. the low half copied into both halves.
        #[inline]
        pub fn splat_low_half(self) -> F32x4 {
            // SAFETY: SSE is part of the x86-64 baseline.
            unsafe { F32x4(x86_64::_mm_shuffle_ps(self.0, self.0, 0b0100_0100)) }
        }

        /// Returns lanes `[2, 3, 2, 3]`, i.e. the high half copied into both halves.
        #[inline]
        pub fn splat_high_half(self) -> F32x4 {
            // SAFETY: SSE is part of the x86-64 baseline.
            unsafe { F32x4(x86_64::_mm_shuffle_ps(self.0, self.0, 0b1110_1110)) }
        }

        /// Interleaves the lanes of `self` (s) and `other` (o), returning
        /// `([s0, o0, s1, o1], [s2, o2, s3, o3])`.
        #[inline]
        pub fn interleave(self, other: F32x4) -> (F32x4, F32x4) {
            // SAFETY: SSE is part of the x86-64 baseline.
            unsafe {
                (
                    F32x4(x86_64::_mm_unpacklo_ps(self.0, other.0)),
                    F32x4(x86_64::_mm_unpackhi_ps(self.0, other.0)),
                )
            }
        }

        /// Converts each lane to `i32` with `cvtps2dq`, which rounds according to the current
        /// MXCSR rounding mode (round-to-nearest-even unless it has been changed).
        #[inline]
        pub fn to_i32x4(self) -> I32x4 {
            // SAFETY: SSE2 is part of the x86-64 baseline.
            unsafe { I32x4(x86_64::_mm_cvtps_epi32(self.0)) }
        }
    }

    impl Default for F32x4 {
        /// Returns the all-zero vector.
        #[inline]
        fn default() -> F32x4 {
            // SAFETY: SSE is part of the x86-64 baseline.
            unsafe { F32x4(x86_64::_mm_setzero_ps()) }
        }
    }

    impl Index<usize> for F32x4 {
        type Output = f32;

        /// Borrows lane `index`; panics if `index >= 4`.
        #[inline]
        fn index(&self, index: usize) -> &f32 {
            // SAFETY: `__m128` is 16 bytes with 16-byte alignment, so it can be viewed as a
            // `[f32; 4]`; every bit pattern is a valid `f32`.
            let lanes = unsafe { &*(&self.0 as *const __m128 as *const [f32; 4]) };
            &lanes[index]
        }
    }

    impl IndexMut<usize> for F32x4 {
        /// Mutably borrows lane `index`; panics if `index >= 4`.
        #[inline]
        fn index_mut(&mut self, index: usize) -> &mut f32 {
            // SAFETY: same layout argument as `Index`; the exclusive borrow of `self` makes the
            // returned reference unique.
            let lanes = unsafe { &mut *(&mut self.0 as *mut __m128 as *mut [f32; 4]) };
            &mut lanes[index]
        }
    }

    impl Debug for F32x4 {
        #[inline]
        fn fmt(&self, f: &mut Formatter) -> fmt::Result {
            write!(f, "<{}, {}, {}, {}>", self[0], self[1], self[2], self[3])
        }
    }

    impl PartialEq for F32x4 {
        /// True only when all four lanes compare equal (so any NaN lane makes this false).
        #[inline]
        fn eq(&self, other: &F32x4) -> bool {
            self.packed_eq(*other).is_all_ones()
        }
    }

    impl Add<F32x4> for F32x4 {
        type Output = F32x4;

        #[inline]
        fn add(self, other: F32x4) -> F32x4 {
            // SAFETY: SSE is part of the x86-64 baseline.
            unsafe { F32x4(x86_64::_mm_add_ps(self.0, other.0)) }
        }
    }

    impl Mul<F32x4> for F32x4 {
        type Output = F32x4;

        #[inline]
        fn mul(self, other: F32x4) -> F32x4 {
            // SAFETY: SSE is part of the x86-64 baseline.
            unsafe { F32x4(x86_64::_mm_mul_ps(self.0, other.0)) }
        }
    }

    impl Sub<F32x4> for F32x4 {
        type Output = F32x4;

        #[inline]
        fn sub(self, other: F32x4) -> F32x4 {
            // SAFETY: SSE is part of the x86-64 baseline.
            unsafe { F32x4(x86_64::_mm_sub_ps(self.0, other.0)) }
        }
    }

    // 32-bit signed integers

    /// Four `i32` lanes stored in one 128-bit register.
    #[derive(Clone, Copy)]
    pub struct I32x4(pub __m128i);

    impl I32x4 {
        /// Builds a vector whose lanes 0..=3 are `a`, `b`, `c`, `d` in that order.
        #[inline]
        pub fn new(a: i32, b: i32, c: i32, d: i32) -> I32x4 {
            // SAFETY: SSE2 is part of the x86-64 baseline.
            unsafe { I32x4(x86_64::_mm_setr_epi32(a, b, c, d)) }
        }

        /// Builds a vector with all four lanes set to `x`.
        #[inline]
        pub fn splat(x: i32) -> I32x4 {
            // SAFETY: SSE2 is part of the x86-64 baseline.
            unsafe { I32x4(x86_64::_mm_set1_epi32(x)) }
        }

        /// Reinterprets the same 128 bits as sixteen bytes (no conversion).
        #[inline]
        pub fn as_u8x16(self) -> U8x16 {
            U8x16(self.0)
        }

        /// Lane-wise signed minimum.
        #[inline]
        pub fn min(self, other: I32x4) -> I32x4 {
            unsafe {
                if cfg!(target_feature = "sse4.1") {
                    // SAFETY: SSE4.1 is enabled for this compilation.
                    I32x4(x86_64::_mm_min_epi32(self.0, other.0))
                } else {
                    // `pminsd` is SSE4.1-only, so select with an SSE2 compare mask instead:
                    // take `self` where it is smaller, `other` everywhere else.
                    // SAFETY: SSE2 is part of the x86-64 baseline.
                    let self_is_less = x86_64::_mm_cmplt_epi32(self.0, other.0);
                    I32x4(x86_64::_mm_or_si128(
                        x86_64::_mm_and_si128(self_is_less, self.0),
                        x86_64::_mm_andnot_si128(self_is_less, other.0),
                    ))
                }
            }
        }
    }

    impl Index<usize> for I32x4 {
        type Output = i32;

        /// Borrows lane `index`; panics if `index >= 4`.
        #[inline]
        fn index(&self, index: usize) -> &i32 {
            // SAFETY: `__m128i` is 16 bytes with 16-byte alignment, so it can be viewed as a
            // `[i32; 4]`; every bit pattern is a valid `i32`.
            let lanes = unsafe { &*(&self.0 as *const __m128i as *const [i32; 4]) };
            &lanes[index]
        }
    }

    impl Sub<I32x4> for I32x4 {
        type Output = I32x4;

        /// Lane-wise wrapping subtraction (`psubd`).
        #[inline]
        fn sub(self, other: I32x4) -> I32x4 {
            // SAFETY: SSE2 is part of the x86-64 baseline.
            unsafe { I32x4(x86_64::_mm_sub_epi32(self.0, other.0)) }
        }
    }

    // 32-bit unsigned integers

    /// Four `u32` lanes stored in one 128-bit register.
    #[derive(Clone, Copy)]
    pub struct U32x4(pub __m128i);

    impl U32x4 {
        /// True when every one of the 128 bits is set.
        #[inline]
        fn is_all_ones(&self) -> bool {
            unsafe {
                if cfg!(target_feature = "sse4.1") {
                    // SAFETY: SSE4.1 is enabled for this compilation.
                    x86_64::_mm_test_all_ones(self.0) != 0
                } else {
                    // `ptest` is SSE4.1-only; compare every byte against 0xff and require all
                    // sixteen comparison bits to be set.
                    // SAFETY: SSE2 is part of the x86-64 baseline.
                    let ones = x86_64::_mm_set1_epi32(-1);
                    x86_64::_mm_movemask_epi8(x86_64::_mm_cmpeq_epi8(self.0, ones)) == 0xffff
                }
            }
        }
    }

    impl Index<usize> for U32x4 {
        type Output = u32;

        /// Borrows lane `index`; panics if `index >= 4`.
        #[inline]
        fn index(&self, index: usize) -> &u32 {
            // SAFETY: `__m128i` is 16 bytes with 16-byte alignment, so it can be viewed as a
            // `[u32; 4]`; every bit pattern is a valid `u32`.
            let lanes = unsafe { &*(&self.0 as *const __m128i as *const [u32; 4]) };
            &lanes[index]
        }
    }

    // 8-bit unsigned integers

    /// Sixteen `u8` lanes stored in one 128-bit register.
    #[derive(Clone, Copy)]
    pub struct U8x16(pub __m128i);

    impl U8x16 {
        /// Reinterprets the same 128 bits as four `i32` lanes (no conversion).
        #[inline]
        pub fn as_i32x4(self) -> I32x4 {
            I32x4(self.0)
        }

        /// Byte shuffle with `pshufb` semantics: output byte `i` is zero when bit 7 of
        /// `indices[i]` is set, otherwise it is `self[indices[i] & 0x0f]`.
        #[inline]
        pub fn shuffle(self, indices: U8x16) -> U8x16 {
            // `pshufb` needs SSSE3, which is not part of the x86-64 baseline, so only execute
            // it after the CPU has been checked (the check is resolved at compile time when
            // the feature is enabled for the build).
            if is_x86_feature_detected!("ssse3") {
                // SAFETY: SSSE3 support was just confirmed.
                return unsafe { U8x16(x86_64::_mm_shuffle_epi8(self.0, indices.0)) };
            }

            // Scalar fallback implementing the same selection rule.
            // SAFETY: `__m128i` and `[u8; 16]` are both 16 bytes and every bit pattern is
            // valid for either type.
            let source = unsafe { mem::transmute::<__m128i, [u8; 16]>(self.0) };
            let selectors = unsafe { mem::transmute::<__m128i, [u8; 16]>(indices.0) };
            let mut shuffled = [0u8; 16];
            for (out, &selector) in shuffled.iter_mut().zip(selectors.iter()) {
                if selector & 0x80 == 0 {
                    *out = source[usize::from(selector & 0x0f)];
                }
            }
            // SAFETY: see above; the transmute is a plain 16-byte copy.
            U8x16(unsafe { mem::transmute::<[u8; 16], __m128i>(shuffled) })
        }
    }
}
|
|
@ -16,7 +16,6 @@ jemallocator = "0.1"
|
||||||
lyon_geom = "0.12"
|
lyon_geom = "0.12"
|
||||||
lyon_path = "0.12"
|
lyon_path = "0.12"
|
||||||
rayon = "1.0"
|
rayon = "1.0"
|
||||||
simdeez = "0.4"
|
|
||||||
svgtypes = "0.3"
|
svgtypes = "0.3"
|
||||||
usvg = "0.4"
|
usvg = "0.4"
|
||||||
|
|
||||||
|
|
|
@ -30,14 +30,11 @@ use lyon_path::iterator::PathIter;
|
||||||
use pathfinder_geometry::line_segment::{LineSegmentF32, LineSegmentU4, LineSegmentU8};
|
use pathfinder_geometry::line_segment::{LineSegmentF32, LineSegmentU4, LineSegmentU8};
|
||||||
use pathfinder_geometry::point::Point2DF32;
|
use pathfinder_geometry::point::Point2DF32;
|
||||||
use pathfinder_geometry::segment::{Segment, SegmentFlags, SegmentKind};
|
use pathfinder_geometry::segment::{Segment, SegmentFlags, SegmentKind};
|
||||||
|
use pathfinder_geometry::simd::{F32x4, I32x4};
|
||||||
use pathfinder_geometry::stroke::{StrokeStyle, StrokeToFillIter};
|
use pathfinder_geometry::stroke::{StrokeStyle, StrokeToFillIter};
|
||||||
use pathfinder_geometry::util;
|
use pathfinder_geometry::util;
|
||||||
use rayon::ThreadPoolBuilder;
|
use rayon::ThreadPoolBuilder;
|
||||||
use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
|
use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
|
||||||
use simdeez::Simd;
|
|
||||||
use simdeez::overloads::I32x4_41;
|
|
||||||
use simdeez::sse41::Sse41;
|
|
||||||
use std::arch::x86_64;
|
|
||||||
use std::cmp::Ordering;
|
use std::cmp::Ordering;
|
||||||
use std::fmt::{self, Debug, Formatter};
|
use std::fmt::{self, Debug, Formatter};
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
@ -1193,32 +1190,26 @@ impl BuiltObject {
|
||||||
// TODO(pcwalton): SIMD-ify `tile_x` and `tile_y`.
|
// TODO(pcwalton): SIMD-ify `tile_x` and `tile_y`.
|
||||||
fn add_fill(&mut self, segment: &LineSegmentF32, tile_x: i16, tile_y: i16) {
|
fn add_fill(&mut self, segment: &LineSegmentF32, tile_x: i16, tile_y: i16) {
|
||||||
//println!("add_fill({:?} ({}, {}))", segment, tile_x, tile_y);
|
//println!("add_fill({:?} ({}, {}))", segment, tile_x, tile_y);
|
||||||
let (px, subpx);
|
let mut segment = (segment.0 * F32x4::splat(256.0)).to_i32x4();
|
||||||
unsafe {
|
|
||||||
let mut segment = Sse41::cvtps_epi32(Sse41::mul_ps(segment.0, Sse41::set1_ps(256.0)));
|
|
||||||
|
|
||||||
let mut tile_origin = Sse41::setzero_epi32();
|
let tile_origin_x = (tile_x as i32) * (TILE_WIDTH as i32) * 256;
|
||||||
tile_origin[0] = (tile_x as i32) * (TILE_WIDTH as i32) * 256;
|
let tile_origin_y = (tile_y as i32) * (TILE_HEIGHT as i32) * 256;
|
||||||
tile_origin[1] = (tile_y as i32) * (TILE_HEIGHT as i32) * 256;
|
let tile_origin = I32x4::new(tile_origin_x, tile_origin_y, tile_origin_x, tile_origin_y);
|
||||||
tile_origin = Sse41::shuffle_epi32(tile_origin, 0b0100_0100);
|
|
||||||
|
|
||||||
segment = Sse41::sub_epi32(segment, tile_origin);
|
segment = segment - tile_origin;
|
||||||
/*
|
/*
|
||||||
println!("... before min: {} {} {} {}",
|
println!("... before min: {} {} {} {}",
|
||||||
segment[0], segment[1], segment[2], segment[3]);
|
segment[0], segment[1], segment[2], segment[3]);
|
||||||
*/
|
*/
|
||||||
//segment = Sse41::max_epi32(segment, Sse41::setzero_epi32());
|
//segment = Sse41::max_epi32(segment, Sse41::setzero_epi32());
|
||||||
segment = Sse41::min_epi32(segment, Sse41::set1_epi32(0x0fff));
|
segment = segment.min(I32x4::splat(0x0fff));
|
||||||
//println!("... after min: {} {} {} {}", segment[0], segment[1], segment[2], segment[3]);
|
//println!("... after min: {} {} {} {}", segment[0], segment[1], segment[2], segment[3]);
|
||||||
|
|
||||||
let mut shuffle_mask = Sse41::setzero_epi32();
|
let shuffle_mask = I32x4::new(0x0c08_0400, 0x0d05_0901, 0, 0);
|
||||||
shuffle_mask[0] = 0x0c08_0400;
|
segment = segment.as_u8x16().shuffle(shuffle_mask.as_u8x16()).as_i32x4();
|
||||||
shuffle_mask[1] = 0x0d05_0901;
|
|
||||||
segment = Sse41::shuffle_epi8(segment, shuffle_mask);
|
|
||||||
|
|
||||||
px = LineSegmentU4((segment[1] | (segment[1] >> 12)) as u16);
|
let px = LineSegmentU4((segment[1] | (segment[1] >> 12)) as u16);
|
||||||
subpx = LineSegmentU8(segment[0] as u32);
|
let subpx = LineSegmentU8(segment[0] as u32);
|
||||||
}
|
|
||||||
|
|
||||||
let tile_index = self.tile_coords_to_index(tile_x, tile_y);
|
let tile_index = self.tile_coords_to_index(tile_x, tile_y);
|
||||||
|
|
||||||
|
@ -1930,87 +1921,63 @@ impl PartialOrd<ActiveEdge> for ActiveEdge {
|
||||||
#[derive(Clone, Copy)]
|
#[derive(Clone, Copy)]
|
||||||
struct Transform2DF32 {
|
struct Transform2DF32 {
|
||||||
// Row-major order.
|
// Row-major order.
|
||||||
matrix: <Sse41 as Simd>::Vf32,
|
matrix: F32x4,
|
||||||
vector: Point2DF32,
|
vector: Point2DF32,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for Transform2DF32 {
|
impl Default for Transform2DF32 {
|
||||||
fn default() -> Transform2DF32 {
|
fn default() -> Transform2DF32 {
|
||||||
unsafe {
|
Self::from_scale(&Point2DF32::splat(1.0))
|
||||||
let mut matrix = <Sse41 as Simd>::setzero_ps();
|
|
||||||
matrix[0] = 1.0;
|
|
||||||
matrix[3] = 1.0;
|
|
||||||
Transform2DF32 { matrix, vector: Point2DF32::default() }
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Transform2DF32 {
|
impl Transform2DF32 {
|
||||||
fn from_scale(scale: &Point2DF32) -> Transform2DF32 {
|
fn from_scale(scale: &Point2DF32) -> Transform2DF32 {
|
||||||
unsafe {
|
Transform2DF32 {
|
||||||
let mut matrix = Sse41::setzero_ps();
|
matrix: F32x4::new(scale.x(), 0.0, 0.0, scale.y()),
|
||||||
matrix[0] = scale.x();
|
vector: Point2DF32::default(),
|
||||||
matrix[3] = scale.y();
|
|
||||||
Transform2DF32 { matrix, vector: Point2DF32::default() }
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn row_major(m11: f32, m12: f32, m21: f32, m22: f32, m31: f32, m32: f32) -> Transform2DF32 {
|
fn row_major(m11: f32, m12: f32, m21: f32, m22: f32, m31: f32, m32: f32) -> Transform2DF32 {
|
||||||
unsafe {
|
Transform2DF32 {
|
||||||
let mut matrix = Sse41::setzero_ps();
|
matrix: F32x4::new(m11, m12, m21, m22),
|
||||||
matrix[0] = m11;
|
vector: Point2DF32::new(m31, m32),
|
||||||
matrix[1] = m12;
|
|
||||||
matrix[2] = m21;
|
|
||||||
matrix[3] = m22;
|
|
||||||
Transform2DF32 { matrix, vector: Point2DF32::new(m31, m32) }
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn m11(&self) -> f32 { self.matrix[0] }
|
||||||
|
fn m12(&self) -> f32 { self.matrix[1] }
|
||||||
|
fn m21(&self) -> f32 { self.matrix[2] }
|
||||||
|
fn m22(&self) -> f32 { self.matrix[3] }
|
||||||
|
|
||||||
fn transform_point(&self, point: &Point2DF32) -> Point2DF32 {
|
fn transform_point(&self, point: &Point2DF32) -> Point2DF32 {
|
||||||
unsafe {
|
let xxyy = F32x4::new(point.x(), point.x(), point.y(), point.y());
|
||||||
let xxyy = Sse41::shuffle_ps(point.0, point.0, 0b0101_0000);
|
let x11_x12_y21_y22 = xxyy * self.matrix;
|
||||||
let x11_x12_y21_y22 = Sse41::mul_ps(xxyy, self.matrix);
|
let y21_y22 = x11_x12_y21_y22.splat_high_half();
|
||||||
let y21_y22 = Sse41::shuffle_ps(x11_x12_y21_y22, x11_x12_y21_y22, 0b0000_1110);
|
Point2DF32(x11_x12_y21_y22 + y21_y22 + self.vector.0)
|
||||||
Point2DF32(Sse41::add_ps(Sse41::add_ps(x11_x12_y21_y22, y21_y22), self.vector.0))
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn post_mul(&self, other: &Transform2DF32) -> Transform2DF32 {
|
fn post_mul(&self, other: &Transform2DF32) -> Transform2DF32 {
|
||||||
unsafe {
|
|
||||||
// Here `a` is self and `b` is `other`.
|
// Here `a` is self and `b` is `other`.
|
||||||
let a11a21a11a21 = Sse41::shuffle_ps(self.matrix, self.matrix, 0b1000_1000);
|
let a11a21a11a21 = F32x4::new(self.m11(), self.m21(), self.m11(), self.m21());
|
||||||
let b11b11b12b12 = Sse41::shuffle_ps(other.matrix, other.matrix, 0b0101_0000);
|
let b11b11b12b12 = F32x4::new(other.m11(), other.m11(), other.m12(), other.m12());
|
||||||
let lhs = Sse41::mul_ps(a11a21a11a21, b11b11b12b12);
|
let lhs = a11a21a11a21 * b11b11b12b12;
|
||||||
|
|
||||||
let a12a22a12a22 = Sse41::shuffle_ps(self.matrix, self.matrix, 0b1101_1101);
|
let a12a22a12a22 = F32x4::new(self.m12(), self.m22(), self.m12(), self.m22());
|
||||||
let b21b21b22b22 = Sse41::shuffle_ps(other.matrix, other.matrix, 0b1111_1010);
|
let b21b21b22b22 = F32x4::new(other.m21(), other.m21(), other.m22(), other.m22());
|
||||||
let rhs = Sse41::mul_ps(a12a22a12a22, b21b21b22b22);
|
let rhs = a12a22a12a22 * b21b21b22b22;
|
||||||
|
|
||||||
let matrix = Sse41::add_ps(lhs, rhs);
|
let matrix = lhs + rhs;
|
||||||
let vector = other.transform_point(&self.vector) + other.vector;
|
let vector = other.transform_point(&self.vector) + other.vector;
|
||||||
Transform2DF32 { matrix, vector }
|
Transform2DF32 { matrix, vector }
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
fn pre_mul(&self, other: &Transform2DF32) -> Transform2DF32 {
|
fn pre_mul(&self, other: &Transform2DF32) -> Transform2DF32 {
|
||||||
other.post_mul(self)
|
other.post_mul(self)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// SIMD extensions
|
|
||||||
|
|
||||||
trait SimdExt: Simd {
|
|
||||||
// TODO(pcwalton): Default scalar implementation.
|
|
||||||
unsafe fn shuffle_epi8(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32;
|
|
||||||
}
|
|
||||||
|
|
||||||
impl SimdExt for Sse41 {
|
|
||||||
#[inline(always)]
|
|
||||||
unsafe fn shuffle_epi8(a: Self::Vi32, b: Self::Vi32) -> Self::Vi32 {
|
|
||||||
I32x4_41(x86_64::_mm_shuffle_epi8(a.0, b.0))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Testing
|
// Testing
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
|
Loading…
Reference in New Issue