Use 2-lane instead of 4-lane SIMD types for 2D vectors.

Also, this commit rewrites the `add_fill()` method so that it no longer uses shuffle
instructions, which can be slow and which made the code overly complicated. The
shuffle instructions have accordingly been removed from the various SIMD backends.
Patrick Walton 2019-06-25 14:43:13 -07:00
parent 222fa89b23
commit b886c157c1
24 changed files with 1561 additions and 417 deletions
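
Before the diff itself, here is a minimal, self-contained sketch of the pattern the commit adopts: a 2D point wraps a two-lane value directly and only widens to four lanes where a consumer still needs them. It is written against a plain `[f32; 2]` array in the style of the crate's scalar fallback backend, not the real `pathfinder_simd` types; the `main` demo and the array-returning `to_f32x4` here are illustrative stand-ins, not Pathfinder API.

// Minimal sketch (not the actual Pathfinder code) of the 2-lane pattern.

// Stand-in for the real F32x2, backed by a plain array as in the scalar backend.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub struct F32x2(pub [f32; 2]);

impl F32x2 {
    pub fn new(x: f32, y: f32) -> F32x2 {
        F32x2([x, y])
    }

    // Widen to four lanes, zero-filling the upper two, mirroring what
    // `to_f32x4()` does in the scalar backend later in this commit.
    pub fn to_f32x4(self) -> [f32; 4] {
        [self.0[0], self.0[1], 0.0, 0.0]
    }
}

// After this commit, Vector2F wraps a two-lane value instead of F32x4.
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub struct Vector2F(pub F32x2);

fn main() {
    let point = Vector2F(F32x2::new(1.0, 2.0));
    // Only widen when a four-lane consumer (e.g. a line segment holding
    // two points) actually needs it.
    println!("{:?} -> {:?}", point, point.0.to_f32x4());
}

The diff below shows the real versions of these types in the NEON and scalar backends, plus the call sites that change as a result.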

@ -24,15 +24,15 @@ struct Edge(LineSegment2F);
impl TEdge for Edge {
#[inline]
fn point_is_inside(&self, point: &Vector2F) -> bool {
let area = (self.0.to() - self.0.from()).det(*point - self.0.from());
fn point_is_inside(&self, point: Vector2F) -> bool {
let area = (self.0.to() - self.0.from()).det(point - self.0.from());
debug!("point_is_inside({:?}, {:?}), area={}", self, point, area);
area >= 0.0
}
fn intersect_line_segment(&self, segment: &LineSegment2F) -> ArrayVec<[f32; 3]> {
fn intersect_line_segment(&self, segment: LineSegment2F) -> ArrayVec<[f32; 3]> {
let mut results = ArrayVec::new();
if let Some(t) = segment.intersection_t(&self.0) {
if let Some(t) = segment.intersection_t(self.0) {
if t >= 0.0 && t <= 1.0 {
results.push(t);
}
@ -51,7 +51,7 @@ enum AxisAlignedEdge {
impl TEdge for AxisAlignedEdge {
#[inline]
fn point_is_inside(&self, point: &Vector2F) -> bool {
fn point_is_inside(&self, point: Vector2F) -> bool {
match *self {
AxisAlignedEdge::Left(x) => point.x() >= x,
AxisAlignedEdge::Top(y) => point.y() >= y,
@ -60,7 +60,7 @@ impl TEdge for AxisAlignedEdge {
}
}
fn intersect_line_segment(&self, segment: &LineSegment2F) -> ArrayVec<[f32; 3]> {
fn intersect_line_segment(&self, segment: LineSegment2F) -> ArrayVec<[f32; 3]> {
let mut results = ArrayVec::new();
let t = match *self {
AxisAlignedEdge::Left(x) | AxisAlignedEdge::Right(x) => segment.solve_t_for_x(x),
@ -74,26 +74,26 @@ impl TEdge for AxisAlignedEdge {
}
trait TEdge: Debug {
fn point_is_inside(&self, point: &Vector2F) -> bool;
fn intersect_line_segment(&self, segment: &LineSegment2F) -> ArrayVec<[f32; 3]>;
fn point_is_inside(&self, point: Vector2F) -> bool;
fn intersect_line_segment(&self, segment: LineSegment2F) -> ArrayVec<[f32; 3]>;
fn trivially_test_segment(&self, segment: &Segment) -> EdgeRelativeLocation {
let from_inside = self.point_is_inside(&segment.baseline.from());
let from_inside = self.point_is_inside(segment.baseline.from());
debug!(
"point {:?} inside {:?}: {:?}",
segment.baseline.from(),
self,
from_inside
);
if from_inside != self.point_is_inside(&segment.baseline.to()) {
if from_inside != self.point_is_inside(segment.baseline.to()) {
return EdgeRelativeLocation::Intersecting;
}
if !segment.is_line() {
if from_inside != self.point_is_inside(&segment.ctrl.from()) {
if from_inside != self.point_is_inside(segment.ctrl.from()) {
return EdgeRelativeLocation::Intersecting;
}
if !segment.is_quadratic() {
if from_inside != self.point_is_inside(&segment.ctrl.to()) {
if from_inside != self.point_is_inside(segment.ctrl.to()) {
return EdgeRelativeLocation::Intersecting;
}
}
@ -107,7 +107,7 @@ trait TEdge: Debug {
fn intersect_segment(&self, segment: &Segment) -> ArrayVec<[f32; 3]> {
if segment.is_line() {
return self.intersect_line_segment(&segment.baseline);
return self.intersect_line_segment(segment.baseline);
}
let mut segment = *segment;
@ -173,10 +173,10 @@ trait TEdge: Debug {
}
fn intersects_cubic_segment_hull(&self, cubic_segment: CubicSegment) -> bool {
let inside = self.point_is_inside(&cubic_segment.0.baseline.from());
inside != self.point_is_inside(&cubic_segment.0.ctrl.from())
|| inside != self.point_is_inside(&cubic_segment.0.ctrl.to())
|| inside != self.point_is_inside(&cubic_segment.0.baseline.to())
let inside = self.point_is_inside(cubic_segment.0.baseline.from());
inside != self.point_is_inside(cubic_segment.0.ctrl.from())
|| inside != self.point_is_inside(cubic_segment.0.ctrl.to())
|| inside != self.point_is_inside(cubic_segment.0.baseline.to())
}
}
@ -222,7 +222,7 @@ where
// We have a potential intersection.
debug!("potential intersection: {:?} edge: {:?}", segment, edge);
let mut starts_inside = edge.point_is_inside(&segment.baseline.from());
let mut starts_inside = edge.point_is_inside(segment.baseline.from());
let intersection_ts = edge.intersect_segment(&segment);
let mut last_t = 0.0;
debug!("... intersections: {:?}", intersection_ts);

@ -434,25 +434,25 @@ impl Contour {
debug_assert!(self.point_is_endpoint(point_index));
let mut segment = Segment::none();
segment.baseline.set_from(&self.position_of(point_index));
segment.baseline.set_from(self.position_of(point_index));
let point1_index = self.add_to_point_index(point_index, 1);
if self.point_is_endpoint(point1_index) {
segment.baseline.set_to(&self.position_of(point1_index));
segment.baseline.set_to(self.position_of(point1_index));
segment.kind = SegmentKind::Line;
} else {
segment.ctrl.set_from(&self.position_of(point1_index));
segment.ctrl.set_from(self.position_of(point1_index));
let point2_index = self.add_to_point_index(point_index, 2);
if self.point_is_endpoint(point2_index) {
segment.baseline.set_to(&self.position_of(point2_index));
segment.baseline.set_to(self.position_of(point2_index));
segment.kind = SegmentKind::Quadratic;
} else {
segment.ctrl.set_to(&self.position_of(point2_index));
segment.ctrl.set_to(self.position_of(point2_index));
segment.kind = SegmentKind::Cubic;
let point3_index = self.add_to_point_index(point_index, 3);
segment.baseline.set_to(&self.position_of(point3_index));
segment.baseline.set_to(self.position_of(point3_index));
}
}
@ -541,7 +541,7 @@ impl Contour {
pub fn apply_perspective(&mut self, perspective: &Perspective) {
for (point_index, point) in self.points.iter_mut().enumerate() {
*point = perspective.transform_point_2d(point);
*point = perspective.transform_point_2d(*point);
union_rect(&mut self.bounds, *point, point_index == 0);
}
}
@ -610,14 +610,14 @@ impl Contour {
let ctrl_position = &contour.points[ctrl_point_index];
handle_cubic(
self,
&Segment::quadratic(&baseline, *ctrl_position).to_cubic(),
&Segment::quadratic(baseline, *ctrl_position).to_cubic(),
);
} else if point_count == 4 {
let first_ctrl_point_index = last_endpoint_index as usize + 1;
let ctrl_position_0 = &contour.points[first_ctrl_point_index + 0];
let ctrl_position_1 = &contour.points[first_ctrl_point_index + 1];
let ctrl = LineSegment2F::new(*ctrl_position_0, *ctrl_position_1);
handle_cubic(self, &Segment::cubic(&baseline, &ctrl));
handle_cubic(self, &Segment::cubic(baseline, ctrl));
}
self.push_point(
@ -802,21 +802,21 @@ impl<'a> Iterator for ContourIter<'a> {
if self.index == contour.len() {
let point1 = contour.position_of(0);
self.index += 1;
return Some(Segment::line(&LineSegment2F::new(point0, point1)));
return Some(Segment::line(LineSegment2F::new(point0, point1)));
}
let point1_index = self.index;
self.index += 1;
let point1 = contour.position_of(point1_index);
if contour.point_is_endpoint(point1_index) {
return Some(Segment::line(&LineSegment2F::new(point0, point1)));
return Some(Segment::line(LineSegment2F::new(point0, point1)));
}
let point2_index = self.index;
let point2 = contour.position_of(point2_index);
self.index += 1;
if contour.point_is_endpoint(point2_index) {
return Some(Segment::quadratic(&LineSegment2F::new(point0, point2), point1));
return Some(Segment::quadratic(LineSegment2F::new(point0, point2), point1));
}
let point3_index = self.index;
@ -824,8 +824,8 @@ impl<'a> Iterator for ContourIter<'a> {
self.index += 1;
debug_assert!(contour.point_is_endpoint(point3_index));
return Some(Segment::cubic(
&LineSegment2F::new(point0, point3),
&LineSegment2F::new(point1, point2),
LineSegment2F::new(point0, point3),
LineSegment2F::new(point1, point2),
));
}
}

@ -39,9 +39,9 @@ impl Segment {
}
#[inline]
pub fn line(line: &LineSegment2F) -> Segment {
pub fn line(line: LineSegment2F) -> Segment {
Segment {
baseline: *line,
baseline: line,
ctrl: LineSegment2F::default(),
kind: SegmentKind::Line,
flags: SegmentFlags::empty(),
@ -49,9 +49,9 @@ impl Segment {
}
#[inline]
pub fn quadratic(baseline: &LineSegment2F, ctrl: Vector2F) -> Segment {
pub fn quadratic(baseline: LineSegment2F, ctrl: Vector2F) -> Segment {
Segment {
baseline: *baseline,
baseline,
ctrl: LineSegment2F::new(ctrl, Vector2F::default()),
kind: SegmentKind::Quadratic,
flags: SegmentFlags::empty(),
@ -59,10 +59,10 @@ impl Segment {
}
#[inline]
pub fn cubic(baseline: &LineSegment2F, ctrl: &LineSegment2F) -> Segment {
pub fn cubic(baseline: LineSegment2F, ctrl: LineSegment2F) -> Segment {
Segment {
baseline: *baseline,
ctrl: *ctrl,
baseline,
ctrl,
kind: SegmentKind::Cubic,
flags: SegmentFlags::empty(),
}
@ -91,7 +91,7 @@ impl Segment {
let (p0x, p0y) = (p3p0.z(), p3p0.w());
let (p1x, p1y) = (4.0 - p0x, (1.0 - p0x) * (3.0 - p0x) / p0y);
let p2p1 = F32x4::new(p1x, -p1y, p1x, p1y) * F32x4::splat(1.0 / 3.0);
return Segment::cubic(&LineSegment2F(p3p0), &LineSegment2F(p2p1));
return Segment::cubic(LineSegment2F(p3p0), LineSegment2F(p2p1));
}
#[inline]
@ -100,7 +100,7 @@ impl Segment {
let p1 = Vector2F::new(-SQRT_2 / 6.0 + 4.0 / 3.0, 7.0 * SQRT_2 / 6.0 - 4.0 / 3.0);
let flip = Vector2F::new(1.0, -1.0);
let (p2, p3) = (p1.scale_xy(flip), p0.scale_xy(flip));
Segment::cubic(&LineSegment2F::new(p3, p0), &LineSegment2F::new(p2, p1))
Segment::cubic(LineSegment2F::new(p3, p0), LineSegment2F::new(p2, p1))
}
#[inline]
@ -198,7 +198,7 @@ impl Segment {
// FIXME(pcwalton): Don't degree elevate!
if self.is_line() {
let (before, after) = self.as_line_segment().split(t);
(Segment::line(&before), Segment::line(&after))
(Segment::line(before), Segment::line(after))
} else {
self.to_cubic().as_cubic_segment().split(t)
}
@ -217,8 +217,8 @@ impl Segment {
#[inline]
pub fn transform(self, transform: &Transform2DF) -> Segment {
Segment {
baseline: transform.transform_line_segment(&self.baseline),
ctrl: transform.transform_line_segment(&self.ctrl),
baseline: transform.transform_line_segment(self.baseline),
ctrl: transform.transform_line_segment(self.ctrl),
kind: self.kind,
flags: self.flags,
}

@ -104,7 +104,7 @@ impl<'a> OutlineStrokeToFill<'a> {
stroker.output.add_join(self.style.line_width * 0.5,
self.style.line_join,
stroker.input.position_of(0),
&final_segment);
final_segment);
}
stroker.output.closed = true;
@ -235,7 +235,7 @@ impl Offset for Segment {
self.ctrl.from()
};
contour.add_join(distance, join, join_point, &LineSegment2F::new(p4, p3));
contour.add_join(distance, join, join_point, LineSegment2F::new(p4, p3));
}
// Push segment.
@ -245,7 +245,7 @@ impl Offset for Segment {
fn offset_once(&self, distance: f32) -> Segment {
if self.is_line() {
return Segment::line(&self.baseline.offset(distance));
return Segment::line(self.baseline.offset(distance));
}
if self.is_quadratic() {
@ -253,12 +253,12 @@ impl Offset for Segment {
let mut segment_1 = LineSegment2F::new(self.ctrl.from(), self.baseline.to());
segment_0 = segment_0.offset(distance);
segment_1 = segment_1.offset(distance);
let ctrl = match segment_0.intersection_t(&segment_1) {
let ctrl = match segment_0.intersection_t(segment_1) {
Some(t) => segment_0.sample(t),
None => segment_0.to().lerp(segment_1.from(), 0.5),
};
let baseline = LineSegment2F::new(segment_0.from(), segment_1.to());
return Segment::quadratic(&baseline, ctrl);
return Segment::quadratic(baseline, ctrl);
}
debug_assert!(self.is_cubic());
@ -268,13 +268,13 @@ impl Offset for Segment {
let mut segment_1 = LineSegment2F::new(self.ctrl.to(), self.baseline.to());
segment_0 = segment_0.offset(distance);
segment_1 = segment_1.offset(distance);
let ctrl = match segment_0.intersection_t(&segment_1) {
let ctrl = match segment_0.intersection_t(segment_1) {
Some(t) => segment_0.sample(t),
None => segment_0.to().lerp(segment_1.from(), 0.5),
};
let baseline = LineSegment2F::new(segment_0.from(), segment_1.to());
let ctrl = LineSegment2F::new(segment_0.from(), ctrl);
return Segment::cubic(&baseline, &ctrl);
return Segment::cubic(baseline, ctrl);
}
if self.ctrl.to() == self.baseline.to() {
@ -282,13 +282,13 @@ impl Offset for Segment {
let mut segment_1 = LineSegment2F::new(self.ctrl.from(), self.baseline.to());
segment_0 = segment_0.offset(distance);
segment_1 = segment_1.offset(distance);
let ctrl = match segment_0.intersection_t(&segment_1) {
let ctrl = match segment_0.intersection_t(segment_1) {
Some(t) => segment_0.sample(t),
None => segment_0.to().lerp(segment_1.from(), 0.5),
};
let baseline = LineSegment2F::new(segment_0.from(), segment_1.to());
let ctrl = LineSegment2F::new(ctrl, segment_1.to());
return Segment::cubic(&baseline, &ctrl);
return Segment::cubic(baseline, ctrl);
}
let mut segment_0 = LineSegment2F::new(self.baseline.from(), self.ctrl.from());
@ -298,8 +298,8 @@ impl Offset for Segment {
segment_1 = segment_1.offset(distance);
segment_2 = segment_2.offset(distance);
let (ctrl_0, ctrl_1) = match (
segment_0.intersection_t(&segment_1),
segment_1.intersection_t(&segment_2),
segment_0.intersection_t(segment_1),
segment_1.intersection_t(segment_2),
) {
(Some(t0), Some(t1)) => (segment_0.sample(t0), segment_1.sample(t1)),
_ => (
@ -309,7 +309,7 @@ impl Offset for Segment {
};
let baseline = LineSegment2F::new(segment_0.from(), segment_2.to());
let ctrl = LineSegment2F::new(ctrl_0, ctrl_1);
Segment::cubic(&baseline, &ctrl)
Segment::cubic(baseline, ctrl)
}
fn error_is_within_tolerance(&self, other: &Segment, distance: f32) -> bool {
@ -357,14 +357,14 @@ impl Contour {
distance: f32,
join: LineJoin,
join_point: Vector2F,
next_tangent: &LineSegment2F) {
next_tangent: LineSegment2F) {
let (p0, p1) = (self.position_of_last(2), self.position_of_last(1));
let prev_tangent = LineSegment2F::new(p0, p1);
match join {
LineJoin::Bevel => {}
LineJoin::Miter(miter_limit) => {
if let Some(prev_tangent_t) = prev_tangent.intersection_t(&next_tangent) {
if let Some(prev_tangent_t) = prev_tangent.intersection_t(next_tangent) {
let miter_endpoint = prev_tangent.sample(prev_tangent_t);
let threshold = miter_limit * distance;
if (miter_endpoint - join_point).square_length() <= threshold * threshold {

@ -34,20 +34,12 @@ where
// TODO(pcwalton): Can we go faster by transforming an entire line segment with SIMD?
let mut segment = self.iter.next()?;
if !segment.is_none() {
segment
.baseline
.set_from(&self.transform.transform_point(segment.baseline.from()));
segment
.baseline
.set_to(&self.transform.transform_point(segment.baseline.to()));
segment.baseline.set_from(self.transform.transform_point(segment.baseline.from()));
segment.baseline.set_to(self.transform.transform_point(segment.baseline.to()));
if !segment.is_line() {
segment
.ctrl
.set_from(&self.transform.transform_point(segment.ctrl.from()));
segment.ctrl.set_from(self.transform.transform_point(segment.ctrl.from()));
if !segment.is_quadratic() {
segment
.ctrl
.set_to(&self.transform.transform_point(segment.ctrl.to()));
segment.ctrl.set_to(self.transform.transform_point(segment.ctrl.to()));
}
}
}
@ -88,21 +80,13 @@ where
let mut segment = self.iter.next()?;
if !segment.is_none() {
segment.baseline.set_from(
&self
.perspective
.transform_point_2d(&segment.baseline.from()),
self.perspective.transform_point_2d(segment.baseline.from()),
);
segment
.baseline
.set_to(&self.perspective.transform_point_2d(&segment.baseline.to()));
segment.baseline.set_to(self.perspective.transform_point_2d(segment.baseline.to()));
if !segment.is_line() {
segment
.ctrl
.set_from(&self.perspective.transform_point_2d(&segment.ctrl.from()));
segment.ctrl.set_from(self.perspective.transform_point_2d(segment.ctrl.from()));
if !segment.is_quadratic() {
segment
.ctrl
.set_to(&self.perspective.transform_point_2d(&segment.ctrl.to()));
segment.ctrl.set_to(self.perspective.transform_point_2d(segment.ctrl.to()));
}
}
}

@ -10,8 +10,8 @@
//! Line segment types, optimized with SIMD.
use crate::vector::Vector2F;
use crate::transform2d::Matrix2x2F;
use crate::vector::Vector2F;
use crate::util;
use pathfinder_simd::default::F32x4;
use std::ops::{Add, Sub};
@ -26,44 +26,44 @@ impl LineSegment2F {
}
#[inline]
pub fn from(&self) -> Vector2F {
Vector2F(self.0)
pub fn from(self) -> Vector2F {
Vector2F(self.0.xy())
}
#[inline]
pub fn to(&self) -> Vector2F {
Vector2F(self.0.zwxy())
pub fn to(self) -> Vector2F {
Vector2F(self.0.zw())
}
#[inline]
pub fn set_from(&mut self, point: &Vector2F) {
self.0 = point.0.concat_xy_zw(self.0)
pub fn set_from(&mut self, point: Vector2F) {
self.0 = point.0.to_f32x4().concat_xy_zw(self.0)
}
#[inline]
pub fn set_to(&mut self, point: &Vector2F) {
self.0 = self.0.concat_xy_xy(point.0)
pub fn set_to(&mut self, point: Vector2F) {
self.0 = self.0.concat_xy_xy(point.0.to_f32x4())
}
#[allow(clippy::wrong_self_convention)]
#[inline]
pub fn from_x(&self) -> f32 {
pub fn from_x(self) -> f32 {
self.0[0]
}
#[allow(clippy::wrong_self_convention)]
#[inline]
pub fn from_y(&self) -> f32 {
pub fn from_y(self) -> f32 {
self.0[1]
}
#[inline]
pub fn to_x(&self) -> f32 {
pub fn to_x(self) -> f32 {
self.0[2]
}
#[inline]
pub fn to_y(&self) -> f32 {
pub fn to_y(self) -> f32 {
self.0[3]
}
@ -88,22 +88,22 @@ impl LineSegment2F {
}
#[inline]
pub fn translate(&self, offset: Vector2F) -> LineSegment2F {
LineSegment2F(self.0 + offset.0.xyxy())
pub fn translate(self, offset: Vector2F) -> LineSegment2F {
LineSegment2F(self.0 + offset.0.to_f32x4().xyxy())
}
#[inline]
pub fn scale(&self, factor: f32) -> LineSegment2F {
pub fn scale(self, factor: f32) -> LineSegment2F {
LineSegment2F(self.0 * F32x4::splat(factor))
}
#[inline]
pub fn scale_xy(&self, factors: Vector2F) -> LineSegment2F {
LineSegment2F(self.0 * factors.0.xyxy())
pub fn scale_xy(self, factors: Vector2F) -> LineSegment2F {
LineSegment2F(self.0 * factors.0.to_f32x4().xyxy())
}
#[inline]
pub fn split(&self, t: f32) -> (LineSegment2F, LineSegment2F) {
pub fn split(self, t: f32) -> (LineSegment2F, LineSegment2F) {
debug_assert!(t >= 0.0 && t <= 1.0);
let (from_from, to_to) = (self.0.xyxy(), self.0.zwzw());
let d_d = to_to - from_from;
@ -116,7 +116,7 @@ impl LineSegment2F {
// Returns the left segment first, followed by the right segment.
#[inline]
pub fn split_at_x(&self, x: f32) -> (LineSegment2F, LineSegment2F) {
pub fn split_at_x(self, x: f32) -> (LineSegment2F, LineSegment2F) {
let (min_part, max_part) = self.split(self.solve_t_for_x(x));
if min_part.from_x() < max_part.from_x() {
(min_part, max_part)
@ -127,7 +127,7 @@ impl LineSegment2F {
// Returns the upper segment first, followed by the lower segment.
#[inline]
pub fn split_at_y(&self, y: f32) -> (LineSegment2F, LineSegment2F) {
pub fn split_at_y(self, y: f32) -> (LineSegment2F, LineSegment2F) {
let (min_part, max_part) = self.split(self.solve_t_for_y(y));
// Make sure we compare `from_y` and `to_y` to properly handle the case in which one of the
@ -140,32 +140,32 @@ impl LineSegment2F {
}
#[inline]
pub fn solve_t_for_x(&self, x: f32) -> f32 {
pub fn solve_t_for_x(self, x: f32) -> f32 {
(x - self.from_x()) / (self.to_x() - self.from_x())
}
#[inline]
pub fn solve_t_for_y(&self, y: f32) -> f32 {
pub fn solve_t_for_y(self, y: f32) -> f32 {
(y - self.from_y()) / (self.to_y() - self.from_y())
}
#[inline]
pub fn solve_x_for_y(&self, y: f32) -> f32 {
pub fn solve_x_for_y(self, y: f32) -> f32 {
util::lerp(self.from_x(), self.to_x(), self.solve_t_for_y(y))
}
#[inline]
pub fn solve_y_for_x(&self, x: f32) -> f32 {
pub fn solve_y_for_x(self, x: f32) -> f32 {
util::lerp(self.from_y(), self.to_y(), self.solve_t_for_x(x))
}
#[inline]
pub fn reversed(&self) -> LineSegment2F {
pub fn reversed(self) -> LineSegment2F {
LineSegment2F(self.0.zwxy())
}
#[inline]
pub fn upper_point(&self) -> Vector2F {
pub fn upper_point(self) -> Vector2F {
if self.from_y() < self.to_y() {
self.from()
} else {
@ -174,27 +174,27 @@ impl LineSegment2F {
}
#[inline]
pub fn min_x(&self) -> f32 {
pub fn min_x(self) -> f32 {
f32::min(self.from_x(), self.to_x())
}
#[inline]
pub fn max_x(&self) -> f32 {
pub fn max_x(self) -> f32 {
f32::max(self.from_x(), self.to_x())
}
#[inline]
pub fn min_y(&self) -> f32 {
pub fn min_y(self) -> f32 {
f32::min(self.from_y(), self.to_y())
}
#[inline]
pub fn max_y(&self) -> f32 {
pub fn max_y(self) -> f32 {
f32::max(self.from_y(), self.to_y())
}
#[inline]
pub fn y_winding(&self) -> i32 {
pub fn y_winding(self) -> i32 {
if self.from_y() < self.to_y() {
1
} else {
@ -205,9 +205,9 @@ impl LineSegment2F {
// Reverses if necessary so that the from point is above the to point. Calling this method
// again will undo the transformation.
#[inline]
pub fn orient(&self, y_winding: i32) -> LineSegment2F {
pub fn orient(self, y_winding: i32) -> LineSegment2F {
if y_winding >= 0 {
*self
self
} else {
self.reversed()
}
@ -215,18 +215,18 @@ impl LineSegment2F {
// TODO(pcwalton): Optimize with SIMD.
#[inline]
pub fn square_length(&self) -> f32 {
pub fn square_length(self) -> f32 {
let (dx, dy) = (self.to_x() - self.from_x(), self.to_y() - self.from_y());
dx * dx + dy * dy
}
#[inline]
pub fn vector(&self) -> Vector2F {
pub fn vector(self) -> Vector2F {
self.to() - self.from()
}
// http://www.cs.swan.ac.uk/~cssimon/line_intersection.html
pub fn intersection_t(&self, other: &LineSegment2F) -> Option<f32> {
pub fn intersection_t(self, other: LineSegment2F) -> Option<f32> {
let p0p1 = self.vector();
let matrix = Matrix2x2F(other.vector().0.concat_xy_xy((-p0p1).0));
if f32::abs(matrix.det()) < EPSILON {
@ -238,32 +238,27 @@ impl LineSegment2F {
}
#[inline]
pub fn sample(&self, t: f32) -> Vector2F {
pub fn sample(self, t: f32) -> Vector2F {
self.from() + self.vector().scale(t)
}
#[inline]
pub fn midpoint(&self) -> Vector2F {
pub fn midpoint(self) -> Vector2F {
self.sample(0.5)
}
#[inline]
pub fn offset(&self, distance: f32) -> LineSegment2F {
pub fn offset(self, distance: f32) -> LineSegment2F {
if self.is_zero_length() {
*self
self
} else {
*self
+ self
.vector()
.yx()
.normalize()
.scale_xy(Vector2F::new(-distance, distance))
self + self.vector().yx().normalize().scale_xy(Vector2F::new(-distance, distance))
}
}
#[inline]
pub fn is_zero_length(&self) -> bool {
pub fn is_zero_length(self) -> bool {
self.vector().is_zero()
}
}
@ -272,7 +267,7 @@ impl Add<Vector2F> for LineSegment2F {
type Output = LineSegment2F;
#[inline]
fn add(self, point: Vector2F) -> LineSegment2F {
LineSegment2F(self.0 + point.0.xyxy())
LineSegment2F(self.0 + point.0.to_f32x4().xyxy())
}
}
@ -280,14 +275,22 @@ impl Sub<Vector2F> for LineSegment2F {
type Output = LineSegment2F;
#[inline]
fn sub(self, point: Vector2F) -> LineSegment2F {
LineSegment2F(self.0 - point.0.xyxy())
LineSegment2F(self.0 - point.0.to_f32x4().xyxy())
}
}
#[derive(Clone, Copy, Debug, Default)]
#[repr(transparent)]
pub struct LineSegmentU4(pub u16);
#[repr(C)]
pub struct LineSegmentU4 {
pub from: u8,
pub to: u8,
}
#[derive(Clone, Copy, Debug, Default)]
#[repr(transparent)]
pub struct LineSegmentU8(pub u32);
#[repr(C)]
pub struct LineSegmentU8 {
pub from_x: u8,
pub from_y: u8,
pub to_x: u8,
pub to_y: u8,
}

@ -29,36 +29,34 @@ impl RectF {
#[inline]
pub fn origin(&self) -> Vector2F {
Vector2F(self.0)
Vector2F(self.0.xy())
}
#[inline]
pub fn size(&self) -> Vector2F {
Vector2F(self.0.zwxy() - self.0.xyxy())
Vector2F(self.0.zw() - self.0.xy())
}
#[inline]
pub fn upper_right(&self) -> Vector2F {
Vector2F(self.0.zyxw())
Vector2F(self.0.zy())
}
#[inline]
pub fn lower_left(&self) -> Vector2F {
Vector2F(self.0.xwzy())
Vector2F(self.0.xw())
}
#[inline]
pub fn lower_right(&self) -> Vector2F {
Vector2F(self.0.zwxy())
Vector2F(self.0.zw())
}
#[inline]
pub fn contains_point(&self, point: Vector2F) -> bool {
// self.origin <= point && point <= self.lower_right
self.0
.concat_xy_xy(point.0)
.packed_le(point.0.concat_xy_zw(self.0))
.is_all_ones()
let point = point.0.to_f32x4();
self.0.concat_xy_xy(point).packed_le(point.concat_xy_zw(self.0)).is_all_ones()
}
#[inline]
@ -166,27 +164,27 @@ impl RectI {
#[inline]
pub fn origin(&self) -> Vector2I {
Vector2I(self.0)
Vector2I(self.0.xy())
}
#[inline]
pub fn size(&self) -> Vector2I {
Vector2I(self.0.zwxy() - self.0.xyxy())
Vector2I(self.0.zw() - self.0.xy())
}
#[inline]
pub fn upper_right(&self) -> Vector2I {
Vector2I(self.0.zyxw())
Vector2I(self.0.zy())
}
#[inline]
pub fn lower_left(&self) -> Vector2I {
Vector2I(self.0.xwzy())
Vector2I(self.0.xw())
}
#[inline]
pub fn lower_right(&self) -> Vector2I {
Vector2I(self.0.zwxy())
Vector2I(self.0.zw())
}
#[inline]
@ -213,7 +211,8 @@ impl RectI {
pub fn contains_point(&self, point: Vector2I) -> bool {
// self.origin <= point && point <= self.lower_right - 1
let lower_right = self.lower_right() - Vector2I::splat(1);
self.0
self.origin()
.0
.concat_xy_xy(point.0)
.packed_le(point.0.concat_xy_xy(lower_right.0))
.is_all_ones()

@ -42,7 +42,7 @@ impl Matrix2x2F {
#[inline]
pub fn from_rotation_vector(vector: UnitVector) -> Matrix2x2F {
Matrix2x2F((vector.0).0.xyyx() * F32x4::new(1.0, 1.0, -1.0, 1.0))
Matrix2x2F((vector.0).0.to_f32x4().xyyx() * F32x4::new(1.0, 1.0, -1.0, 1.0))
}
#[inline]
@ -72,8 +72,8 @@ impl Matrix2x2F {
#[inline]
pub fn transform_point(&self, point: Vector2F) -> Vector2F {
let halves = self.0 * point.0.xxyy();
Vector2F(halves + halves.zwzw())
let halves = self.0 * point.0.to_f32x4().xxyy();
Vector2F(halves.xy() + halves.zw())
}
#[inline]
@ -182,7 +182,7 @@ impl Transform2DF {
}
#[inline]
pub fn transform_line_segment(&self, line_segment: &LineSegment2F) -> LineSegment2F {
pub fn transform_line_segment(&self, line_segment: LineSegment2F) -> LineSegment2F {
LineSegment2F::new(self.transform_point(line_segment.from()),
self.transform_point(line_segment.to()))
}
@ -291,6 +291,6 @@ impl Transform2DF {
/// This decomposition assumes that scale, rotation, and translation are applied in that order.
#[inline]
pub fn scale_factor(&self) -> f32 {
Vector2F(self.matrix.0.zwxy()).length()
Vector2F(self.matrix.0.zw()).length()
}
}

@ -345,7 +345,7 @@ impl Perspective {
}
#[inline]
pub fn transform_point_2d(&self, point: &Vector2F) -> Vector2F {
pub fn transform_point_2d(&self, point: Vector2F) -> Vector2F {
let point = self
.transform
.transform_point(point.to_3d())
@ -358,10 +358,10 @@ impl Perspective {
// TODO(pcwalton): SIMD?
#[inline]
pub fn transform_rect(&self, rect: RectF) -> RectF {
let upper_left = self.transform_point_2d(&rect.origin());
let upper_right = self.transform_point_2d(&rect.upper_right());
let lower_left = self.transform_point_2d(&rect.lower_left());
let lower_right = self.transform_point_2d(&rect.lower_right());
let upper_left = self.transform_point_2d(rect.origin());
let upper_right = self.transform_point_2d(rect.upper_right());
let lower_left = self.transform_point_2d(rect.lower_left());
let lower_right = self.transform_point_2d(rect.lower_right());
let min_point = upper_left.min(upper_right).min(lower_left).min(lower_right);
let max_point = upper_left.max(upper_right).max(lower_left).max(lower_right);
RectF::from_points(min_point, max_point)

@ -11,7 +11,7 @@
//! A utility module that allows unit vectors to be treated like angles.
use crate::vector::Vector2F;
use pathfinder_simd::default::F32x4;
use pathfinder_simd::default::F32x2;
#[derive(Clone, Copy, Debug)]
pub struct UnitVector(pub Vector2F);
@ -25,14 +25,14 @@ impl UnitVector {
/// Angle addition formula.
#[inline]
pub fn rotate_by(&self, other: UnitVector) -> UnitVector {
let products = (self.0).0.xyyx() * (other.0).0.xyxy();
let products = (self.0).0.to_f32x4().xyyx() * (other.0).0.to_f32x4().xyxy();
UnitVector(Vector2F::new(products[0] - products[1], products[2] + products[3]))
}
/// Angle subtraction formula.
#[inline]
pub fn rev_rotate_by(&self, other: UnitVector) -> UnitVector {
let products = (self.0).0.xyyx() * (other.0).0.xyxy();
let products = (self.0).0.to_f32x4().xyyx() * (other.0).0.to_f32x4().xyxy();
UnitVector(Vector2F::new(products[0] + products[1], products[2] - products[3]))
}
@ -40,7 +40,7 @@ impl UnitVector {
#[inline]
pub fn halve_angle(&self) -> UnitVector {
let x = self.0.x();
let term = F32x4::new(x, -x, 0.0, 0.0);
UnitVector(Vector2F((F32x4::splat(0.5) * (F32x4::splat(1.0) + term)).sqrt()))
let term = F32x2::new(x, -x);
UnitVector(Vector2F((F32x2::splat(0.5) * (F32x2::splat(1.0) + term)).sqrt()))
}
}

@ -10,36 +10,36 @@
//! A SIMD-optimized point type.
use pathfinder_simd::default::{F32x4, I32x4};
use pathfinder_simd::default::{F32x2, F32x4, I32x2};
use std::ops::{Add, AddAssign, Mul, Neg, Sub};
/// 2D points with 32-bit floating point coordinates.
#[derive(Clone, Copy, Debug, Default)]
pub struct Vector2F(pub F32x4);
pub struct Vector2F(pub F32x2);
impl Vector2F {
#[inline]
pub fn new(x: f32, y: f32) -> Vector2F {
Vector2F(F32x4::new(x, y, 0.0, 0.0))
Vector2F(F32x2::new(x, y))
}
#[inline]
pub fn splat(value: f32) -> Vector2F {
Vector2F(F32x4::splat(value))
Vector2F(F32x2::splat(value))
}
#[inline]
pub fn to_3d(self) -> Vector4F {
Vector4F(self.0.concat_xy_xy(F32x4::new(0.0, 1.0, 0.0, 0.0)))
Vector4F(self.0.to_f32x4().concat_xy_zw(F32x4::new(0.0, 0.0, 0.0, 1.0)))
}
#[inline]
pub fn x(&self) -> f32 {
pub fn x(self) -> f32 {
self.0[0]
}
#[inline]
pub fn y(&self) -> f32 {
pub fn y(self) -> f32 {
self.0[1]
}
@ -54,97 +54,96 @@ impl Vector2F {
}
#[inline]
pub fn min(&self, other: Vector2F) -> Vector2F {
pub fn min(self, other: Vector2F) -> Vector2F {
Vector2F(self.0.min(other.0))
}
#[inline]
pub fn max(&self, other: Vector2F) -> Vector2F {
pub fn max(self, other: Vector2F) -> Vector2F {
Vector2F(self.0.max(other.0))
}
#[inline]
pub fn clamp(&self, min_val: Vector2F, max_val: Vector2F) -> Vector2F {
pub fn clamp(self, min_val: Vector2F, max_val: Vector2F) -> Vector2F {
self.max(min_val).min(max_val)
}
#[inline]
pub fn det(&self, other: Vector2F) -> f32 {
pub fn det(self, other: Vector2F) -> f32 {
self.x() * other.y() - self.y() * other.x()
}
#[inline]
pub fn dot(&self, other: Vector2F) -> f32 {
pub fn dot(self, other: Vector2F) -> f32 {
let xy = self.0 * other.0;
xy.x() + xy.y()
}
#[inline]
pub fn scale(&self, x: f32) -> Vector2F {
Vector2F(self.0 * F32x4::splat(x))
pub fn scale(self, x: f32) -> Vector2F {
Vector2F(self.0 * F32x2::splat(x))
}
#[inline]
pub fn scale_xy(&self, factors: Vector2F) -> Vector2F {
pub fn scale_xy(self, factors: Vector2F) -> Vector2F {
Vector2F(self.0 * factors.0)
}
#[inline]
pub fn floor(&self) -> Vector2F {
pub fn floor(self) -> Vector2F {
Vector2F(self.0.floor())
}
#[inline]
pub fn ceil(&self) -> Vector2F {
pub fn ceil(self) -> Vector2F {
Vector2F(self.0.ceil())
}
/// Treats this point as a vector and calculates its squared length.
#[inline]
pub fn square_length(&self) -> f32 {
pub fn square_length(self) -> f32 {
let squared = self.0 * self.0;
squared[0] + squared[1]
}
/// Treats this point as a vector and calculates its length.
#[inline]
pub fn length(&self) -> f32 {
pub fn length(self) -> f32 {
f32::sqrt(self.square_length())
}
/// Treats this point as a vector and normalizes it.
#[inline]
pub fn normalize(&self) -> Vector2F {
pub fn normalize(self) -> Vector2F {
self.scale(1.0 / self.length())
}
/// Swaps y and x.
#[inline]
pub fn yx(&self) -> Vector2F {
Vector2F(self.0.yxwz())
pub fn yx(self) -> Vector2F {
Vector2F(self.0.yx())
}
#[inline]
pub fn is_zero(&self) -> bool {
*self == Vector2F::default()
pub fn is_zero(self) -> bool {
self == Vector2F::default()
}
#[inline]
pub fn lerp(&self, other: Vector2F, t: f32) -> Vector2F {
*self + (other - *self).scale(t)
pub fn lerp(self, other: Vector2F, t: f32) -> Vector2F {
self + (other - self).scale(t)
}
#[inline]
pub fn to_i32(&self) -> Vector2I {
Vector2I(self.0.to_i32x4())
pub fn to_i32(self) -> Vector2I {
Vector2I(self.0.to_i32x2())
}
}
impl PartialEq for Vector2F {
#[inline]
fn eq(&self, other: &Vector2F) -> bool {
let results = self.0.packed_eq(other.0);
results[0] != 0 && results[1] != 0
self.0.packed_eq(other.0).is_all_ones()
}
}
@ -182,26 +181,26 @@ impl Neg for Vector2F {
/// 2D points with 32-bit signed integer coordinates.
#[derive(Clone, Copy, Debug, Default)]
pub struct Vector2I(pub I32x4);
pub struct Vector2I(pub I32x2);
impl Vector2I {
#[inline]
pub fn new(x: i32, y: i32) -> Vector2I {
Vector2I(I32x4::new(x, y, 0, 0))
Vector2I(I32x2::new(x, y))
}
#[inline]
pub fn splat(value: i32) -> Vector2I {
Vector2I(I32x4::splat(value))
Vector2I(I32x2::splat(value))
}
#[inline]
pub fn x(&self) -> i32 {
pub fn x(self) -> i32 {
self.0[0]
}
#[inline]
pub fn y(&self) -> i32 {
pub fn y(self) -> i32 {
self.0[1]
}
@ -216,18 +215,18 @@ impl Vector2I {
}
#[inline]
pub fn scale(&self, factor: i32) -> Vector2I {
Vector2I(self.0 * I32x4::splat(factor))
pub fn scale(self, factor: i32) -> Vector2I {
Vector2I(self.0 * I32x2::splat(factor))
}
#[inline]
pub fn scale_xy(&self, factors: Vector2I) -> Vector2I {
pub fn scale_xy(self, factors: Vector2I) -> Vector2I {
Vector2I(self.0 * factors.0)
}
#[inline]
pub fn to_f32(&self) -> Vector2F {
Vector2F(self.0.to_f32x4())
pub fn to_f32(self) -> Vector2F {
Vector2F(self.0.to_f32x2())
}
}
@ -257,8 +256,7 @@ impl Sub<Vector2I> for Vector2I {
impl PartialEq for Vector2I {
#[inline]
fn eq(&self, other: &Vector2I) -> bool {
let results = self.0.packed_eq(other.0);
results[0] != 0 && results[1] != 0
self.0.packed_eq(other.0).is_all_ones()
}
}
@ -279,7 +277,7 @@ impl Vector4F {
#[inline]
pub fn to_2d(self) -> Vector2F {
Vector2F(self.0)
Vector2F(self.0.xy())
}
#[inline]
@ -303,7 +301,7 @@ impl Vector4F {
}
#[inline]
pub fn scale(&self, x: f32) -> Vector4F {
pub fn scale(self, x: f32) -> Vector4F {
let mut factors = F32x4::splat(x);
factors[3] = 1.0;
Vector4F(self.0 * factors)
@ -335,7 +333,7 @@ impl Vector4F {
}
#[inline]
pub fn approx_eq(&self, other: &Vector4F, epsilon: f32) -> bool {
pub fn approx_eq(self, other: Vector4F, epsilon: f32) -> bool {
self.0.approx_eq(other.0, epsilon)
}

@ -16,7 +16,7 @@ use pathfinder_content::color::ColorF;
use pathfinder_geometry::rect::RectI;
use pathfinder_geometry::transform3d::Transform3DF;
use pathfinder_geometry::vector::Vector2I;
use pathfinder_simd::default::F32x4;
use pathfinder_simd::default::{F32x2, F32x4};
use std::time::Duration;
pub mod resources;
@ -153,7 +153,7 @@ pub enum ShaderKind {
pub enum UniformData {
Int(i32),
Mat4([F32x4; 4]),
Vec2(F32x4),
Vec2(F32x2),
Vec4(F32x4),
TextureUnit(u32),
}

@ -47,7 +47,7 @@ use pathfinder_gpu::{BlendState, BufferData, BufferTarget, BufferUploadMode, Dep
use pathfinder_gpu::{Primitive, RenderState, RenderTarget, ShaderKind, StencilFunc, TextureData};
use pathfinder_gpu::{TextureFormat, UniformData, VertexAttrClass};
use pathfinder_gpu::{VertexAttrDescriptor, VertexAttrType};
use pathfinder_simd::default::F32x4;
use pathfinder_simd::default::{F32x2, F32x4};
use std::cell::{Cell, RefCell};
use std::mem;
use std::ptr;
@ -1146,7 +1146,7 @@ impl UniformDataExt for UniformData {
Some(slice::from_raw_parts(&data[0] as *const F32x4 as *const u8, 4 * 16))
}
UniformData::Vec2(ref data) => {
Some(slice::from_raw_parts(data as *const F32x4 as *const u8, 4 * 2))
Some(slice::from_raw_parts(data as *const F32x2 as *const u8, 4 * 2))
}
UniformData::Vec4(ref data) => {
Some(slice::from_raw_parts(data as *const F32x4 as *const u8, 4 * 4))

@ -160,7 +160,7 @@ impl BuiltObject {
fn add_fill(
&mut self,
builder: &SceneBuilder,
segment: &LineSegment2F,
segment: LineSegment2F,
tile_coords: Vector2I,
) {
debug!("add_fill({:?} ({:?}))", segment, tile_coords);
@ -171,31 +171,19 @@ impl BuiltObject {
};
debug_assert_eq!(TILE_WIDTH, TILE_HEIGHT);
// Compute the upper left corner of the tile.
let tile_size = F32x4::splat(TILE_WIDTH as f32);
let (min, max) = (
F32x4::default(),
F32x4::splat((TILE_WIDTH * 256 - 1) as f32),
);
let shuffle_mask = I32x4::new(0x0c08_0400, 0x0d05_0901, 0, 0).as_u8x16();
let tile_upper_left = tile_coords.to_f32().0.xyxy() * tile_size;
let tile_upper_left = tile_coords.to_f32().0.to_f32x4().xyxy() * tile_size;
// Convert to 4.8 fixed point.
let segment = (segment.0 - tile_upper_left) * F32x4::splat(256.0);
let segment = segment
.clamp(min, max)
.to_i32x4()
.as_u8x16()
.shuffle(shuffle_mask)
.as_i32x4();
// Unpack whole and fractional pixels.
let px = LineSegmentU4((segment[1] | (segment[1] >> 12)) as u16);
let subpx = LineSegmentU8(segment[0] as u32);
let (min, max) = (F32x4::default(), F32x4::splat((TILE_WIDTH * 256 - 1) as f32));
let segment = segment.clamp(min, max).to_i32x4();
let (from_x, from_y, to_x, to_y) = (segment[0], segment[1], segment[2], segment[3]);
// Cull degenerate fills.
if (px.0 & 0xf) as u8 == ((px.0 >> 8) & 0xf) as u8
&& (subpx.0 & 0xff) as u8 == ((subpx.0 >> 16) & 0xff) as u8
{
if from_x == to_x {
debug!("... culling!");
return;
}
@ -203,10 +191,20 @@ impl BuiltObject {
// Allocate global tile if necessary.
let alpha_tile_index = self.get_or_allocate_alpha_tile_index(builder, tile_coords);
// Pack whole pixels.
let mut px = (segment & I32x4::splat(0xf00)) >> I32x4::new(8, 4, 8, 4);
px = px | px.yxwz();
// Pack instance data.
debug!("... OK, pushing");
self.fills.push(FillBatchPrimitive {
px,
subpx,
px: LineSegmentU4 { from: px[0] as u8, to: px[2] as u8 },
subpx: LineSegmentU8 {
from_x: from_x as u8,
from_y: from_y as u8,
to_x: to_x as u8,
to_y: to_y as u8,
},
alpha_tile_index,
});
}
@ -256,7 +254,7 @@ impl BuiltObject {
);
while winding != 0 {
self.add_fill(builder, &segment, tile_coords);
self.add_fill(builder, segment, tile_coords);
if winding < 0 {
winding += 1
} else {
@ -315,7 +313,7 @@ impl BuiltObject {
let fill_segment = LineSegment2F::new(fill_from, fill_to);
let fill_tile_coords = Vector2I::new(subsegment_tile_x, tile_y);
self.add_fill(builder, &fill_segment, fill_tile_coords);
self.add_fill(builder, fill_segment, fill_tile_coords);
}
}

@ -23,7 +23,7 @@ use pathfinder_gpu::{BlendState, BufferData, BufferTarget, BufferUploadMode, Cle
use pathfinder_gpu::{DepthFunc, DepthState, Device, Primitive, RenderOptions, RenderState};
use pathfinder_gpu::{RenderTarget, StencilFunc, StencilState, TextureFormat, UniformData};
use pathfinder_gpu::{VertexAttrClass, VertexAttrDescriptor, VertexAttrType};
use pathfinder_simd::default::{F32x4, I32x4};
use pathfinder_simd::default::{F32x2, F32x4};
use std::cmp;
use std::collections::VecDeque;
use std::mem;
@ -447,15 +447,10 @@ where
textures: &[&self.area_lut_texture],
uniforms: &[
(&self.fill_program.framebuffer_size_uniform,
UniformData::Vec2(I32x4::new(MASK_FRAMEBUFFER_WIDTH,
MASK_FRAMEBUFFER_HEIGHT,
0,
0).to_f32x4())),
UniformData::Vec2(F32x2::new(MASK_FRAMEBUFFER_WIDTH as f32,
MASK_FRAMEBUFFER_HEIGHT as f32))),
(&self.fill_program.tile_size_uniform,
UniformData::Vec2(I32x4::new(TILE_WIDTH as i32,
TILE_HEIGHT as i32,
0,
0).to_f32x4())),
UniformData::Vec2(F32x2::new(TILE_WIDTH as f32, TILE_HEIGHT as f32))),
(&self.fill_program.area_lut_uniform, UniformData::TextureUnit(0)),
],
viewport: self.mask_viewport(),
@ -475,7 +470,7 @@ where
fn tile_transform(&self) -> Transform3DF {
let draw_viewport = self.draw_viewport().size().to_f32();
let scale = F32x4::new(2.0 / draw_viewport.x(), -2.0 / draw_viewport.y(), 1.0, 1.0);
let scale = F32x2::new(2.0 / draw_viewport.x(), -2.0 / draw_viewport.y());
let transform = Transform3DF::from_scale(scale.x(), scale.y(), 1.0);
Transform3DF::from_translation(-1.0, 1.0, 0.0).post_mul(&transform)
}
@ -491,16 +486,11 @@ where
(&alpha_tile_program.transform_uniform,
UniformData::Mat4(self.tile_transform().to_columns())),
(&alpha_tile_program.tile_size_uniform,
UniformData::Vec2(I32x4::new(TILE_WIDTH as i32,
TILE_HEIGHT as i32,
0,
0).to_f32x4())),
UniformData::Vec2(F32x2::new(TILE_WIDTH as f32, TILE_HEIGHT as f32))),
(&alpha_tile_program.stencil_texture_uniform, UniformData::TextureUnit(0)),
(&alpha_tile_program.stencil_texture_size_uniform,
UniformData::Vec2(I32x4::new(MASK_FRAMEBUFFER_WIDTH,
MASK_FRAMEBUFFER_HEIGHT,
0,
0).to_f32x4())),
UniformData::Vec2(F32x2::new(MASK_FRAMEBUFFER_WIDTH as f32,
MASK_FRAMEBUFFER_HEIGHT as f32))),
];
match self.render_mode {
@ -513,7 +503,7 @@ where
UniformData::Vec2(self.device
.texture_size(paint_texture)
.0
.to_f32x4())));
.to_f32x2())));
}
RenderMode::Monochrome { .. } if self.postprocessing_needed() => {
uniforms.push((&self.alpha_monochrome_tile_program.color_uniform,
@ -555,10 +545,7 @@ where
(&solid_tile_program.transform_uniform,
UniformData::Mat4(self.tile_transform().to_columns())),
(&solid_tile_program.tile_size_uniform,
UniformData::Vec2(I32x4::new(TILE_WIDTH as i32,
TILE_HEIGHT as i32,
0,
0).to_f32x4())),
UniformData::Vec2(F32x2::new(TILE_WIDTH as f32, TILE_HEIGHT as f32))),
];
match self.render_mode {
@ -571,7 +558,7 @@ where
UniformData::Vec2(self.device
.texture_size(paint_texture)
.0
.to_f32x4())));
.to_f32x2())));
}
RenderMode::Monochrome { .. } if self.postprocessing_needed() => {
uniforms.push((&self.solid_monochrome_tile_program.color_uniform,
@ -636,7 +623,7 @@ where
UniformData::Vec2(main_viewport.size().to_f32().0)),
(&self.postprocess_program.source_uniform, UniformData::TextureUnit(0)),
(&self.postprocess_program.source_size_uniform,
UniformData::Vec2(source_texture_size.0.to_f32x4())),
UniformData::Vec2(source_texture_size.0.to_f32x2())),
(&self.postprocess_program.gamma_lut_uniform, UniformData::TextureUnit(1)),
(&self.postprocess_program.fg_color_uniform, UniformData::Vec4(fg_color.0)),
(&self.postprocess_program.bg_color_uniform, UniformData::Vec4(bg_color.0)),

@ -44,15 +44,11 @@ impl<T> DenseTileMap<T> {
#[inline]
pub fn coords_to_index(&self, coords: Vector2I) -> Option<usize> {
// TODO(pcwalton): SIMD?
if coords.x() < self.rect.min_x()
|| coords.x() >= self.rect.max_x()
|| coords.y() < self.rect.min_y()
|| coords.y() >= self.rect.max_y()
{
return None;
if self.rect.contains_point(coords) {
Some(self.coords_to_index_unchecked(coords))
} else {
None
}
Some(self.coords_to_index_unchecked(coords))
}
#[inline]

@ -413,14 +413,11 @@ impl ActiveEdge {
} else {
segment.baseline.to()
};
ActiveEdge::from_segment_and_crossing(segment, &crossing)
ActiveEdge::from_segment_and_crossing(segment, crossing)
}
fn from_segment_and_crossing(segment: &Segment, crossing: &Vector2F) -> ActiveEdge {
ActiveEdge {
segment: *segment,
crossing: *crossing,
}
fn from_segment_and_crossing(segment: &Segment, crossing: Vector2F) -> ActiveEdge {
ActiveEdge { segment: *segment, crossing }
}
fn process(&mut self, builder: &SceneBuilder, built_object: &mut BuiltObject, tile_y: i32) {
@ -436,8 +433,8 @@ impl ActiveEdge {
if segment.is_line() {
let line_segment = segment.as_line_segment();
self.segment =
match self.process_line_segment(&line_segment, builder, built_object, tile_y) {
Some(lower_part) => Segment::line(&lower_part),
match self.process_line_segment(line_segment, builder, built_object, tile_y) {
Some(lower_part) => Segment::line(lower_part),
None => Segment::none(),
};
return;
@ -453,7 +450,7 @@ impl ActiveEdge {
let first_line_segment =
LineSegment2F::new(self.crossing, segment.baseline.upper_point()).orient(winding);
if self
.process_line_segment(&first_line_segment, builder, built_object, tile_y)
.process_line_segment(first_line_segment, builder, built_object, tile_y)
.is_some()
{
return;
@ -484,9 +481,9 @@ impl ActiveEdge {
);
let line = before_segment.baseline.orient(winding);
match self.process_line_segment(&line, builder, built_object, tile_y) {
Some(ref lower_part) if split_t == 1.0 => {
self.segment = Segment::line(&lower_part);
match self.process_line_segment(line, builder, built_object, tile_y) {
Some(lower_part) if split_t == 1.0 => {
self.segment = Segment::line(lower_part);
return;
}
None if split_t == 1.0 => {
@ -504,7 +501,7 @@ impl ActiveEdge {
fn process_line_segment(
&mut self,
line_segment: &LineSegment2F,
line_segment: LineSegment2F,
builder: &SceneBuilder,
built_object: &mut BuiltObject,
tile_y: i32,
@ -516,7 +513,7 @@ impl ActiveEdge {
);
if line_segment.max_y() <= tile_bottom {
built_object.generate_fill_primitives_for_line(builder, *line_segment, tile_y);
built_object.generate_fill_primitives_for_line(builder, line_segment, tile_y);
return None;
}

@ -8,17 +8,198 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use std::arch::aarch64::{self, float32x4_t, int32x4_t, uint32x4_t, uint64x2_t, uint8x16_t};
use std::arch::aarch64::{uint8x8_t, uint8x8x2_t};
use std::arch::aarch64::{self, float32x2_t, float32x4_t, int32x2_t, int32x4_t};
use std::arch::aarch64::{uint32x2_t, uint32x4_t};
use std::f32;
use std::fmt::{self, Debug, Formatter};
use std::mem;
use std::ops::{Add, Index, IndexMut, Mul, Sub};
use std::ops::{Add, BitAnd, BitOr, Index, IndexMut, Mul, Shr, Sub};
mod swizzle_f32x4;
mod swizzle_i32x4;
// 32-bit floats
// Two 32-bit floats
#[derive(Clone, Copy)]
pub struct F32x2(pub float32x2_t);
impl F32x2 {
// Constructors
#[inline]
pub fn new(a: f32, b: f32) -> F32x2 {
unsafe { F32x2(mem::transmute([a, b])) }
}
#[inline]
pub fn splat(x: f32) -> F32x2 {
F32x2::new(x, x)
}
// Basic operations
#[inline]
pub fn approx_recip(self) -> F32x2 {
unsafe { F32x2(vrecpe_v2f32(self.0)) }
}
#[inline]
pub fn min(self, other: F32x2) -> F32x2 {
unsafe { F32x2(simd_fmin(self.0, other.0)) }
}
#[inline]
pub fn max(self, other: F32x2) -> F32x2 {
unsafe { F32x2(simd_fmax(self.0, other.0)) }
}
#[inline]
pub fn clamp(self, min: F32x2, max: F32x2) -> F32x2 {
self.max(min).min(max)
}
#[inline]
pub fn abs(self) -> F32x2 {
unsafe { F32x2(fabs_v2f32(self.0)) }
}
#[inline]
pub fn floor(self) -> F32x2 {
unsafe { F32x2(floor_v2f32(self.0)) }
}
#[inline]
pub fn ceil(self) -> F32x2 {
unsafe { F32x2(ceil_v2f32(self.0)) }
}
#[inline]
pub fn round(self) -> F32x2 {
unsafe { F32x2(round_v2f32(self.0)) }
}
#[inline]
pub fn sqrt(self) -> F32x2 {
unsafe { F32x2(sqrt_v2f32(self.0)) }
}
// Packed comparisons
#[inline]
pub fn packed_eq(self, other: F32x2) -> U32x2 {
unsafe { U32x2(simd_eq(self.0, other.0)) }
}
#[inline]
pub fn packed_gt(self, other: F32x2) -> U32x2 {
unsafe { U32x2(simd_gt(self.0, other.0)) }
}
#[inline]
pub fn packed_lt(self, other: F32x2) -> U32x2 {
unsafe { U32x2(simd_lt(self.0, other.0)) }
}
#[inline]
pub fn packed_le(self, other: F32x2) -> U32x2 {
unsafe { U32x2(simd_le(self.0, other.0)) }
}
// Conversions
#[inline]
pub fn to_f32x4(self) -> F32x4 {
self.concat_xy_xy(F32x2::default())
}
#[inline]
pub fn to_i32x2(self) -> I32x2 {
unsafe { I32x2(simd_cast(self.0)) }
}
#[inline]
pub fn to_i32x4(self) -> I32x4 {
self.to_i32x2().concat_xy_xy(I32x2::default())
}
// Swizzle
#[inline]
pub fn yx(self) -> F32x2 {
unsafe { F32x2(simd_shuffle2(self.0, self.0, [1, 0])) }
}
// Concatenations
#[inline]
pub fn concat_xy_xy(self, other: F32x2) -> F32x4 {
unsafe { F32x4(simd_shuffle4(self.0, other.0, [0, 1, 0, 1])) }
}
}
impl Default for F32x2 {
#[inline]
fn default() -> F32x2 {
F32x2::new(0.0, 0.0)
}
}
impl Debug for F32x2 {
#[inline]
fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
write!(f, "<{}, {}>", self[0], self[1])
}
}
impl Index<usize> for F32x2 {
type Output = f32;
#[inline]
fn index(&self, index: usize) -> &f32 {
unsafe {
assert!(index < 2);
let ptr = &self.0 as *const float32x2_t as *const f32;
mem::transmute::<*const f32, &f32>(ptr.offset(index as isize))
}
}
}
impl IndexMut<usize> for F32x2 {
#[inline]
fn index_mut(&mut self, index: usize) -> &mut f32 {
unsafe {
assert!(index < 2);
let ptr = &mut self.0 as *mut float32x2_t as *mut f32;
mem::transmute::<*mut f32, &mut f32>(ptr.offset(index as isize))
}
}
}
impl Add<F32x2> for F32x2 {
type Output = F32x2;
#[inline]
fn add(self, other: F32x2) -> F32x2 {
unsafe { F32x2(simd_add(self.0, other.0)) }
}
}
impl Mul<F32x2> for F32x2 {
type Output = F32x2;
#[inline]
fn mul(self, other: F32x2) -> F32x2 {
unsafe { F32x2(simd_mul(self.0, other.0)) }
}
}
impl Sub<F32x2> for F32x2 {
type Output = F32x2;
#[inline]
fn sub(self, other: F32x2) -> F32x2 {
unsafe { F32x2(simd_sub(self.0, other.0)) }
}
}
// Four 32-bit floats
#[derive(Clone, Copy)]
pub struct F32x4(pub float32x4_t);
@ -103,32 +284,56 @@ impl F32x4 {
unsafe { U32x4(simd_lt(self.0, other.0)) }
}
// Converts these packed floats to integers.
// Swizzle conversions
#[inline]
pub fn to_i32x4(self) -> I32x4 {
unsafe { I32x4(simd_cast(self.0)) }
pub fn xy(self) -> F32x2 {
unsafe { F32x2(simd_shuffle2(self.0, self.0, [0, 1])) }
}
#[inline]
pub fn yx(self) -> F32x2 {
unsafe { F32x2(simd_shuffle2(self.0, self.0, [1, 0])) }
}
#[inline]
pub fn xw(self) -> F32x2 {
unsafe { F32x2(simd_shuffle2(self.0, self.0, [0, 3])) }
}
#[inline]
pub fn zy(self) -> F32x2 {
unsafe { F32x2(simd_shuffle2(self.0, self.0, [2, 1])) }
}
#[inline]
pub fn zw(self) -> F32x2 {
unsafe { F32x2(simd_shuffle2(self.0, self.0, [2, 3])) }
}
// Concatenations
#[inline]
pub fn concat_xy_xy(self, other: F32x4) -> F32x4 {
unsafe { F32x4(simd_shuffle4(self.0, other.0, [0, 1, 4, 5])) }
unsafe { F32x4(simd_shuffle4(self.0, other.0, [0, 1, 0, 1])) }
}
#[inline]
pub fn concat_xy_zw(self, other: F32x4) -> F32x4 {
unsafe { F32x4(simd_shuffle4(self.0, other.0, [0, 1, 6, 7])) }
unsafe { F32x4(simd_shuffle4(self.0, other.0, [0, 1, 2, 3])) }
}
#[inline]
pub fn concat_zw_zw(self, other: F32x4) -> F32x4 {
unsafe { F32x4(simd_shuffle4(self.0, other.0, [2, 3, 6, 7])) }
unsafe { F32x4(simd_shuffle4(self.0, other.0, [2, 3, 2, 3])) }
}
// Conversions
// Converts these packed floats to integers.
#[inline]
pub fn concat_wz_yx(self, other: F32x4) -> F32x4 {
unsafe { F32x4(simd_shuffle4(self.0, other.0, [3, 2, 5, 4])) }
pub fn to_i32x4(self) -> I32x4 {
unsafe { I32x4(simd_cast(self.0)) }
}
}
@ -200,7 +405,105 @@ impl Sub<F32x4> for F32x4 {
}
}
// 32-bit signed integers
// Two 32-bit signed integers
#[derive(Clone, Copy, Debug)]
pub struct I32x2(pub int32x2_t);
impl I32x2 {
#[inline]
pub fn new(x: i32, y: i32) -> I32x2 {
unsafe { I32x2(mem::transmute([x, y])) }
}
#[inline]
pub fn splat(x: i32) -> I32x2 {
I32x2::new(x, x)
}
#[inline]
pub fn packed_eq(self, other: I32x2) -> U32x2 {
unsafe { U32x2(simd_eq(self.0, other.0)) }
}
// Concatenations
#[inline]
pub fn concat_xy_xy(self, other: I32x2) -> I32x4 {
unsafe { I32x4(simd_shuffle4(self.0, other.0, [0, 1, 0, 1])) }
}
// Conversions
/// Converts these packed integers to floats.
#[inline]
pub fn to_f32x2(self) -> F32x2 {
unsafe { F32x2(simd_cast(self.0)) }
}
}
impl Default for I32x2 {
#[inline]
fn default() -> I32x2 {
I32x2::splat(0)
}
}
impl PartialEq for I32x2 {
#[inline]
fn eq(&self, other: &I32x2) -> bool {
self.packed_eq(*other).is_all_ones()
}
}
impl Index<usize> for I32x2 {
type Output = i32;
#[inline]
fn index(&self, index: usize) -> &i32 {
unsafe {
assert!(index < 2);
let ptr = &self.0 as *const int32x2_t as *const i32;
mem::transmute::<*const i32, &i32>(ptr.offset(index as isize))
}
}
}
impl IndexMut<usize> for I32x2 {
#[inline]
fn index_mut(&mut self, index: usize) -> &mut i32 {
unsafe {
assert!(index < 2);
let ptr = &mut self.0 as *mut int32x2_t as *mut i32;
mem::transmute::<*mut i32, &mut i32>(ptr.offset(index as isize))
}
}
}
impl Add<I32x2> for I32x2 {
type Output = I32x2;
#[inline]
fn add(self, other: I32x2) -> I32x2 {
unsafe { I32x2(simd_add(self.0, other.0)) }
}
}
impl Sub<I32x2> for I32x2 {
type Output = I32x2;
#[inline]
fn sub(self, other: I32x2) -> I32x2 {
unsafe { I32x2(simd_sub(self.0, other.0)) }
}
}
impl Mul<I32x2> for I32x2 {
type Output = I32x2;
#[inline]
fn mul(self, other: I32x2) -> I32x2 {
unsafe { I32x2(simd_mul(self.0, other.0)) }
}
}
// Four 32-bit signed integers
#[derive(Clone, Copy, Debug)]
pub struct I32x4(pub int32x4_t);
@ -216,11 +519,6 @@ impl I32x4 {
I32x4::new(x, x, x, x)
}
#[inline]
pub fn as_u8x16(self) -> U8x16 {
unsafe { U8x16(*mem::transmute::<&int32x4_t, &uint8x16_t>(&self.0)) }
}
#[inline]
pub fn min(self, other: I32x4) -> I32x4 {
unsafe { I32x4(simd_fmin(self.0, other.0)) }
@ -245,6 +543,33 @@ impl I32x4 {
unsafe { I32x4(simd_shuffle4(self.0, other.0, [0, 1, 4, 5])) }
}
// Swizzle conversions
#[inline]
pub fn xy(self) -> I32x2 {
unsafe { I32x2(simd_shuffle2(self.0, self.0, [0, 1])) }
}
#[inline]
pub fn yx(self) -> I32x2 {
unsafe { I32x2(simd_shuffle2(self.0, self.0, [1, 0])) }
}
#[inline]
pub fn xw(self) -> I32x2 {
unsafe { I32x2(simd_shuffle2(self.0, self.0, [0, 3])) }
}
#[inline]
pub fn zy(self) -> I32x2 {
unsafe { I32x2(simd_shuffle2(self.0, self.0, [2, 1])) }
}
#[inline]
pub fn zw(self) -> I32x2 {
unsafe { I32x2(simd_shuffle2(self.0, self.0, [2, 3])) }
}
// Conversions
/// Converts these packed integers to floats.
@ -315,7 +640,60 @@ impl PartialEq for I32x4 {
}
}
// 32-bit unsigned integers
impl BitAnd<I32x4> for I32x4 {
type Output = I32x4;
#[inline]
fn bitand(self, other: I32x4) -> I32x4 {
unsafe { I32x4(simd_and(self.0, other.0)) }
}
}
impl BitOr<I32x4> for I32x4 {
type Output = I32x4;
#[inline]
fn bitor(self, other: I32x4) -> I32x4 {
unsafe { I32x4(simd_or(self.0, other.0)) }
}
}
impl Shr<I32x4> for I32x4 {
type Output = I32x4;
#[inline]
fn shr(self, other: I32x4) -> I32x4 {
unsafe { I32x4(simd_shr(self.0, other.0)) }
}
}
// Two 32-bit unsigned integers
#[derive(Clone, Copy)]
pub struct U32x2(pub uint32x2_t);
impl U32x2 {
#[inline]
pub fn is_all_ones(&self) -> bool {
unsafe { aarch64::vminv_u32(self.0) == !0 }
}
#[inline]
pub fn is_all_zeroes(&self) -> bool {
unsafe { aarch64::vmaxv_u32(self.0) == 0 }
}
}
impl Index<usize> for U32x2 {
type Output = u32;
#[inline]
fn index(&self, index: usize) -> &u32 {
unsafe {
assert!(index < 2);
let ptr = &self.0 as *const uint32x2_t as *const u32;
mem::transmute::<*const u32, &u32>(ptr.offset(index as isize))
}
}
}
// Four 32-bit unsigned integers
#[derive(Clone, Copy)]
pub struct U32x4(pub uint32x4_t);
@ -344,44 +722,6 @@ impl Index<usize> for U32x4 {
}
}
// 8-bit unsigned integers
#[derive(Clone, Copy)]
pub struct U8x16(pub uint8x16_t);
impl U8x16 {
#[inline]
pub fn as_i32x4(self) -> I32x4 {
unsafe { I32x4(*mem::transmute::<&uint8x16_t, &int32x4_t>(&self.0)) }
}
#[inline]
pub fn shuffle(self, indices: U8x16) -> U8x16 {
unsafe {
let table = mem::transmute::<uint8x16_t, uint8x8x2_t>(self.0);
let low = aarch64::vtbl2_u8(table, indices.extract_low());
let high = aarch64::vtbl2_u8(table, indices.extract_high());
U8x16(aarch64::vcombine_u8(low, high))
}
}
#[inline]
fn extract_low(self) -> uint8x8_t {
unsafe {
let low = simd_extract(mem::transmute::<uint8x16_t, uint64x2_t>(self.0), 0);
mem::transmute::<u64, uint8x8_t>(low)
}
}
#[inline]
fn extract_high(self) -> uint8x8_t {
unsafe {
let high = simd_extract(mem::transmute::<uint8x16_t, uint64x2_t>(self.0), 1);
mem::transmute::<u64, uint8x8_t>(high)
}
}
}
// Intrinsics
extern "platform-intrinsic" {
@ -389,6 +729,11 @@ extern "platform-intrinsic" {
fn simd_mul<T>(x: T, y: T) -> T;
fn simd_sub<T>(x: T, y: T) -> T;
fn simd_shr<T>(x: T, y: T) -> T;
fn simd_and<T>(x: T, y: T) -> T;
fn simd_or<T>(x: T, y: T) -> T;
fn simd_fmin<T>(x: T, y: T) -> T;
fn simd_fmax<T>(x: T, y: T) -> T;
@ -397,15 +742,24 @@ extern "platform-intrinsic" {
fn simd_le<T, U>(x: T, y: T) -> U;
fn simd_lt<T, U>(x: T, y: T) -> U;
fn simd_shuffle2<T, U>(x: T, y: T, idx: [u32; 2]) -> U;
fn simd_shuffle4<T, U>(x: T, y: T, idx: [u32; 4]) -> U;
fn simd_cast<T, U>(x: T) -> U;
fn simd_insert<T, U>(x: T, index: u32, value: U) -> T;
fn simd_extract<T, U>(x: T, index: u32) -> U;
}
extern "C" {
#[link_name = "llvm.fabs.v2f32"]
fn fabs_v2f32(a: float32x2_t) -> float32x2_t;
#[link_name = "llvm.floor.v2f32"]
fn floor_v2f32(a: float32x2_t) -> float32x2_t;
#[link_name = "llvm.ceil.v2f32"]
fn ceil_v2f32(a: float32x2_t) -> float32x2_t;
#[link_name = "llvm.round.v2f32"]
fn round_v2f32(a: float32x2_t) -> float32x2_t;
#[link_name = "llvm.sqrt.v2f32"]
fn sqrt_v2f32(a: float32x2_t) -> float32x2_t;
#[link_name = "llvm.fabs.v4f32"]
fn fabs_v4f32(a: float32x4_t) -> float32x4_t;
#[link_name = "llvm.floor.v4f32"]
@ -417,6 +771,9 @@ extern "C" {
#[link_name = "llvm.sqrt.v4f32"]
fn sqrt_v4f32(a: float32x4_t) -> float32x4_t;
#[link_name = "llvm.aarch64.neon.frecpe.v2f32"]
fn vrecpe_v2f32(a: float32x2_t) -> float32x2_t;
#[link_name = "llvm.aarch64.neon.frecpe.v4f32"]
fn vrecpe_v4f32(a: float32x4_t) -> float32x4_t;
}

@ -8,10 +8,84 @@
// option. This file may not be copied, modified, or distributed
// except according to those terms.
use crate::default::{F32x4, I32x4};
use crate::default::{F32x2, F32x4, I32x2, I32x4};
use std::ops::{AddAssign, MulAssign, Neg, SubAssign};
// 32-bit floats
// Two 32-bit floats
impl F32x2 {
// Constructors
#[inline]
pub fn from_slice(slice: &[f32]) -> F32x2 {
F32x2::new(slice[0], slice[1])
}
// Accessors
#[inline]
pub fn x(self) -> f32 {
self[0]
}
#[inline]
pub fn y(self) -> f32 {
self[1]
}
// Mutators
#[inline]
pub fn set_x(&mut self, x: f32) {
self[0] = x
}
#[inline]
pub fn set_y(&mut self, y: f32) {
self[1] = y
}
// Comparisons
#[inline]
pub fn approx_eq(self, other: F32x2, epsilon: f32) -> bool {
(self - other)
.abs()
.packed_gt(F32x2::splat(epsilon))
.is_all_zeroes()
}
}
impl AddAssign for F32x2 {
#[inline]
fn add_assign(&mut self, other: F32x2) {
*self = *self + other
}
}
impl SubAssign for F32x2 {
#[inline]
fn sub_assign(&mut self, other: F32x2) {
*self = *self - other
}
}
impl MulAssign for F32x2 {
#[inline]
fn mul_assign(&mut self, other: F32x2) {
*self = *self * other
}
}
impl Neg for F32x2 {
type Output = F32x2;
#[inline]
fn neg(self) -> F32x2 {
F32x2::default() - self
}
}
// Four 32-bit floats
impl F32x4 {
// Constructors
@ -105,7 +179,38 @@ impl Neg for F32x4 {
}
}
// 32-bit integers
// Two 32-bit integers
impl AddAssign for I32x2 {
#[inline]
fn add_assign(&mut self, other: I32x2) {
*self = *self + other
}
}
impl SubAssign for I32x2 {
#[inline]
fn sub_assign(&mut self, other: I32x2) {
*self = *self - other
}
}
impl MulAssign for I32x2 {
#[inline]
fn mul_assign(&mut self, other: I32x2) {
*self = *self * other
}
}
impl Neg for I32x2 {
type Output = I32x2;
#[inline]
fn neg(self) -> I32x2 {
I32x2::default() - self
}
}
// Four 32-bit integers
impl AddAssign for I32x4 {
#[inline]

View File

@ -10,13 +10,182 @@
use std::f32;
use std::fmt::{self, Debug, Formatter};
use std::mem;
use std::ops::{Add, Index, IndexMut, Mul, Sub};
use std::ops::{Add, BitAnd, BitOr, Index, IndexMut, Mul, Shr, Sub};
mod swizzle_f32x4;
mod swizzle_i32x4;
// 32-bit floats
// Two 32-bit floats
#[derive(Clone, Copy, Debug, Default, PartialEq)]
pub struct F32x2(pub [f32; 2]);
impl F32x2 {
// Constructors
#[inline]
pub fn new(a: f32, b: f32) -> F32x2 {
F32x2([a, b])
}
#[inline]
pub fn splat(x: f32) -> F32x2 {
F32x2([x, x])
}
// Basic operations
#[inline]
pub fn approx_recip(self) -> F32x2 {
F32x2([1.0 / self[0], 1.0 / self[1]])
}
#[inline]
pub fn min(self, other: F32x2) -> F32x2 {
F32x2([f32::min(self[0], other[0]), f32::min(self[1], other[1])])
}
#[inline]
pub fn max(self, other: F32x2) -> F32x2 {
F32x2([f32::max(self[0], other[0]), f32::max(self[1], other[1])])
}
#[inline]
pub fn clamp(self, min: F32x2, max: F32x2) -> F32x2 {
self.max(min).min(max)
}
#[inline]
pub fn abs(self) -> F32x2 {
F32x2([self[0].abs(), self[1].abs()])
}
#[inline]
pub fn floor(self) -> F32x2 {
F32x2([self[0].floor(), self[1].floor()])
}
#[inline]
pub fn ceil(self) -> F32x2 {
F32x2([self[0].ceil(), self[1].ceil()])
}
#[inline]
pub fn round(self) -> F32x2 {
F32x2([self[0].round(), self[1].round()])
}
#[inline]
pub fn sqrt(self) -> F32x2 {
F32x2([self[0].sqrt(), self[1].sqrt()])
}
// Packed comparisons
#[inline]
pub fn packed_eq(self, other: F32x2) -> U32x2 {
U32x2([
if self[0] == other[0] { !0 } else { 0 },
if self[1] == other[1] { !0 } else { 0 },
])
}
#[inline]
pub fn packed_gt(self, other: F32x2) -> U32x2 {
U32x2([
if self[0] > other[0] { !0 } else { 0 },
if self[1] > other[1] { !0 } else { 0 },
])
}
#[inline]
pub fn packed_lt(self, other: F32x2) -> U32x2 {
U32x2([
if self[0] < other[0] { !0 } else { 0 },
if self[1] < other[1] { !0 } else { 0 },
])
}
#[inline]
pub fn packed_le(self, other: F32x2) -> U32x2 {
U32x2([
if self[0] <= other[0] { !0 } else { 0 },
if self[1] <= other[1] { !0 } else { 0 },
])
}
// Conversions
#[inline]
pub fn to_f32x4(self) -> F32x4 {
F32x4([self[0], self[1], 0.0, 0.0])
}
#[inline]
pub fn to_i32x2(self) -> I32x2 {
I32x2([self[0] as i32, self[1] as i32])
}
#[inline]
pub fn to_i32x4(self) -> I32x4 {
I32x4([self[0] as i32, self[1] as i32, 0, 0])
}
// Swizzle
#[inline]
pub fn yx(self) -> F32x2 {
F32x2([self[1], self[0]])
}
// Concatenations
#[inline]
pub fn concat_xy_xy(self, other: F32x2) -> F32x4 {
F32x4([self[0], self[1], other[0], other[1]])
}
}
impl Index<usize> for F32x2 {
type Output = f32;
#[inline]
fn index(&self, index: usize) -> &f32 {
&self.0[index]
}
}
impl IndexMut<usize> for F32x2 {
#[inline]
fn index_mut(&mut self, index: usize) -> &mut f32 {
&mut self.0[index]
}
}
impl Add<F32x2> for F32x2 {
type Output = F32x2;
#[inline]
fn add(self, other: F32x2) -> F32x2 {
F32x2([self[0] + other[0], self[1] + other[1]])
}
}
impl Mul<F32x2> for F32x2 {
type Output = F32x2;
#[inline]
fn mul(self, other: F32x2) -> F32x2 {
F32x2([self[0] * other[0], self[1] * other[1]])
}
}
impl Sub<F32x2> for F32x2 {
type Output = F32x2;
#[inline]
fn sub(self, other: F32x2) -> F32x2 {
F32x2([self[0] - other[0], self[1] - other[1]])
}
}
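A brief sketch of the round trip these constructors and the 4-lane extractors below enable (illustrative):
let from = F32x2::new(1.0, 2.0);
let to = F32x2::new(3.0, 4.0);
let baseline = from.concat_xy_xy(to);    // <1.0, 2.0, 3.0, 4.0>
assert_eq!(baseline.xy(), from);         // xy()/zw() are defined on F32x4 further down
assert_eq!(baseline.zw(), to);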
// Four 32-bit floats
#[derive(Clone, Copy, Default, PartialEq)]
pub struct F32x4(pub [f32; 4]);
@ -162,6 +331,33 @@ impl F32x4 {
])
}
// Swizzle conversions
#[inline]
pub fn xy(self) -> F32x2 {
F32x2([self[0], self[1]])
}
#[inline]
pub fn xw(self) -> F32x2 {
F32x2([self[0], self[3]])
}
#[inline]
pub fn yx(self) -> F32x2 {
F32x2([self[1], self[0]])
}
#[inline]
pub fn zy(self) -> F32x2 {
F32x2([self[2], self[1]])
}
#[inline]
pub fn zw(self) -> F32x2 {
F32x2([self[2], self[3]])
}
// Concatenations
#[inline]
@ -246,7 +442,84 @@ impl Sub<F32x4> for F32x4 {
}
}
// 32-bit signed integers
// Two 32-bit signed integers
#[derive(Clone, Copy, Default, Debug, PartialEq)]
pub struct I32x2([i32; 2]);
impl I32x2 {
#[inline]
pub fn new(x: i32, y: i32) -> I32x2 {
I32x2([x, y])
}
#[inline]
pub fn splat(x: i32) -> I32x2 {
I32x2([x, x])
}
#[inline]
pub fn packed_eq(self, other: I32x2) -> U32x2 {
U32x2([
if self[0] == other[0] { !0 } else { 0 },
if self[1] == other[1] { !0 } else { 0 },
])
}
#[inline]
pub fn concat_xy_xy(self, other: I32x2) -> I32x4 {
I32x4([self[0], self[1], other[0], other[1]])
}
// Conversions
/// Converts these packed integers to floats.
#[inline]
pub fn to_f32x2(self) -> F32x2 {
F32x2([self[0] as f32, self[1] as f32])
}
}
impl Index<usize> for I32x2 {
type Output = i32;
#[inline]
fn index(&self, index: usize) -> &i32 {
&self.0[index]
}
}
impl IndexMut<usize> for I32x2 {
#[inline]
fn index_mut(&mut self, index: usize) -> &mut i32 {
&mut self.0[index]
}
}
impl Add<I32x2> for I32x2 {
type Output = I32x2;
#[inline]
fn add(self, other: I32x2) -> I32x2 {
I32x2([self[0] + other[0], self[1] + other[1]])
}
}
impl Sub<I32x2> for I32x2 {
type Output = I32x2;
#[inline]
fn sub(self, other: I32x2) -> I32x2 {
I32x2([self[0] - other[0], self[1] - other[1]])
}
}
impl Mul<I32x2> for I32x2 {
type Output = I32x2;
#[inline]
fn mul(self, other: I32x2) -> I32x2 {
I32x2([self[0] * other[0], self[1] * other[1]])
}
}
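The two-lane integer type mirrors this; a small illustrative sketch:
let p = I32x2::new(3, -4);
assert_eq!(p.to_f32x2(), F32x2::new(3.0, -4.0));    // lanewise int-to-float conversion
let q = p.concat_xy_xy(I32x2::splat(0));            // widen to I32x4 with zeroed z/w lanes
assert_eq!(q.xy(), p);                              // xy() narrows back to the low two lanes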
// Four 32-bit signed integers
#[derive(Clone, Copy, Default, Debug, PartialEq)]
pub struct I32x4([i32; 4]);
@ -263,10 +536,6 @@ impl I32x4 {
}
#[inline]
pub fn as_u8x16(self) -> U8x16 {
unsafe { U8x16(*mem::transmute::<&[i32; 4], &[u8; 16]>(&self.0)) }
}
#[inline]
pub fn min(self, other: I32x4) -> I32x4 {
I32x4([
@ -306,6 +575,28 @@ impl I32x4 {
I32x4([self[0], self[1], other[0], other[1]])
}
// Swizzle conversions
#[inline]
pub fn xy(self) -> I32x2 {
I32x2([self[0], self[1]])
}
#[inline]
pub fn xw(self) -> I32x2 {
I32x2([self[0], self[3]])
}
#[inline]
pub fn zy(self) -> I32x2 {
I32x2([self[2], self[1]])
}
#[inline]
pub fn zw(self) -> I32x2 {
I32x2([self[2], self[3]])
}
// Conversions
/// Converts these packed integers to floats.
@ -374,7 +665,61 @@ impl Mul<I32x4> for I32x4 {
}
}
// 32-bit unsigned integers
impl BitAnd<I32x4> for I32x4 {
type Output = I32x4;
#[inline]
fn bitand(self, other: I32x4) -> I32x4 {
I32x4([self[0] & other[0], self[1] & other[1], self[2] & other[2], self[3] & other[3]])
}
}
impl BitOr<I32x4> for I32x4 {
type Output = I32x4;
#[inline]
fn bitor(self, other: I32x4) -> I32x4 {
I32x4([self[0] | other[0], self[1] | other[1], self[2] | other[2], self[3] | other[3]])
}
}
impl Shr<I32x4> for I32x4 {
type Output = I32x4;
#[inline]
fn shr(self, other: I32x4) -> I32x4 {
I32x4([
self[0] >> other[0],
self[1] >> other[1],
self[2] >> other[2],
self[3] >> other[3],
])
}
}
// Two 32-bit unsigned integers
#[derive(Clone, Copy)]
pub struct U32x2(pub [u32; 2]);
impl U32x2 {
#[inline]
pub fn is_all_ones(&self) -> bool {
self[0] == !0 && self[1] == !0
}
#[inline]
pub fn is_all_zeroes(&self) -> bool {
self[0] == 0 && self[1] == 0
}
}
impl Index<usize> for U32x2 {
type Output = u32;
#[inline]
fn index(&self, index: usize) -> &u32 {
&self.0[index]
}
}
// Four 32-bit unsigned integers
#[derive(Clone, Copy)]
pub struct U32x4(pub [u32; 4]);
@ -398,24 +743,3 @@ impl Index<usize> for U32x4 {
&self.0[index]
}
}
// 8-bit unsigned integers
#[derive(Clone, Copy)]
pub struct U8x16([u8; 16]);
impl U8x16 {
#[inline]
pub fn as_i32x4(self) -> I32x4 {
unsafe { I32x4(*mem::transmute::<&[u8; 16], &[i32; 4]>(&self.0)) }
}
#[inline]
pub fn shuffle(self, indices: U8x16) -> U8x16 {
let mut result = [0; 16];
for index in 0..16 {
result[index] = self.0[(indices.0[index] & 0x0f) as usize]
}
U8x16(result)
}
}

View File

@ -12,12 +12,195 @@ use std::arch::x86_64::{self, __m128, __m128i, _MM_FROUND_TO_NEAREST_INT};
use std::cmp::PartialEq;
use std::fmt::{self, Debug, Formatter};
use std::mem;
use std::ops::{Add, BitXor, Index, IndexMut, Mul, Not, Sub};
use std::ops::{Add, BitAnd, BitOr, BitXor, Index, IndexMut, Mul, Not, Shr, Sub};
mod swizzle_f32x4;
mod swizzle_i32x4;
// 32-bit floats
// Two 32-bit floats
#[derive(Clone, Copy)]
pub struct F32x2(pub u64);
impl F32x2 {
// Constructors
#[inline]
pub fn new(a: f32, b: f32) -> F32x2 {
unsafe {
let a = mem::transmute::<*const f32, *const u32>(&a);
let b = mem::transmute::<*const f32, *const u32>(&b);
F32x2((*a as u64) | ((*b as u64) << 32))
}
}
#[inline]
pub fn splat(x: f32) -> F32x2 {
F32x2::new(x, x)
}
// Basic operations
#[inline]
pub fn approx_recip(self) -> F32x2 {
self.to_f32x4().approx_recip().xy()
}
#[inline]
pub fn min(self, other: F32x2) -> F32x2 {
self.to_f32x4().min(other.to_f32x4()).xy()
}
#[inline]
pub fn max(self, other: F32x2) -> F32x2 {
self.to_f32x4().max(other.to_f32x4()).xy()
}
#[inline]
pub fn clamp(self, min: F32x2, max: F32x2) -> F32x2 {
self.to_f32x4().clamp(min.to_f32x4(), max.to_f32x4()).xy()
}
#[inline]
pub fn abs(self) -> F32x2 {
self.to_f32x4().abs().xy()
}
#[inline]
pub fn floor(self) -> F32x2 {
self.to_f32x4().floor().xy()
}
#[inline]
pub fn ceil(self) -> F32x2 {
self.to_f32x4().ceil().xy()
}
#[inline]
pub fn round(self) -> F32x2 {
self.to_f32x4().round().xy()
}
#[inline]
pub fn sqrt(self) -> F32x2 {
self.to_f32x4().sqrt().xy()
}
// Packed comparisons
#[inline]
pub fn packed_eq(self, other: F32x2) -> U32x2 {
self.to_f32x4().packed_eq(other.to_f32x4()).xy()
}
#[inline]
pub fn packed_gt(self, other: F32x2) -> U32x2 {
self.to_f32x4().packed_gt(other.to_f32x4()).xy()
}
#[inline]
pub fn packed_lt(self, other: F32x2) -> U32x2 {
self.to_f32x4().packed_lt(other.to_f32x4()).xy()
}
#[inline]
pub fn packed_le(self, other: F32x2) -> U32x2 {
self.to_f32x4().packed_le(other.to_f32x4()).xy()
}
// Conversions
#[inline]
pub fn to_f32x4(self) -> F32x4 {
unsafe { F32x4(x86_64::_mm_castsi128_ps(x86_64::_mm_cvtsi64_si128(self.0 as i64))) }
}
#[inline]
pub fn to_i32x2(self) -> I32x2 {
self.to_i32x4().xy()
}
#[inline]
pub fn to_i32x4(self) -> I32x4 {
self.to_f32x4().to_i32x4()
}
// Swizzle
#[inline]
pub fn yx(self) -> F32x2 {
self.to_f32x4().yx()
}
// Concatenations
#[inline]
pub fn concat_xy_xy(self, other: F32x2) -> F32x4 {
self.to_f32x4().concat_xy_xy(other.to_f32x4())
}
}
impl Default for F32x2 {
#[inline]
fn default() -> F32x2 {
F32x2(0)
}
}
impl Index<usize> for F32x2 {
type Output = f32;
#[inline]
fn index(&self, index: usize) -> &f32 {
unsafe { &mem::transmute::<&u64, &[f32; 2]>(&self.0)[index] }
}
}
impl IndexMut<usize> for F32x2 {
#[inline]
fn index_mut(&mut self, index: usize) -> &mut f32 {
unsafe { &mut mem::transmute::<&mut u64, &mut [f32; 2]>(&mut self.0)[index] }
}
}
impl Debug for F32x2 {
#[inline]
fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
write!(f, "<{}, {}>", self[0], self[1])
}
}
impl PartialEq for F32x2 {
#[inline]
fn eq(&self, other: &F32x2) -> bool {
self.packed_eq(*other).is_all_ones()
}
}
impl Add<F32x2> for F32x2 {
type Output = F32x2;
#[inline]
fn add(self, other: F32x2) -> F32x2 {
(self.to_f32x4() + other.to_f32x4()).xy()
}
}
impl Mul<F32x2> for F32x2 {
type Output = F32x2;
#[inline]
fn mul(self, other: F32x2) -> F32x2 {
(self.to_f32x4() * other.to_f32x4()).xy()
}
}
impl Sub<F32x2> for F32x2 {
type Output = F32x2;
#[inline]
fn sub(self, other: F32x2) -> F32x2 {
(self.to_f32x4() - other.to_f32x4()).xy()
}
}
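On x86, the two-lane float type is simply the two f32 bit patterns packed into a u64; each of the operations above widens to a full __m128, runs the 4-lane SSE code, and narrows back with xy(). A small round-trip sketch (illustrative):
let v = F32x2::new(1.5, -2.0);
let wide = v.to_f32x4();     // _mm_cvtsi64_si128 zero-extends, so lanes 2 and 3 read as 0.0
assert_eq!(wide.xy(), v);    // narrowing keeps the low 64 bits
assert_eq!(v[1], -2.0);      // Index views the u64 as [f32; 2]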
// Four 32-bit floats
#[derive(Clone, Copy)]
pub struct F32x4(pub __m128);
@ -126,6 +309,33 @@ impl F32x4 {
unsafe { I32x4(x86_64::_mm_cvtps_epi32(self.0)) }
}
// Extraction
#[inline]
pub fn xy(self) -> F32x2 {
unsafe { F32x2(x86_64::_mm_cvtsi128_si64(x86_64::_mm_castps_si128(self.0)) as u64) }
}
#[inline]
pub fn xw(self) -> F32x2 {
unsafe { F32x2(x86_64::_mm_cvtsi128_si64(x86_64::_mm_castps_si128(self.xwyz().0)) as u64) }
}
#[inline]
pub fn yx(self) -> F32x2 {
unsafe { F32x2(x86_64::_mm_cvtsi128_si64(x86_64::_mm_castps_si128(self.yxwz().0)) as u64) }
}
#[inline]
pub fn zy(self) -> F32x2 {
unsafe { F32x2(x86_64::_mm_cvtsi128_si64(x86_64::_mm_castps_si128(self.zyxw().0)) as u64) }
}
#[inline]
pub fn zw(self) -> F32x2 {
unsafe { F32x2(x86_64::_mm_cvtsi128_si64(x86_64::_mm_castps_si128(self.zwxy().0)) as u64) }
}
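Each extractor follows the same pattern: swizzle so the two wanted lanes land in positions 0 and 1, bit-cast the vector to integers, and keep the low 64 bits. For example (illustrative):
let v = F32x4::new(1.0, 2.0, 3.0, 4.0);
assert_eq!(v.zw(), F32x2::new(3.0, 4.0));    // zwxy() swizzle, then take the low half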
// Concatenations
#[inline]
@ -224,7 +434,140 @@ impl Sub<F32x4> for F32x4 {
}
}
// 32-bit signed integers
// Two 32-bit signed integers
#[derive(Clone, Copy)]
pub struct I32x2(pub u64);
impl I32x2 {
// Constructors
#[inline]
pub fn new(a: i32, b: i32) -> I32x2 {
unsafe {
let a = mem::transmute::<*const i32, *const u32>(&a);
let b = mem::transmute::<*const i32, *const u32>(&b);
I32x2((*a as u64) | ((*b as u64) << 32))
}
}
#[inline]
pub fn splat(x: i32) -> I32x2 {
I32x2::new(x, x)
}
// Concatenations
#[inline]
pub fn concat_xy_xy(self, other: I32x2) -> I32x4 {
self.to_i32x4().concat_xy_xy(other.to_i32x4())
}
// Conversions
#[inline]
pub fn to_i32x4(self) -> I32x4 {
unsafe { I32x4(x86_64::_mm_cvtsi64_si128(self.0 as i64)) }
}
#[inline]
pub fn to_f32x4(self) -> F32x4 {
self.to_i32x4().to_f32x4()
}
/// Converts these packed integers to floats.
#[inline]
pub fn to_f32x2(self) -> F32x2 {
self.to_f32x4().xy()
}
// Basic operations
#[inline]
pub fn min(self, other: I32x2) -> I32x2 {
self.to_i32x4().min(other.to_i32x4()).xy()
}
// Comparisons
// TODO(pcwalton): Make a `U32x2` type and use that!
#[inline]
pub fn packed_eq(self, other: I32x2) -> U32x4 {
self.to_i32x4().packed_eq(other.to_i32x4())
}
#[inline]
pub fn packed_gt(self, other: I32x2) -> U32x4 {
self.to_i32x4().packed_gt(other.to_i32x4())
}
#[inline]
pub fn packed_le(self, other: I32x2) -> U32x4 {
self.to_i32x4().packed_le(other.to_i32x4())
}
}
impl Default for I32x2 {
#[inline]
fn default() -> I32x2 {
I32x2(0)
}
}
impl Index<usize> for I32x2 {
type Output = i32;
#[inline]
fn index(&self, index: usize) -> &i32 {
unsafe { &mem::transmute::<&u64, &[i32; 2]>(&self.0)[index] }
}
}
impl IndexMut<usize> for I32x2 {
#[inline]
fn index_mut(&mut self, index: usize) -> &mut i32 {
unsafe { &mut mem::transmute::<&mut u64, &mut [i32; 2]>(&mut self.0)[index] }
}
}
impl Add<I32x2> for I32x2 {
type Output = I32x2;
#[inline]
fn add(self, other: I32x2) -> I32x2 {
(self.to_i32x4() + other.to_i32x4()).xy()
}
}
impl Sub<I32x2> for I32x2 {
type Output = I32x2;
#[inline]
fn sub(self, other: I32x2) -> I32x2 {
(self.to_i32x4() - other.to_i32x4()).xy()
}
}
impl Mul<I32x2> for I32x2 {
type Output = I32x2;
#[inline]
fn mul(self, other: I32x2) -> I32x2 {
(self.to_i32x4() * other.to_i32x4()).xy()
}
}
impl Debug for I32x2 {
#[inline]
fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
write!(f, "<{}, {}>", self[0], self[1])
}
}
impl PartialEq for I32x2 {
#[inline]
fn eq(&self, other: &I32x2) -> bool {
self.packed_eq(*other).is_all_ones()
}
}
// Four 32-bit signed integers
#[derive(Clone, Copy)]
pub struct I32x4(pub __m128i);
@ -245,6 +588,33 @@ impl I32x4 {
unsafe { I32x4(x86_64::_mm_set1_epi32(x)) }
}
// Extraction
#[inline]
pub fn xy(self) -> I32x2 {
unsafe { I32x2(x86_64::_mm_cvtsi128_si64(self.0) as u64) }
}
#[inline]
pub fn xw(self) -> I32x2 {
unsafe { I32x2(x86_64::_mm_cvtsi128_si64(self.xwyz().0) as u64) }
}
#[inline]
pub fn yx(self) -> I32x2 {
unsafe { I32x2(x86_64::_mm_cvtsi128_si64(self.yxwz().0) as u64) }
}
#[inline]
pub fn zy(self) -> I32x2 {
unsafe { I32x2(x86_64::_mm_cvtsi128_si64(self.zyxw().0) as u64) }
}
#[inline]
pub fn zw(self) -> I32x2 {
unsafe { I32x2(x86_64::_mm_cvtsi128_si64(self.zwxy().0) as u64) }
}
// Concatenations
#[inline]
@ -259,11 +629,6 @@ impl I32x4 {
// Conversions
#[inline]
pub fn as_u8x16(self) -> U8x16 {
U8x16(self.0)
}
/// Converts these packed integers to floats.
#[inline]
pub fn to_f32x4(self) -> F32x4 {
@ -343,6 +708,30 @@ impl Mul<I32x4> for I32x4 {
}
}
impl BitAnd<I32x4> for I32x4 {
type Output = I32x4;
#[inline]
fn bitand(self, other: I32x4) -> I32x4 {
unsafe { I32x4(x86_64::_mm_and_si128(self.0, other.0)) }
}
}
impl BitOr<I32x4> for I32x4 {
type Output = I32x4;
#[inline]
fn bitor(self, other: I32x4) -> I32x4 {
unsafe { I32x4(x86_64::_mm_or_si128(self.0, other.0)) }
}
}
impl Shr<I32x4> for I32x4 {
type Output = I32x4;
#[inline]
fn shr(self, other: I32x4) -> I32x4 {
unsafe { I32x4(x86_64::_mm_srlv_epi32(self.0, other.0)) }
}
}
impl Debug for I32x4 {
#[inline]
fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> {
@ -357,7 +746,24 @@ impl PartialEq for I32x4 {
}
}
// 32-bit unsigned integers
// Two 32-bit unsigned integers
#[derive(Clone, Copy)]
pub struct U32x2(pub u64);
impl U32x2 {
#[inline]
pub fn is_all_ones(self) -> bool {
self.0 == !0
}
#[inline]
pub fn is_all_zeroes(self) -> bool {
self.0 == 0
}
}
// Four 32-bit unsigned integers
#[derive(Clone, Copy)]
pub struct U32x4(pub __m128i);
@ -390,6 +796,13 @@ impl U32x4 {
unsafe { x86_64::_mm_test_all_zeros(self.0, self.0) != 0 }
}
// Extraction
#[inline]
pub fn xy(self) -> U32x2 {
unsafe { U32x2(x86_64::_mm_cvtsi128_si64(self.0) as u64) }
}
// Packed comparisons
#[inline]
@ -435,20 +848,3 @@ impl BitXor<U32x4> for U32x4 {
unsafe { U32x4(x86_64::_mm_xor_si128(self.0, other.0)) }
}
}
// 8-bit unsigned integers
#[derive(Clone, Copy)]
pub struct U8x16(pub __m128i);
impl U8x16 {
#[inline]
pub fn as_i32x4(self) -> I32x4 {
I32x4(self.0)
}
#[inline]
pub fn shuffle(self, indices: U8x16) -> U8x16 {
unsafe { U8x16(x86_64::_mm_shuffle_epi8(self.0, indices.0)) }
}
}

View File

@ -318,7 +318,7 @@ where
}
UsvgPathSegment::LineTo { x, y } => {
let to = Vector2F::new(x as f32, y as f32);
let mut segment = Segment::line(&LineSegment2F::new(self.last_subpath_point, to));
let mut segment = Segment::line(LineSegment2F::new(self.last_subpath_point, to));
if self.just_moved {
segment.flags.insert(SegmentFlags::FIRST_IN_SUBPATH);
}
@ -338,8 +338,8 @@ where
let ctrl1 = Vector2F::new(x2 as f32, y2 as f32);
let to = Vector2F::new(x as f32, y as f32);
let mut segment = Segment::cubic(
&LineSegment2F::new(self.last_subpath_point, to),
&LineSegment2F::new(ctrl0, ctrl1),
LineSegment2F::new(self.last_subpath_point, to),
LineSegment2F::new(ctrl0, ctrl1),
);
if self.just_moved {
segment.flags.insert(SegmentFlags::FIRST_IN_SUBPATH);
@ -349,7 +349,7 @@ where
Some(segment)
}
UsvgPathSegment::ClosePath => {
let mut segment = Segment::line(&LineSegment2F::new(
let mut segment = Segment::line(LineSegment2F::new(
self.last_subpath_point,
self.first_subpath_point,
));

View File

@ -93,13 +93,13 @@ impl Shape {
}
#[inline]
fn first(&self) -> &LineSegment {
&self.outline.first().unwrap()
fn first(&self) -> LineSegment {
self.outline.first().unwrap()
}
#[inline]
fn last(&self) -> &LineSegment {
&self.outline.last().unwrap()
fn last(&self) -> LineSegment {
self.outline.last().unwrap()
}
#[inline]

View File

@ -181,7 +181,7 @@ impl<D> UIPresenter<D> where D: Device {
primitive,
uniforms: &[
(&self.solid_program.framebuffer_size_uniform,
UniformData::Vec2(self.framebuffer_size.0.to_f32x4())),
UniformData::Vec2(self.framebuffer_size.0.to_f32x2())),
(&self.solid_program.color_uniform, get_color_uniform(color)),
],
textures: &[],
@ -414,11 +414,11 @@ impl<D> UIPresenter<D> where D: Device {
textures: &[&texture],
uniforms: &[
(&self.texture_program.framebuffer_size_uniform,
UniformData::Vec2(self.framebuffer_size.0.to_f32x4())),
UniformData::Vec2(self.framebuffer_size.0.to_f32x2())),
(&self.texture_program.color_uniform, get_color_uniform(color)),
(&self.texture_program.texture_uniform, UniformData::TextureUnit(0)),
(&self.texture_program.texture_size_uniform,
UniformData::Vec2(device.texture_size(&texture).0.to_f32x4()))
UniformData::Vec2(device.texture_size(&texture).0.to_f32x2()))
],
viewport: RectI::new(Vector2I::default(), self.framebuffer_size),
options: RenderOptions {