diff --git a/content/src/clip.rs b/content/src/clip.rs index b4620ba5..fc4a0146 100644 --- a/content/src/clip.rs +++ b/content/src/clip.rs @@ -24,15 +24,15 @@ struct Edge(LineSegment2F); impl TEdge for Edge { #[inline] - fn point_is_inside(&self, point: &Vector2F) -> bool { - let area = (self.0.to() - self.0.from()).det(*point - self.0.from()); + fn point_is_inside(&self, point: Vector2F) -> bool { + let area = (self.0.to() - self.0.from()).det(point - self.0.from()); debug!("point_is_inside({:?}, {:?}), area={}", self, point, area); area >= 0.0 } - fn intersect_line_segment(&self, segment: &LineSegment2F) -> ArrayVec<[f32; 3]> { + fn intersect_line_segment(&self, segment: LineSegment2F) -> ArrayVec<[f32; 3]> { let mut results = ArrayVec::new(); - if let Some(t) = segment.intersection_t(&self.0) { + if let Some(t) = segment.intersection_t(self.0) { if t >= 0.0 && t <= 1.0 { results.push(t); } @@ -51,7 +51,7 @@ enum AxisAlignedEdge { impl TEdge for AxisAlignedEdge { #[inline] - fn point_is_inside(&self, point: &Vector2F) -> bool { + fn point_is_inside(&self, point: Vector2F) -> bool { match *self { AxisAlignedEdge::Left(x) => point.x() >= x, AxisAlignedEdge::Top(y) => point.y() >= y, @@ -60,7 +60,7 @@ impl TEdge for AxisAlignedEdge { } } - fn intersect_line_segment(&self, segment: &LineSegment2F) -> ArrayVec<[f32; 3]> { + fn intersect_line_segment(&self, segment: LineSegment2F) -> ArrayVec<[f32; 3]> { let mut results = ArrayVec::new(); let t = match *self { AxisAlignedEdge::Left(x) | AxisAlignedEdge::Right(x) => segment.solve_t_for_x(x), @@ -74,26 +74,26 @@ impl TEdge for AxisAlignedEdge { } trait TEdge: Debug { - fn point_is_inside(&self, point: &Vector2F) -> bool; - fn intersect_line_segment(&self, segment: &LineSegment2F) -> ArrayVec<[f32; 3]>; + fn point_is_inside(&self, point: Vector2F) -> bool; + fn intersect_line_segment(&self, segment: LineSegment2F) -> ArrayVec<[f32; 3]>; fn trivially_test_segment(&self, segment: &Segment) -> EdgeRelativeLocation { - let 
from_inside = self.point_is_inside(&segment.baseline.from()); + let from_inside = self.point_is_inside(segment.baseline.from()); debug!( "point {:?} inside {:?}: {:?}", segment.baseline.from(), self, from_inside ); - if from_inside != self.point_is_inside(&segment.baseline.to()) { + if from_inside != self.point_is_inside(segment.baseline.to()) { return EdgeRelativeLocation::Intersecting; } if !segment.is_line() { - if from_inside != self.point_is_inside(&segment.ctrl.from()) { + if from_inside != self.point_is_inside(segment.ctrl.from()) { return EdgeRelativeLocation::Intersecting; } if !segment.is_quadratic() { - if from_inside != self.point_is_inside(&segment.ctrl.to()) { + if from_inside != self.point_is_inside(segment.ctrl.to()) { return EdgeRelativeLocation::Intersecting; } } @@ -107,7 +107,7 @@ trait TEdge: Debug { fn intersect_segment(&self, segment: &Segment) -> ArrayVec<[f32; 3]> { if segment.is_line() { - return self.intersect_line_segment(&segment.baseline); + return self.intersect_line_segment(segment.baseline); } let mut segment = *segment; @@ -173,10 +173,10 @@ trait TEdge: Debug { } fn intersects_cubic_segment_hull(&self, cubic_segment: CubicSegment) -> bool { - let inside = self.point_is_inside(&cubic_segment.0.baseline.from()); - inside != self.point_is_inside(&cubic_segment.0.ctrl.from()) - || inside != self.point_is_inside(&cubic_segment.0.ctrl.to()) - || inside != self.point_is_inside(&cubic_segment.0.baseline.to()) + let inside = self.point_is_inside(cubic_segment.0.baseline.from()); + inside != self.point_is_inside(cubic_segment.0.ctrl.from()) + || inside != self.point_is_inside(cubic_segment.0.ctrl.to()) + || inside != self.point_is_inside(cubic_segment.0.baseline.to()) } } @@ -222,7 +222,7 @@ where // We have a potential intersection. 
debug!("potential intersection: {:?} edge: {:?}", segment, edge); - let mut starts_inside = edge.point_is_inside(&segment.baseline.from()); + let mut starts_inside = edge.point_is_inside(segment.baseline.from()); let intersection_ts = edge.intersect_segment(&segment); let mut last_t = 0.0; debug!("... intersections: {:?}", intersection_ts); diff --git a/content/src/outline.rs b/content/src/outline.rs index b09cc668..904f0c63 100644 --- a/content/src/outline.rs +++ b/content/src/outline.rs @@ -434,25 +434,25 @@ impl Contour { debug_assert!(self.point_is_endpoint(point_index)); let mut segment = Segment::none(); - segment.baseline.set_from(&self.position_of(point_index)); + segment.baseline.set_from(self.position_of(point_index)); let point1_index = self.add_to_point_index(point_index, 1); if self.point_is_endpoint(point1_index) { - segment.baseline.set_to(&self.position_of(point1_index)); + segment.baseline.set_to(self.position_of(point1_index)); segment.kind = SegmentKind::Line; } else { - segment.ctrl.set_from(&self.position_of(point1_index)); + segment.ctrl.set_from(self.position_of(point1_index)); let point2_index = self.add_to_point_index(point_index, 2); if self.point_is_endpoint(point2_index) { - segment.baseline.set_to(&self.position_of(point2_index)); + segment.baseline.set_to(self.position_of(point2_index)); segment.kind = SegmentKind::Quadratic; } else { - segment.ctrl.set_to(&self.position_of(point2_index)); + segment.ctrl.set_to(self.position_of(point2_index)); segment.kind = SegmentKind::Cubic; let point3_index = self.add_to_point_index(point_index, 3); - segment.baseline.set_to(&self.position_of(point3_index)); + segment.baseline.set_to(self.position_of(point3_index)); } } @@ -541,7 +541,7 @@ impl Contour { pub fn apply_perspective(&mut self, perspective: &Perspective) { for (point_index, point) in self.points.iter_mut().enumerate() { - *point = perspective.transform_point_2d(point); + *point = perspective.transform_point_2d(*point); union_rect(&mut 
self.bounds, *point, point_index == 0); } } @@ -610,14 +610,14 @@ impl Contour { let ctrl_position = &contour.points[ctrl_point_index]; handle_cubic( self, - &Segment::quadratic(&baseline, *ctrl_position).to_cubic(), + &Segment::quadratic(baseline, *ctrl_position).to_cubic(), ); } else if point_count == 4 { let first_ctrl_point_index = last_endpoint_index as usize + 1; let ctrl_position_0 = &contour.points[first_ctrl_point_index + 0]; let ctrl_position_1 = &contour.points[first_ctrl_point_index + 1]; let ctrl = LineSegment2F::new(*ctrl_position_0, *ctrl_position_1); - handle_cubic(self, &Segment::cubic(&baseline, &ctrl)); + handle_cubic(self, &Segment::cubic(baseline, ctrl)); } self.push_point( @@ -802,21 +802,21 @@ impl<'a> Iterator for ContourIter<'a> { if self.index == contour.len() { let point1 = contour.position_of(0); self.index += 1; - return Some(Segment::line(&LineSegment2F::new(point0, point1))); + return Some(Segment::line(LineSegment2F::new(point0, point1))); } let point1_index = self.index; self.index += 1; let point1 = contour.position_of(point1_index); if contour.point_is_endpoint(point1_index) { - return Some(Segment::line(&LineSegment2F::new(point0, point1))); + return Some(Segment::line(LineSegment2F::new(point0, point1))); } let point2_index = self.index; let point2 = contour.position_of(point2_index); self.index += 1; if contour.point_is_endpoint(point2_index) { - return Some(Segment::quadratic(&LineSegment2F::new(point0, point2), point1)); + return Some(Segment::quadratic(LineSegment2F::new(point0, point2), point1)); } let point3_index = self.index; @@ -824,8 +824,8 @@ impl<'a> Iterator for ContourIter<'a> { self.index += 1; debug_assert!(contour.point_is_endpoint(point3_index)); return Some(Segment::cubic( - &LineSegment2F::new(point0, point3), - &LineSegment2F::new(point1, point2), + LineSegment2F::new(point0, point3), + LineSegment2F::new(point1, point2), )); } } diff --git a/content/src/segment.rs b/content/src/segment.rs index 
4a9d6a76..58f57dfe 100644 --- a/content/src/segment.rs +++ b/content/src/segment.rs @@ -39,9 +39,9 @@ impl Segment { } #[inline] - pub fn line(line: &LineSegment2F) -> Segment { + pub fn line(line: LineSegment2F) -> Segment { Segment { - baseline: *line, + baseline: line, ctrl: LineSegment2F::default(), kind: SegmentKind::Line, flags: SegmentFlags::empty(), @@ -49,9 +49,9 @@ impl Segment { } #[inline] - pub fn quadratic(baseline: &LineSegment2F, ctrl: Vector2F) -> Segment { + pub fn quadratic(baseline: LineSegment2F, ctrl: Vector2F) -> Segment { Segment { - baseline: *baseline, + baseline, ctrl: LineSegment2F::new(ctrl, Vector2F::default()), kind: SegmentKind::Quadratic, flags: SegmentFlags::empty(), @@ -59,10 +59,10 @@ impl Segment { } #[inline] - pub fn cubic(baseline: &LineSegment2F, ctrl: &LineSegment2F) -> Segment { + pub fn cubic(baseline: LineSegment2F, ctrl: LineSegment2F) -> Segment { Segment { - baseline: *baseline, - ctrl: *ctrl, + baseline, + ctrl, kind: SegmentKind::Cubic, flags: SegmentFlags::empty(), } @@ -91,7 +91,7 @@ impl Segment { let (p0x, p0y) = (p3p0.z(), p3p0.w()); let (p1x, p1y) = (4.0 - p0x, (1.0 - p0x) * (3.0 - p0x) / p0y); let p2p1 = F32x4::new(p1x, -p1y, p1x, p1y) * F32x4::splat(1.0 / 3.0); - return Segment::cubic(&LineSegment2F(p3p0), &LineSegment2F(p2p1)); + return Segment::cubic(LineSegment2F(p3p0), LineSegment2F(p2p1)); } #[inline] @@ -100,7 +100,7 @@ impl Segment { let p1 = Vector2F::new(-SQRT_2 / 6.0 + 4.0 / 3.0, 7.0 * SQRT_2 / 6.0 - 4.0 / 3.0); let flip = Vector2F::new(1.0, -1.0); let (p2, p3) = (p1.scale_xy(flip), p0.scale_xy(flip)); - Segment::cubic(&LineSegment2F::new(p3, p0), &LineSegment2F::new(p2, p1)) + Segment::cubic(LineSegment2F::new(p3, p0), LineSegment2F::new(p2, p1)) } #[inline] @@ -198,7 +198,7 @@ impl Segment { // FIXME(pcwalton): Don't degree elevate! 
if self.is_line() { let (before, after) = self.as_line_segment().split(t); - (Segment::line(&before), Segment::line(&after)) + (Segment::line(before), Segment::line(after)) } else { self.to_cubic().as_cubic_segment().split(t) } @@ -217,8 +217,8 @@ impl Segment { #[inline] pub fn transform(self, transform: &Transform2DF) -> Segment { Segment { - baseline: transform.transform_line_segment(&self.baseline), - ctrl: transform.transform_line_segment(&self.ctrl), + baseline: transform.transform_line_segment(self.baseline), + ctrl: transform.transform_line_segment(self.ctrl), kind: self.kind, flags: self.flags, } diff --git a/content/src/stroke.rs b/content/src/stroke.rs index 48f68276..4b724cab 100644 --- a/content/src/stroke.rs +++ b/content/src/stroke.rs @@ -104,7 +104,7 @@ impl<'a> OutlineStrokeToFill<'a> { stroker.output.add_join(self.style.line_width * 0.5, self.style.line_join, stroker.input.position_of(0), - &final_segment); + final_segment); } stroker.output.closed = true; @@ -235,7 +235,7 @@ impl Offset for Segment { self.ctrl.from() }; - contour.add_join(distance, join, join_point, &LineSegment2F::new(p4, p3)); + contour.add_join(distance, join, join_point, LineSegment2F::new(p4, p3)); } // Push segment. 
@@ -245,7 +245,7 @@ impl Offset for Segment { fn offset_once(&self, distance: f32) -> Segment { if self.is_line() { - return Segment::line(&self.baseline.offset(distance)); + return Segment::line(self.baseline.offset(distance)); } if self.is_quadratic() { @@ -253,12 +253,12 @@ impl Offset for Segment { let mut segment_1 = LineSegment2F::new(self.ctrl.from(), self.baseline.to()); segment_0 = segment_0.offset(distance); segment_1 = segment_1.offset(distance); - let ctrl = match segment_0.intersection_t(&segment_1) { + let ctrl = match segment_0.intersection_t(segment_1) { Some(t) => segment_0.sample(t), None => segment_0.to().lerp(segment_1.from(), 0.5), }; let baseline = LineSegment2F::new(segment_0.from(), segment_1.to()); - return Segment::quadratic(&baseline, ctrl); + return Segment::quadratic(baseline, ctrl); } debug_assert!(self.is_cubic()); @@ -268,13 +268,13 @@ impl Offset for Segment { let mut segment_1 = LineSegment2F::new(self.ctrl.to(), self.baseline.to()); segment_0 = segment_0.offset(distance); segment_1 = segment_1.offset(distance); - let ctrl = match segment_0.intersection_t(&segment_1) { + let ctrl = match segment_0.intersection_t(segment_1) { Some(t) => segment_0.sample(t), None => segment_0.to().lerp(segment_1.from(), 0.5), }; let baseline = LineSegment2F::new(segment_0.from(), segment_1.to()); let ctrl = LineSegment2F::new(segment_0.from(), ctrl); - return Segment::cubic(&baseline, &ctrl); + return Segment::cubic(baseline, ctrl); } if self.ctrl.to() == self.baseline.to() { @@ -282,13 +282,13 @@ impl Offset for Segment { let mut segment_1 = LineSegment2F::new(self.ctrl.from(), self.baseline.to()); segment_0 = segment_0.offset(distance); segment_1 = segment_1.offset(distance); - let ctrl = match segment_0.intersection_t(&segment_1) { + let ctrl = match segment_0.intersection_t(segment_1) { Some(t) => segment_0.sample(t), None => segment_0.to().lerp(segment_1.from(), 0.5), }; let baseline = LineSegment2F::new(segment_0.from(), segment_1.to()); let 
ctrl = LineSegment2F::new(ctrl, segment_1.to()); - return Segment::cubic(&baseline, &ctrl); + return Segment::cubic(baseline, ctrl); } let mut segment_0 = LineSegment2F::new(self.baseline.from(), self.ctrl.from()); @@ -298,8 +298,8 @@ impl Offset for Segment { segment_1 = segment_1.offset(distance); segment_2 = segment_2.offset(distance); let (ctrl_0, ctrl_1) = match ( - segment_0.intersection_t(&segment_1), - segment_1.intersection_t(&segment_2), + segment_0.intersection_t(segment_1), + segment_1.intersection_t(segment_2), ) { (Some(t0), Some(t1)) => (segment_0.sample(t0), segment_1.sample(t1)), _ => ( @@ -309,7 +309,7 @@ impl Offset for Segment { }; let baseline = LineSegment2F::new(segment_0.from(), segment_2.to()); let ctrl = LineSegment2F::new(ctrl_0, ctrl_1); - Segment::cubic(&baseline, &ctrl) + Segment::cubic(baseline, ctrl) } fn error_is_within_tolerance(&self, other: &Segment, distance: f32) -> bool { @@ -357,14 +357,14 @@ impl Contour { distance: f32, join: LineJoin, join_point: Vector2F, - next_tangent: &LineSegment2F) { + next_tangent: LineSegment2F) { let (p0, p1) = (self.position_of_last(2), self.position_of_last(1)); let prev_tangent = LineSegment2F::new(p0, p1); match join { LineJoin::Bevel => {} LineJoin::Miter(miter_limit) => { - if let Some(prev_tangent_t) = prev_tangent.intersection_t(&next_tangent) { + if let Some(prev_tangent_t) = prev_tangent.intersection_t(next_tangent) { let miter_endpoint = prev_tangent.sample(prev_tangent_t); let threshold = miter_limit * distance; if (miter_endpoint - join_point).square_length() <= threshold * threshold { diff --git a/content/src/transform.rs b/content/src/transform.rs index 283b4386..195451e2 100644 --- a/content/src/transform.rs +++ b/content/src/transform.rs @@ -34,20 +34,12 @@ where // TODO(pcwalton): Can we go faster by transforming an entire line segment with SIMD? 
let mut segment = self.iter.next()?; if !segment.is_none() { - segment - .baseline - .set_from(&self.transform.transform_point(segment.baseline.from())); - segment - .baseline - .set_to(&self.transform.transform_point(segment.baseline.to())); + segment.baseline.set_from(self.transform.transform_point(segment.baseline.from())); + segment.baseline.set_to(self.transform.transform_point(segment.baseline.to())); if !segment.is_line() { - segment - .ctrl - .set_from(&self.transform.transform_point(segment.ctrl.from())); + segment.ctrl.set_from(self.transform.transform_point(segment.ctrl.from())); if !segment.is_quadratic() { - segment - .ctrl - .set_to(&self.transform.transform_point(segment.ctrl.to())); + segment.ctrl.set_to(self.transform.transform_point(segment.ctrl.to())); } } } @@ -88,21 +80,13 @@ where let mut segment = self.iter.next()?; if !segment.is_none() { segment.baseline.set_from( - &self - .perspective - .transform_point_2d(&segment.baseline.from()), + self.perspective.transform_point_2d(segment.baseline.from()), ); - segment - .baseline - .set_to(&self.perspective.transform_point_2d(&segment.baseline.to())); + segment.baseline.set_to(self.perspective.transform_point_2d(segment.baseline.to())); if !segment.is_line() { - segment - .ctrl - .set_from(&self.perspective.transform_point_2d(&segment.ctrl.from())); + segment.ctrl.set_from(self.perspective.transform_point_2d(segment.ctrl.from())); if !segment.is_quadratic() { - segment - .ctrl - .set_to(&self.perspective.transform_point_2d(&segment.ctrl.to())); + segment.ctrl.set_to(self.perspective.transform_point_2d(segment.ctrl.to())); } } } diff --git a/geometry/src/line_segment.rs b/geometry/src/line_segment.rs index 7842c893..63012a3c 100644 --- a/geometry/src/line_segment.rs +++ b/geometry/src/line_segment.rs @@ -10,8 +10,8 @@ //! Line segment types, optimized with SIMD. 
-use crate::vector::Vector2F; use crate::transform2d::Matrix2x2F; +use crate::vector::Vector2F; use crate::util; use pathfinder_simd::default::F32x4; use std::ops::{Add, Sub}; @@ -26,44 +26,44 @@ impl LineSegment2F { } #[inline] - pub fn from(&self) -> Vector2F { - Vector2F(self.0) + pub fn from(self) -> Vector2F { + Vector2F(self.0.xy()) } #[inline] - pub fn to(&self) -> Vector2F { - Vector2F(self.0.zwxy()) + pub fn to(self) -> Vector2F { + Vector2F(self.0.zw()) } #[inline] - pub fn set_from(&mut self, point: &Vector2F) { - self.0 = point.0.concat_xy_zw(self.0) + pub fn set_from(&mut self, point: Vector2F) { + self.0 = point.0.to_f32x4().concat_xy_zw(self.0) } #[inline] - pub fn set_to(&mut self, point: &Vector2F) { - self.0 = self.0.concat_xy_xy(point.0) + pub fn set_to(&mut self, point: Vector2F) { + self.0 = self.0.concat_xy_xy(point.0.to_f32x4()) } #[allow(clippy::wrong_self_convention)] #[inline] - pub fn from_x(&self) -> f32 { + pub fn from_x(self) -> f32 { self.0[0] } #[allow(clippy::wrong_self_convention)] #[inline] - pub fn from_y(&self) -> f32 { + pub fn from_y(self) -> f32 { self.0[1] } #[inline] - pub fn to_x(&self) -> f32 { + pub fn to_x(self) -> f32 { self.0[2] } #[inline] - pub fn to_y(&self) -> f32 { + pub fn to_y(self) -> f32 { self.0[3] } @@ -88,22 +88,22 @@ impl LineSegment2F { } #[inline] - pub fn translate(&self, offset: Vector2F) -> LineSegment2F { - LineSegment2F(self.0 + offset.0.xyxy()) + pub fn translate(self, offset: Vector2F) -> LineSegment2F { + LineSegment2F(self.0 + offset.0.to_f32x4().xyxy()) } #[inline] - pub fn scale(&self, factor: f32) -> LineSegment2F { + pub fn scale(self, factor: f32) -> LineSegment2F { LineSegment2F(self.0 * F32x4::splat(factor)) } #[inline] - pub fn scale_xy(&self, factors: Vector2F) -> LineSegment2F { - LineSegment2F(self.0 * factors.0.xyxy()) + pub fn scale_xy(self, factors: Vector2F) -> LineSegment2F { + LineSegment2F(self.0 * factors.0.to_f32x4().xyxy()) } #[inline] - pub fn split(&self, t: f32) -> 
(LineSegment2F, LineSegment2F) { + pub fn split(self, t: f32) -> (LineSegment2F, LineSegment2F) { debug_assert!(t >= 0.0 && t <= 1.0); let (from_from, to_to) = (self.0.xyxy(), self.0.zwzw()); let d_d = to_to - from_from; @@ -116,7 +116,7 @@ impl LineSegment2F { // Returns the left segment first, followed by the right segment. #[inline] - pub fn split_at_x(&self, x: f32) -> (LineSegment2F, LineSegment2F) { + pub fn split_at_x(self, x: f32) -> (LineSegment2F, LineSegment2F) { let (min_part, max_part) = self.split(self.solve_t_for_x(x)); if min_part.from_x() < max_part.from_x() { (min_part, max_part) @@ -127,7 +127,7 @@ impl LineSegment2F { // Returns the upper segment first, followed by the lower segment. #[inline] - pub fn split_at_y(&self, y: f32) -> (LineSegment2F, LineSegment2F) { + pub fn split_at_y(self, y: f32) -> (LineSegment2F, LineSegment2F) { let (min_part, max_part) = self.split(self.solve_t_for_y(y)); // Make sure we compare `from_y` and `to_y` to properly handle the case in which one of the @@ -140,32 +140,32 @@ impl LineSegment2F { } #[inline] - pub fn solve_t_for_x(&self, x: f32) -> f32 { + pub fn solve_t_for_x(self, x: f32) -> f32 { (x - self.from_x()) / (self.to_x() - self.from_x()) } #[inline] - pub fn solve_t_for_y(&self, y: f32) -> f32 { + pub fn solve_t_for_y(self, y: f32) -> f32 { (y - self.from_y()) / (self.to_y() - self.from_y()) } #[inline] - pub fn solve_x_for_y(&self, y: f32) -> f32 { + pub fn solve_x_for_y(self, y: f32) -> f32 { util::lerp(self.from_x(), self.to_x(), self.solve_t_for_y(y)) } #[inline] - pub fn solve_y_for_x(&self, x: f32) -> f32 { + pub fn solve_y_for_x(self, x: f32) -> f32 { util::lerp(self.from_y(), self.to_y(), self.solve_t_for_x(x)) } #[inline] - pub fn reversed(&self) -> LineSegment2F { + pub fn reversed(self) -> LineSegment2F { LineSegment2F(self.0.zwxy()) } #[inline] - pub fn upper_point(&self) -> Vector2F { + pub fn upper_point(self) -> Vector2F { if self.from_y() < self.to_y() { self.from() } else { @@ -174,27 
+174,27 @@ impl LineSegment2F { } #[inline] - pub fn min_x(&self) -> f32 { + pub fn min_x(self) -> f32 { f32::min(self.from_x(), self.to_x()) } #[inline] - pub fn max_x(&self) -> f32 { + pub fn max_x(self) -> f32 { f32::max(self.from_x(), self.to_x()) } #[inline] - pub fn min_y(&self) -> f32 { + pub fn min_y(self) -> f32 { f32::min(self.from_y(), self.to_y()) } #[inline] - pub fn max_y(&self) -> f32 { + pub fn max_y(self) -> f32 { f32::max(self.from_y(), self.to_y()) } #[inline] - pub fn y_winding(&self) -> i32 { + pub fn y_winding(self) -> i32 { if self.from_y() < self.to_y() { 1 } else { @@ -205,9 +205,9 @@ impl LineSegment2F { // Reverses if necessary so that the from point is above the to point. Calling this method // again will undo the transformation. #[inline] - pub fn orient(&self, y_winding: i32) -> LineSegment2F { + pub fn orient(self, y_winding: i32) -> LineSegment2F { if y_winding >= 0 { - *self + self } else { self.reversed() } @@ -215,18 +215,18 @@ impl LineSegment2F { // TODO(pcwalton): Optimize with SIMD. 
#[inline] - pub fn square_length(&self) -> f32 { + pub fn square_length(self) -> f32 { let (dx, dy) = (self.to_x() - self.from_x(), self.to_y() - self.from_y()); dx * dx + dy * dy } #[inline] - pub fn vector(&self) -> Vector2F { + pub fn vector(self) -> Vector2F { self.to() - self.from() } // http://www.cs.swan.ac.uk/~cssimon/line_intersection.html - pub fn intersection_t(&self, other: &LineSegment2F) -> Option<f32> { + pub fn intersection_t(self, other: LineSegment2F) -> Option<f32> { let p0p1 = self.vector(); let matrix = Matrix2x2F(other.vector().0.concat_xy_xy((-p0p1).0)); if f32::abs(matrix.det()) < EPSILON { @@ -238,32 +238,27 @@ impl LineSegment2F { } #[inline] - pub fn sample(&self, t: f32) -> Vector2F { + pub fn sample(self, t: f32) -> Vector2F { self.from() + self.vector().scale(t) } #[inline] - pub fn midpoint(&self) -> Vector2F { + pub fn midpoint(self) -> Vector2F { self.sample(0.5) } #[inline] - pub fn offset(&self, distance: f32) -> LineSegment2F { + pub fn offset(self, distance: f32) -> LineSegment2F { if self.is_zero_length() { - *self + self } else { - *self - + self - .vector() - .yx() - .normalize() - .scale_xy(Vector2F::new(-distance, distance)) + self + self.vector().yx().normalize().scale_xy(Vector2F::new(-distance, distance)) } } #[inline] - pub fn is_zero_length(&self) -> bool { + pub fn is_zero_length(self) -> bool { self.vector().is_zero() } } @@ -272,7 +267,7 @@ impl Add<Vector2F> for LineSegment2F { type Output = LineSegment2F; #[inline] fn add(self, point: Vector2F) -> LineSegment2F { - LineSegment2F(self.0 + point.0.xyxy()) + LineSegment2F(self.0 + point.0.to_f32x4().xyxy()) } } @@ -280,14 +275,22 @@ impl Sub<Vector2F> for LineSegment2F { type Output = LineSegment2F; #[inline] fn sub(self, point: Vector2F) -> LineSegment2F { - LineSegment2F(self.0 - point.0.xyxy()) + LineSegment2F(self.0 - point.0.to_f32x4().xyxy()) } } #[derive(Clone, Copy, Debug, Default)] -#[repr(transparent)] -pub struct LineSegmentU4(pub u16); +#[repr(C)] +pub struct LineSegmentU4 { + pub 
from: u8, + pub to: u8, +} #[derive(Clone, Copy, Debug, Default)] -#[repr(transparent)] -pub struct LineSegmentU8(pub u32); +#[repr(C)] +pub struct LineSegmentU8 { + pub from_x: u8, + pub from_y: u8, + pub to_x: u8, + pub to_y: u8, +} diff --git a/geometry/src/rect.rs b/geometry/src/rect.rs index ec7e103a..9116acd1 100644 --- a/geometry/src/rect.rs +++ b/geometry/src/rect.rs @@ -29,36 +29,34 @@ impl RectF { #[inline] pub fn origin(&self) -> Vector2F { - Vector2F(self.0) + Vector2F(self.0.xy()) } #[inline] pub fn size(&self) -> Vector2F { - Vector2F(self.0.zwxy() - self.0.xyxy()) + Vector2F(self.0.zw() - self.0.xy()) } #[inline] pub fn upper_right(&self) -> Vector2F { - Vector2F(self.0.zyxw()) + Vector2F(self.0.zy()) } #[inline] pub fn lower_left(&self) -> Vector2F { - Vector2F(self.0.xwzy()) + Vector2F(self.0.xw()) } #[inline] pub fn lower_right(&self) -> Vector2F { - Vector2F(self.0.zwxy()) + Vector2F(self.0.zw()) } #[inline] pub fn contains_point(&self, point: Vector2F) -> bool { // self.origin <= point && point <= self.lower_right - self.0 - .concat_xy_xy(point.0) - .packed_le(point.0.concat_xy_zw(self.0)) - .is_all_ones() + let point = point.0.to_f32x4(); + self.0.concat_xy_xy(point).packed_le(point.concat_xy_zw(self.0)).is_all_ones() } #[inline] @@ -166,27 +164,27 @@ impl RectI { #[inline] pub fn origin(&self) -> Vector2I { - Vector2I(self.0) + Vector2I(self.0.xy()) } #[inline] pub fn size(&self) -> Vector2I { - Vector2I(self.0.zwxy() - self.0.xyxy()) + Vector2I(self.0.zw() - self.0.xy()) } #[inline] pub fn upper_right(&self) -> Vector2I { - Vector2I(self.0.zyxw()) + Vector2I(self.0.zy()) } #[inline] pub fn lower_left(&self) -> Vector2I { - Vector2I(self.0.xwzy()) + Vector2I(self.0.xw()) } #[inline] pub fn lower_right(&self) -> Vector2I { - Vector2I(self.0.zwxy()) + Vector2I(self.0.zw()) } #[inline] @@ -213,7 +211,8 @@ impl RectI { pub fn contains_point(&self, point: Vector2I) -> bool { // self.origin <= point && point <= self.lower_right - 1 let lower_right = 
self.lower_right() - Vector2I::splat(1); - self.0 + self.origin() + .0 .concat_xy_xy(point.0) .packed_le(point.0.concat_xy_xy(lower_right.0)) .is_all_ones() diff --git a/geometry/src/transform2d.rs b/geometry/src/transform2d.rs index 89d30aae..fb6e8ceb 100644 --- a/geometry/src/transform2d.rs +++ b/geometry/src/transform2d.rs @@ -42,7 +42,7 @@ impl Matrix2x2F { #[inline] pub fn from_rotation_vector(vector: UnitVector) -> Matrix2x2F { - Matrix2x2F((vector.0).0.xyyx() * F32x4::new(1.0, 1.0, -1.0, 1.0)) + Matrix2x2F((vector.0).0.to_f32x4().xyyx() * F32x4::new(1.0, 1.0, -1.0, 1.0)) } #[inline] @@ -72,8 +72,8 @@ impl Matrix2x2F { #[inline] pub fn transform_point(&self, point: Vector2F) -> Vector2F { - let halves = self.0 * point.0.xxyy(); - Vector2F(halves + halves.zwzw()) + let halves = self.0 * point.0.to_f32x4().xxyy(); + Vector2F(halves.xy() + halves.zw()) } #[inline] @@ -182,7 +182,7 @@ impl Transform2DF { } #[inline] - pub fn transform_line_segment(&self, line_segment: &LineSegment2F) -> LineSegment2F { + pub fn transform_line_segment(&self, line_segment: LineSegment2F) -> LineSegment2F { LineSegment2F::new(self.transform_point(line_segment.from()), self.transform_point(line_segment.to())) } @@ -291,6 +291,6 @@ impl Transform2DF { /// This decomposition assumes that scale, rotation, and translation are applied in that order. #[inline] pub fn scale_factor(&self) -> f32 { - Vector2F(self.matrix.0.zwxy()).length() + Vector2F(self.matrix.0.zw()).length() } } diff --git a/geometry/src/transform3d.rs b/geometry/src/transform3d.rs index 7beb1c9a..eeaa9024 100644 --- a/geometry/src/transform3d.rs +++ b/geometry/src/transform3d.rs @@ -345,7 +345,7 @@ impl Perspective { } #[inline] - pub fn transform_point_2d(&self, point: &Vector2F) -> Vector2F { + pub fn transform_point_2d(&self, point: Vector2F) -> Vector2F { let point = self .transform .transform_point(point.to_3d()) @@ -358,10 +358,10 @@ impl Perspective { // TODO(pcwalton): SIMD? 
#[inline] pub fn transform_rect(&self, rect: RectF) -> RectF { - let upper_left = self.transform_point_2d(&rect.origin()); - let upper_right = self.transform_point_2d(&rect.upper_right()); - let lower_left = self.transform_point_2d(&rect.lower_left()); - let lower_right = self.transform_point_2d(&rect.lower_right()); + let upper_left = self.transform_point_2d(rect.origin()); + let upper_right = self.transform_point_2d(rect.upper_right()); + let lower_left = self.transform_point_2d(rect.lower_left()); + let lower_right = self.transform_point_2d(rect.lower_right()); let min_point = upper_left.min(upper_right).min(lower_left).min(lower_right); let max_point = upper_left.max(upper_right).max(lower_left).max(lower_right); RectF::from_points(min_point, max_point) diff --git a/geometry/src/unit_vector.rs b/geometry/src/unit_vector.rs index b46431b0..e06202dd 100644 --- a/geometry/src/unit_vector.rs +++ b/geometry/src/unit_vector.rs @@ -11,7 +11,7 @@ //! A utility module that allows unit vectors to be treated like angles. use crate::vector::Vector2F; -use pathfinder_simd::default::F32x4; +use pathfinder_simd::default::F32x2; #[derive(Clone, Copy, Debug)] pub struct UnitVector(pub Vector2F); @@ -25,14 +25,14 @@ impl UnitVector { /// Angle addition formula. #[inline] pub fn rotate_by(&self, other: UnitVector) -> UnitVector { - let products = (self.0).0.xyyx() * (other.0).0.xyxy(); + let products = (self.0).0.to_f32x4().xyyx() * (other.0).0.to_f32x4().xyxy(); UnitVector(Vector2F::new(products[0] - products[1], products[2] + products[3])) } /// Angle subtraction formula. 
#[inline] pub fn rev_rotate_by(&self, other: UnitVector) -> UnitVector { - let products = (self.0).0.xyyx() * (other.0).0.xyxy(); + let products = (self.0).0.to_f32x4().xyyx() * (other.0).0.to_f32x4().xyxy(); UnitVector(Vector2F::new(products[0] + products[1], products[2] - products[3])) } @@ -40,7 +40,7 @@ impl UnitVector { #[inline] pub fn halve_angle(&self) -> UnitVector { let x = self.0.x(); - let term = F32x4::new(x, -x, 0.0, 0.0); - UnitVector(Vector2F((F32x4::splat(0.5) * (F32x4::splat(1.0) + term)).sqrt())) + let term = F32x2::new(x, -x); + UnitVector(Vector2F((F32x2::splat(0.5) * (F32x2::splat(1.0) + term)).sqrt())) } } diff --git a/geometry/src/vector.rs b/geometry/src/vector.rs index 0d6e4941..91b1a14c 100644 --- a/geometry/src/vector.rs +++ b/geometry/src/vector.rs @@ -10,36 +10,36 @@ //! A SIMD-optimized point type. -use pathfinder_simd::default::{F32x4, I32x4}; +use pathfinder_simd::default::{F32x2, F32x4, I32x2}; use std::ops::{Add, AddAssign, Mul, Neg, Sub}; /// 2D points with 32-bit floating point coordinates. 
#[derive(Clone, Copy, Debug, Default)] -pub struct Vector2F(pub F32x4); +pub struct Vector2F(pub F32x2); impl Vector2F { #[inline] pub fn new(x: f32, y: f32) -> Vector2F { - Vector2F(F32x4::new(x, y, 0.0, 0.0)) + Vector2F(F32x2::new(x, y)) } #[inline] pub fn splat(value: f32) -> Vector2F { - Vector2F(F32x4::splat(value)) + Vector2F(F32x2::splat(value)) } #[inline] pub fn to_3d(self) -> Vector4F { - Vector4F(self.0.concat_xy_xy(F32x4::new(0.0, 1.0, 0.0, 0.0))) + Vector4F(self.0.to_f32x4().concat_xy_zw(F32x4::new(0.0, 0.0, 0.0, 1.0))) } #[inline] - pub fn x(&self) -> f32 { + pub fn x(self) -> f32 { self.0[0] } #[inline] - pub fn y(&self) -> f32 { + pub fn y(self) -> f32 { self.0[1] } @@ -54,97 +54,96 @@ impl Vector2F { } #[inline] - pub fn min(&self, other: Vector2F) -> Vector2F { + pub fn min(self, other: Vector2F) -> Vector2F { Vector2F(self.0.min(other.0)) } #[inline] - pub fn max(&self, other: Vector2F) -> Vector2F { + pub fn max(self, other: Vector2F) -> Vector2F { Vector2F(self.0.max(other.0)) } #[inline] - pub fn clamp(&self, min_val: Vector2F, max_val: Vector2F) -> Vector2F { + pub fn clamp(self, min_val: Vector2F, max_val: Vector2F) -> Vector2F { self.max(min_val).min(max_val) } #[inline] - pub fn det(&self, other: Vector2F) -> f32 { + pub fn det(self, other: Vector2F) -> f32 { self.x() * other.y() - self.y() * other.x() } #[inline] - pub fn dot(&self, other: Vector2F) -> f32 { + pub fn dot(self, other: Vector2F) -> f32 { let xy = self.0 * other.0; xy.x() + xy.y() } #[inline] - pub fn scale(&self, x: f32) -> Vector2F { - Vector2F(self.0 * F32x4::splat(x)) + pub fn scale(self, x: f32) -> Vector2F { + Vector2F(self.0 * F32x2::splat(x)) } #[inline] - pub fn scale_xy(&self, factors: Vector2F) -> Vector2F { + pub fn scale_xy(self, factors: Vector2F) -> Vector2F { Vector2F(self.0 * factors.0) } #[inline] - pub fn floor(&self) -> Vector2F { + pub fn floor(self) -> Vector2F { Vector2F(self.0.floor()) } #[inline] - pub fn ceil(&self) -> Vector2F { + pub fn ceil(self) 
-> Vector2F { Vector2F(self.0.ceil()) } /// Treats this point as a vector and calculates its squared length. #[inline] - pub fn square_length(&self) -> f32 { + pub fn square_length(self) -> f32 { let squared = self.0 * self.0; squared[0] + squared[1] } /// Treats this point as a vector and calculates its length. #[inline] - pub fn length(&self) -> f32 { + pub fn length(self) -> f32 { f32::sqrt(self.square_length()) } /// Treats this point as a vector and normalizes it. #[inline] - pub fn normalize(&self) -> Vector2F { + pub fn normalize(self) -> Vector2F { self.scale(1.0 / self.length()) } /// Swaps y and x. #[inline] - pub fn yx(&self) -> Vector2F { - Vector2F(self.0.yxwz()) + pub fn yx(self) -> Vector2F { + Vector2F(self.0.yx()) } #[inline] - pub fn is_zero(&self) -> bool { - *self == Vector2F::default() + pub fn is_zero(self) -> bool { + self == Vector2F::default() } #[inline] - pub fn lerp(&self, other: Vector2F, t: f32) -> Vector2F { - *self + (other - *self).scale(t) + pub fn lerp(self, other: Vector2F, t: f32) -> Vector2F { + self + (other - self).scale(t) } #[inline] - pub fn to_i32(&self) -> Vector2I { - Vector2I(self.0.to_i32x4()) + pub fn to_i32(self) -> Vector2I { + Vector2I(self.0.to_i32x2()) } } impl PartialEq for Vector2F { #[inline] fn eq(&self, other: &Vector2F) -> bool { - let results = self.0.packed_eq(other.0); - results[0] != 0 && results[1] != 0 + self.0.packed_eq(other.0).is_all_ones() } } @@ -182,26 +181,26 @@ impl Neg for Vector2F { /// 2D points with 32-bit signed integer coordinates. 
#[derive(Clone, Copy, Debug, Default)] -pub struct Vector2I(pub I32x4); +pub struct Vector2I(pub I32x2); impl Vector2I { #[inline] pub fn new(x: i32, y: i32) -> Vector2I { - Vector2I(I32x4::new(x, y, 0, 0)) + Vector2I(I32x2::new(x, y)) } #[inline] pub fn splat(value: i32) -> Vector2I { - Vector2I(I32x4::splat(value)) + Vector2I(I32x2::splat(value)) } #[inline] - pub fn x(&self) -> i32 { + pub fn x(self) -> i32 { self.0[0] } #[inline] - pub fn y(&self) -> i32 { + pub fn y(self) -> i32 { self.0[1] } @@ -216,18 +215,18 @@ impl Vector2I { } #[inline] - pub fn scale(&self, factor: i32) -> Vector2I { - Vector2I(self.0 * I32x4::splat(factor)) + pub fn scale(self, factor: i32) -> Vector2I { + Vector2I(self.0 * I32x2::splat(factor)) } #[inline] - pub fn scale_xy(&self, factors: Vector2I) -> Vector2I { + pub fn scale_xy(self, factors: Vector2I) -> Vector2I { Vector2I(self.0 * factors.0) } #[inline] - pub fn to_f32(&self) -> Vector2F { - Vector2F(self.0.to_f32x4()) + pub fn to_f32(self) -> Vector2F { + Vector2F(self.0.to_f32x2()) } } @@ -257,8 +256,7 @@ impl Sub for Vector2I { impl PartialEq for Vector2I { #[inline] fn eq(&self, other: &Vector2I) -> bool { - let results = self.0.packed_eq(other.0); - results[0] != 0 && results[1] != 0 + self.0.packed_eq(other.0).is_all_ones() } } @@ -279,7 +277,7 @@ impl Vector4F { #[inline] pub fn to_2d(self) -> Vector2F { - Vector2F(self.0) + Vector2F(self.0.xy()) } #[inline] @@ -303,7 +301,7 @@ impl Vector4F { } #[inline] - pub fn scale(&self, x: f32) -> Vector4F { + pub fn scale(self, x: f32) -> Vector4F { let mut factors = F32x4::splat(x); factors[3] = 1.0; Vector4F(self.0 * factors) @@ -335,7 +333,7 @@ impl Vector4F { } #[inline] - pub fn approx_eq(&self, other: &Vector4F, epsilon: f32) -> bool { + pub fn approx_eq(self, other: Vector4F, epsilon: f32) -> bool { self.0.approx_eq(other.0, epsilon) } diff --git a/gpu/src/lib.rs b/gpu/src/lib.rs index 70dcd132..16e10191 100644 --- a/gpu/src/lib.rs +++ b/gpu/src/lib.rs @@ -16,7 +16,7 @@ use 
pathfinder_content::color::ColorF; use pathfinder_geometry::rect::RectI; use pathfinder_geometry::transform3d::Transform3DF; use pathfinder_geometry::vector::Vector2I; -use pathfinder_simd::default::F32x4; +use pathfinder_simd::default::{F32x2, F32x4}; use std::time::Duration; pub mod resources; @@ -153,7 +153,7 @@ pub enum ShaderKind { pub enum UniformData { Int(i32), Mat4([F32x4; 4]), - Vec2(F32x4), + Vec2(F32x2), Vec4(F32x4), TextureUnit(u32), } diff --git a/metal/src/lib.rs b/metal/src/lib.rs index 41a11c80..1535acd1 100644 --- a/metal/src/lib.rs +++ b/metal/src/lib.rs @@ -47,7 +47,7 @@ use pathfinder_gpu::{BlendState, BufferData, BufferTarget, BufferUploadMode, Dep use pathfinder_gpu::{Primitive, RenderState, RenderTarget, ShaderKind, StencilFunc, TextureData}; use pathfinder_gpu::{TextureFormat, UniformData, VertexAttrClass}; use pathfinder_gpu::{VertexAttrDescriptor, VertexAttrType}; -use pathfinder_simd::default::F32x4; +use pathfinder_simd::default::{F32x2, F32x4}; use std::cell::{Cell, RefCell}; use std::mem; use std::ptr; @@ -1146,7 +1146,7 @@ impl UniformDataExt for UniformData { Some(slice::from_raw_parts(&data[0] as *const F32x4 as *const u8, 4 * 16)) } UniformData::Vec2(ref data) => { - Some(slice::from_raw_parts(data as *const F32x4 as *const u8, 4 * 2)) + Some(slice::from_raw_parts(data as *const F32x2 as *const u8, 4 * 2)) } UniformData::Vec4(ref data) => { Some(slice::from_raw_parts(data as *const F32x4 as *const u8, 4 * 4)) diff --git a/renderer/src/builder.rs b/renderer/src/builder.rs index b3d4ca1c..9e53c492 100644 --- a/renderer/src/builder.rs +++ b/renderer/src/builder.rs @@ -160,7 +160,7 @@ impl BuiltObject { fn add_fill( &mut self, builder: &SceneBuilder, - segment: &LineSegment2F, + segment: LineSegment2F, tile_coords: Vector2I, ) { debug!("add_fill({:?} ({:?}))", segment, tile_coords); @@ -171,31 +171,19 @@ impl BuiltObject { }; debug_assert_eq!(TILE_WIDTH, TILE_HEIGHT); + + // Compute the upper left corner of the tile. 
let tile_size = F32x4::splat(TILE_WIDTH as f32); - let (min, max) = ( - F32x4::default(), - F32x4::splat((TILE_WIDTH * 256 - 1) as f32), - ); - let shuffle_mask = I32x4::new(0x0c08_0400, 0x0d05_0901, 0, 0).as_u8x16(); - - let tile_upper_left = tile_coords.to_f32().0.xyxy() * tile_size; + let tile_upper_left = tile_coords.to_f32().0.to_f32x4().xyxy() * tile_size; + // Convert to 4.8 fixed point. let segment = (segment.0 - tile_upper_left) * F32x4::splat(256.0); - let segment = segment - .clamp(min, max) - .to_i32x4() - .as_u8x16() - .shuffle(shuffle_mask) - .as_i32x4(); - - // Unpack whole and fractional pixels. - let px = LineSegmentU4((segment[1] | (segment[1] >> 12)) as u16); - let subpx = LineSegmentU8(segment[0] as u32); + let (min, max) = (F32x4::default(), F32x4::splat((TILE_WIDTH * 256 - 1) as f32)); + let segment = segment.clamp(min, max).to_i32x4(); + let (from_x, from_y, to_x, to_y) = (segment[0], segment[1], segment[2], segment[3]); // Cull degenerate fills. - if (px.0 & 0xf) as u8 == ((px.0 >> 8) & 0xf) as u8 - && (subpx.0 & 0xff) as u8 == ((subpx.0 >> 16) & 0xff) as u8 - { + if from_x == to_x { debug!("... culling!"); return; } @@ -203,10 +191,20 @@ impl BuiltObject { // Allocate global tile if necessary. let alpha_tile_index = self.get_or_allocate_alpha_tile_index(builder, tile_coords); + // Pack whole pixels. + let mut px = (segment & I32x4::splat(0xf00)) >> I32x4::new(8, 4, 8, 4); + px = px | px.yxwz(); + + // Pack instance data. debug!("... 
OK, pushing"); self.fills.push(FillBatchPrimitive { - px, - subpx, + px: LineSegmentU4 { from: px[0] as u8, to: px[2] as u8 }, + subpx: LineSegmentU8 { + from_x: from_x as u8, + from_y: from_y as u8, + to_x: to_x as u8, + to_y: to_y as u8, + }, alpha_tile_index, }); } @@ -256,7 +254,7 @@ impl BuiltObject { ); while winding != 0 { - self.add_fill(builder, &segment, tile_coords); + self.add_fill(builder, segment, tile_coords); if winding < 0 { winding += 1 } else { @@ -315,7 +313,7 @@ impl BuiltObject { let fill_segment = LineSegment2F::new(fill_from, fill_to); let fill_tile_coords = Vector2I::new(subsegment_tile_x, tile_y); - self.add_fill(builder, &fill_segment, fill_tile_coords); + self.add_fill(builder, fill_segment, fill_tile_coords); } } diff --git a/renderer/src/gpu/renderer.rs b/renderer/src/gpu/renderer.rs index 7280dd0e..ce6ff354 100644 --- a/renderer/src/gpu/renderer.rs +++ b/renderer/src/gpu/renderer.rs @@ -23,7 +23,7 @@ use pathfinder_gpu::{BlendState, BufferData, BufferTarget, BufferUploadMode, Cle use pathfinder_gpu::{DepthFunc, DepthState, Device, Primitive, RenderOptions, RenderState}; use pathfinder_gpu::{RenderTarget, StencilFunc, StencilState, TextureFormat, UniformData}; use pathfinder_gpu::{VertexAttrClass, VertexAttrDescriptor, VertexAttrType}; -use pathfinder_simd::default::{F32x4, I32x4}; +use pathfinder_simd::default::{F32x2, F32x4}; use std::cmp; use std::collections::VecDeque; use std::mem; @@ -447,15 +447,10 @@ where textures: &[&self.area_lut_texture], uniforms: &[ (&self.fill_program.framebuffer_size_uniform, - UniformData::Vec2(I32x4::new(MASK_FRAMEBUFFER_WIDTH, - MASK_FRAMEBUFFER_HEIGHT, - 0, - 0).to_f32x4())), + UniformData::Vec2(F32x2::new(MASK_FRAMEBUFFER_WIDTH as f32, + MASK_FRAMEBUFFER_HEIGHT as f32))), (&self.fill_program.tile_size_uniform, - UniformData::Vec2(I32x4::new(TILE_WIDTH as i32, - TILE_HEIGHT as i32, - 0, - 0).to_f32x4())), + UniformData::Vec2(F32x2::new(TILE_WIDTH as f32, TILE_HEIGHT as f32))), 
(&self.fill_program.area_lut_uniform, UniformData::TextureUnit(0)), ], viewport: self.mask_viewport(), @@ -475,7 +470,7 @@ where fn tile_transform(&self) -> Transform3DF { let draw_viewport = self.draw_viewport().size().to_f32(); - let scale = F32x4::new(2.0 / draw_viewport.x(), -2.0 / draw_viewport.y(), 1.0, 1.0); + let scale = F32x2::new(2.0 / draw_viewport.x(), -2.0 / draw_viewport.y()); let transform = Transform3DF::from_scale(scale.x(), scale.y(), 1.0); Transform3DF::from_translation(-1.0, 1.0, 0.0).post_mul(&transform) } @@ -491,16 +486,11 @@ where (&alpha_tile_program.transform_uniform, UniformData::Mat4(self.tile_transform().to_columns())), (&alpha_tile_program.tile_size_uniform, - UniformData::Vec2(I32x4::new(TILE_WIDTH as i32, - TILE_HEIGHT as i32, - 0, - 0).to_f32x4())), + UniformData::Vec2(F32x2::new(TILE_WIDTH as f32, TILE_HEIGHT as f32))), (&alpha_tile_program.stencil_texture_uniform, UniformData::TextureUnit(0)), (&alpha_tile_program.stencil_texture_size_uniform, - UniformData::Vec2(I32x4::new(MASK_FRAMEBUFFER_WIDTH, - MASK_FRAMEBUFFER_HEIGHT, - 0, - 0).to_f32x4())), + UniformData::Vec2(F32x2::new(MASK_FRAMEBUFFER_WIDTH as f32, + MASK_FRAMEBUFFER_HEIGHT as f32))), ]; match self.render_mode { @@ -513,7 +503,7 @@ where UniformData::Vec2(self.device .texture_size(paint_texture) .0 - .to_f32x4()))); + .to_f32x2()))); } RenderMode::Monochrome { .. } if self.postprocessing_needed() => { uniforms.push((&self.alpha_monochrome_tile_program.color_uniform, @@ -555,10 +545,7 @@ where (&solid_tile_program.transform_uniform, UniformData::Mat4(self.tile_transform().to_columns())), (&solid_tile_program.tile_size_uniform, - UniformData::Vec2(I32x4::new(TILE_WIDTH as i32, - TILE_HEIGHT as i32, - 0, - 0).to_f32x4())), + UniformData::Vec2(F32x2::new(TILE_WIDTH as f32, TILE_HEIGHT as f32))), ]; match self.render_mode { @@ -571,7 +558,7 @@ where UniformData::Vec2(self.device .texture_size(paint_texture) .0 - .to_f32x4()))); + .to_f32x2()))); } RenderMode::Monochrome { .. 
} if self.postprocessing_needed() => { uniforms.push((&self.solid_monochrome_tile_program.color_uniform, @@ -636,7 +623,7 @@ where UniformData::Vec2(main_viewport.size().to_f32().0)), (&self.postprocess_program.source_uniform, UniformData::TextureUnit(0)), (&self.postprocess_program.source_size_uniform, - UniformData::Vec2(source_texture_size.0.to_f32x4())), + UniformData::Vec2(source_texture_size.0.to_f32x2())), (&self.postprocess_program.gamma_lut_uniform, UniformData::TextureUnit(1)), (&self.postprocess_program.fg_color_uniform, UniformData::Vec4(fg_color.0)), (&self.postprocess_program.bg_color_uniform, UniformData::Vec4(bg_color.0)), diff --git a/renderer/src/tile_map.rs b/renderer/src/tile_map.rs index f0d5ac40..d0ca65ca 100644 --- a/renderer/src/tile_map.rs +++ b/renderer/src/tile_map.rs @@ -44,15 +44,11 @@ impl DenseTileMap { #[inline] pub fn coords_to_index(&self, coords: Vector2I) -> Option { - // TODO(pcwalton): SIMD? - if coords.x() < self.rect.min_x() - || coords.x() >= self.rect.max_x() - || coords.y() < self.rect.min_y() - || coords.y() >= self.rect.max_y() - { - return None; + if self.rect.contains_point(coords) { + Some(self.coords_to_index_unchecked(coords)) + } else { + None } - Some(self.coords_to_index_unchecked(coords)) } #[inline] diff --git a/renderer/src/tiles.rs b/renderer/src/tiles.rs index 0c98f0a5..60384f8f 100644 --- a/renderer/src/tiles.rs +++ b/renderer/src/tiles.rs @@ -413,14 +413,11 @@ impl ActiveEdge { } else { segment.baseline.to() }; - ActiveEdge::from_segment_and_crossing(segment, &crossing) + ActiveEdge::from_segment_and_crossing(segment, crossing) } - fn from_segment_and_crossing(segment: &Segment, crossing: &Vector2F) -> ActiveEdge { - ActiveEdge { - segment: *segment, - crossing: *crossing, - } + fn from_segment_and_crossing(segment: &Segment, crossing: Vector2F) -> ActiveEdge { + ActiveEdge { segment: *segment, crossing } } fn process(&mut self, builder: &SceneBuilder, built_object: &mut BuiltObject, tile_y: i32) { @@ 
-436,8 +433,8 @@ impl ActiveEdge { if segment.is_line() { let line_segment = segment.as_line_segment(); self.segment = - match self.process_line_segment(&line_segment, builder, built_object, tile_y) { - Some(lower_part) => Segment::line(&lower_part), + match self.process_line_segment(line_segment, builder, built_object, tile_y) { + Some(lower_part) => Segment::line(lower_part), None => Segment::none(), }; return; @@ -453,7 +450,7 @@ impl ActiveEdge { let first_line_segment = LineSegment2F::new(self.crossing, segment.baseline.upper_point()).orient(winding); if self - .process_line_segment(&first_line_segment, builder, built_object, tile_y) + .process_line_segment(first_line_segment, builder, built_object, tile_y) .is_some() { return; @@ -484,9 +481,9 @@ impl ActiveEdge { ); let line = before_segment.baseline.orient(winding); - match self.process_line_segment(&line, builder, built_object, tile_y) { - Some(ref lower_part) if split_t == 1.0 => { - self.segment = Segment::line(&lower_part); + match self.process_line_segment(line, builder, built_object, tile_y) { + Some(lower_part) if split_t == 1.0 => { + self.segment = Segment::line(lower_part); return; } None if split_t == 1.0 => { @@ -504,7 +501,7 @@ impl ActiveEdge { fn process_line_segment( &mut self, - line_segment: &LineSegment2F, + line_segment: LineSegment2F, builder: &SceneBuilder, built_object: &mut BuiltObject, tile_y: i32, @@ -516,7 +513,7 @@ impl ActiveEdge { ); if line_segment.max_y() <= tile_bottom { - built_object.generate_fill_primitives_for_line(builder, *line_segment, tile_y); + built_object.generate_fill_primitives_for_line(builder, line_segment, tile_y); return None; } diff --git a/simd/src/arm/mod.rs b/simd/src/arm/mod.rs index f73c7303..e1705812 100644 --- a/simd/src/arm/mod.rs +++ b/simd/src/arm/mod.rs @@ -8,17 +8,198 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-use std::arch::aarch64::{self, float32x4_t, int32x4_t, uint32x4_t, uint64x2_t, uint8x16_t}; -use std::arch::aarch64::{uint8x8_t, uint8x8x2_t}; +use std::arch::aarch64::{self, float32x2_t, float32x4_t, int32x2_t, int32x4_t}; +use std::arch::aarch64::{uint32x2_t, uint32x4_t}; use std::f32; use std::fmt::{self, Debug, Formatter}; use std::mem; -use std::ops::{Add, Index, IndexMut, Mul, Sub}; +use std::ops::{Add, BitAnd, BitOr, Index, IndexMut, Mul, Shr, Sub}; mod swizzle_f32x4; mod swizzle_i32x4; -// 32-bit floats +// Two 32-bit floats + +#[derive(Clone, Copy)] +pub struct F32x2(pub float32x2_t); + +impl F32x2 { + // Constructors + + #[inline] + pub fn new(a: f32, b: f32) -> F32x2 { + unsafe { F32x2(mem::transmute([a, b])) } + } + + #[inline] + pub fn splat(x: f32) -> F32x2 { + F32x2::new(x, x) + } + + // Basic operations + + #[inline] + pub fn approx_recip(self) -> F32x2 { + unsafe { F32x2(vrecpe_v2f32(self.0)) } + } + + #[inline] + pub fn min(self, other: F32x2) -> F32x2 { + unsafe { F32x2(simd_fmin(self.0, other.0)) } + } + + #[inline] + pub fn max(self, other: F32x2) -> F32x2 { + unsafe { F32x2(simd_fmax(self.0, other.0)) } + } + + #[inline] + pub fn clamp(self, min: F32x2, max: F32x2) -> F32x2 { + self.max(min).min(max) + } + + #[inline] + pub fn abs(self) -> F32x2 { + unsafe { F32x2(fabs_v2f32(self.0)) } + } + + #[inline] + pub fn floor(self) -> F32x2 { + unsafe { F32x2(floor_v2f32(self.0)) } + } + + #[inline] + pub fn ceil(self) -> F32x2 { + unsafe { F32x2(ceil_v2f32(self.0)) } + } + + #[inline] + pub fn round(self) -> F32x2 { + unsafe { F32x2(round_v2f32(self.0)) } + } + + #[inline] + pub fn sqrt(self) -> F32x2 { + unsafe { F32x2(sqrt_v2f32(self.0)) } + } + + // Packed comparisons + + #[inline] + pub fn packed_eq(self, other: F32x2) -> U32x2 { + unsafe { U32x2(simd_eq(self.0, other.0)) } + } + + #[inline] + pub fn packed_gt(self, other: F32x2) -> U32x2 { + unsafe { U32x2(simd_gt(self.0, other.0)) } + } + + #[inline] + pub fn packed_lt(self, other: F32x2) -> 
U32x2 { + unsafe { U32x2(simd_lt(self.0, other.0)) } + } + + #[inline] + pub fn packed_le(self, other: F32x2) -> U32x2 { + unsafe { U32x2(simd_le(self.0, other.0)) } + } + + // Conversions + + #[inline] + pub fn to_f32x4(self) -> F32x4 { + self.concat_xy_xy(F32x2::default()) + } + + #[inline] + pub fn to_i32x2(self) -> I32x2 { + unsafe { I32x2(simd_cast(self.0)) } + } + + #[inline] + pub fn to_i32x4(self) -> I32x4 { + self.to_i32x2().concat_xy_xy(I32x2::default()) + } + + // Swizzle + + #[inline] + pub fn yx(self) -> F32x2 { + unsafe { F32x2(simd_shuffle2(self.0, self.0, [1, 0])) } + } + + // Concatenations + + #[inline] + pub fn concat_xy_xy(self, other: F32x2) -> F32x4 { + unsafe { F32x4(simd_shuffle4(self.0, other.0, [0, 1, 0, 1])) } + } +} + +impl Default for F32x2 { + #[inline] + fn default() -> F32x2 { + F32x2::new(0.0, 0.0) + } +} + +impl Debug for F32x2 { + #[inline] + fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> { + write!(f, "<{}, {}>", self[0], self[1]) + } +} + +impl Index for F32x2 { + type Output = f32; + #[inline] + fn index(&self, index: usize) -> &f32 { + unsafe { + assert!(index < 2); + let ptr = &self.0 as *const float32x2_t as *const f32; + mem::transmute::<*const f32, &f32>(ptr.offset(index as isize)) + } + } +} + +impl IndexMut for F32x2 { + #[inline] + fn index_mut(&mut self, index: usize) -> &mut f32 { + unsafe { + assert!(index < 2); + let ptr = &mut self.0 as *mut float32x2_t as *mut f32; + mem::transmute::<*mut f32, &mut f32>(ptr.offset(index as isize)) + } + } +} + + +impl Add for F32x2 { + type Output = F32x2; + #[inline] + fn add(self, other: F32x2) -> F32x2 { + unsafe { F32x2(simd_add(self.0, other.0)) } + } +} + +impl Mul for F32x2 { + type Output = F32x2; + #[inline] + fn mul(self, other: F32x2) -> F32x2 { + unsafe { F32x2(simd_mul(self.0, other.0)) } + } +} + +impl Sub for F32x2 { + type Output = F32x2; + #[inline] + fn sub(self, other: F32x2) -> F32x2 { + unsafe { F32x2(simd_sub(self.0, other.0)) } + } +} + +// Four 
32-bit floats #[derive(Clone, Copy)] pub struct F32x4(pub float32x4_t); @@ -103,32 +284,56 @@ impl F32x4 { unsafe { U32x4(simd_lt(self.0, other.0)) } } - // Converts these packed floats to integers. + // Swizzle conversions + #[inline] - pub fn to_i32x4(self) -> I32x4 { - unsafe { I32x4(simd_cast(self.0)) } + pub fn xy(self) -> F32x2 { + unsafe { F32x2(simd_shuffle2(self.0, self.0, [0, 1])) } + } + + #[inline] + pub fn yx(self) -> F32x2 { + unsafe { F32x2(simd_shuffle2(self.0, self.0, [1, 0])) } + } + + #[inline] + pub fn xw(self) -> F32x2 { + unsafe { F32x2(simd_shuffle2(self.0, self.0, [0, 3])) } + } + + #[inline] + pub fn zy(self) -> F32x2 { + unsafe { F32x2(simd_shuffle2(self.0, self.0, [2, 1])) } + } + + #[inline] + pub fn zw(self) -> F32x2 { + unsafe { F32x2(simd_shuffle2(self.0, self.0, [2, 3])) } } // Concatenations #[inline] pub fn concat_xy_xy(self, other: F32x4) -> F32x4 { - unsafe { F32x4(simd_shuffle4(self.0, other.0, [0, 1, 4, 5])) } + unsafe { F32x4(simd_shuffle4(self.0, other.0, [0, 1, 0, 1])) } } #[inline] pub fn concat_xy_zw(self, other: F32x4) -> F32x4 { - unsafe { F32x4(simd_shuffle4(self.0, other.0, [0, 1, 6, 7])) } + unsafe { F32x4(simd_shuffle4(self.0, other.0, [0, 1, 2, 3])) } } #[inline] pub fn concat_zw_zw(self, other: F32x4) -> F32x4 { - unsafe { F32x4(simd_shuffle4(self.0, other.0, [2, 3, 6, 7])) } + unsafe { F32x4(simd_shuffle4(self.0, other.0, [2, 3, 2, 3])) } } + // Conversions + + // Converts these packed floats to integers. 
#[inline] - pub fn concat_wz_yx(self, other: F32x4) -> F32x4 { - unsafe { F32x4(simd_shuffle4(self.0, other.0, [3, 2, 5, 4])) } + pub fn to_i32x4(self) -> I32x4 { + unsafe { I32x4(simd_cast(self.0)) } } } @@ -200,7 +405,105 @@ impl Sub for F32x4 { } } -// 32-bit signed integers +// Two 32-bit signed integers + +#[derive(Clone, Copy, Debug)] +pub struct I32x2(pub int32x2_t); + +impl I32x2 { + #[inline] + pub fn new(x: i32, y: i32) -> I32x2 { + unsafe { I32x2(mem::transmute([x, y])) } + } + + #[inline] + pub fn splat(x: i32) -> I32x2 { + I32x2::new(x, x) + } + + #[inline] + pub fn packed_eq(self, other: I32x2) -> U32x2 { + unsafe { U32x2(simd_eq(self.0, other.0)) } + } + + // Concatenations + + #[inline] + pub fn concat_xy_xy(self, other: I32x2) -> I32x4 { + unsafe { I32x4(simd_shuffle4(self.0, other.0, [0, 1, 0, 1])) } + } + + // Conversions + + /// Converts these packed integers to floats. + #[inline] + pub fn to_f32x2(self) -> F32x2 { + unsafe { F32x2(simd_cast(self.0)) } + } +} + +impl Default for I32x2 { + #[inline] + fn default() -> I32x2 { + I32x2::splat(0) + } +} + +impl PartialEq for I32x2 { + #[inline] + fn eq(&self, other: &I32x2) -> bool { + self.packed_eq(*other).is_all_ones() + } +} + +impl Index for I32x2 { + type Output = i32; + #[inline] + fn index(&self, index: usize) -> &i32 { + unsafe { + assert!(index < 2); + let ptr = &self.0 as *const int32x2_t as *const i32; + mem::transmute::<*const i32, &i32>(ptr.offset(index as isize)) + } + } +} + +impl IndexMut for I32x2 { + #[inline] + fn index_mut(&mut self, index: usize) -> &mut i32 { + unsafe { + assert!(index < 2); + let ptr = &mut self.0 as *mut int32x2_t as *mut i32; + mem::transmute::<*mut i32, &mut i32>(ptr.offset(index as isize)) + } + } +} + +impl Add for I32x2 { + type Output = I32x2; + #[inline] + fn add(self, other: I32x2) -> I32x2 { + unsafe { I32x2(simd_add(self.0, other.0)) } + } +} + +impl Sub for I32x2 { + type Output = I32x2; + #[inline] + fn sub(self, other: I32x2) -> I32x2 { + unsafe 
{ I32x2(simd_sub(self.0, other.0)) } + } +} + +impl Mul for I32x2 { + type Output = I32x2; + #[inline] + fn mul(self, other: I32x2) -> I32x2 { + unsafe { I32x2(simd_mul(self.0, other.0)) } + } +} + +// Four 32-bit signed integers #[derive(Clone, Copy, Debug)] pub struct I32x4(pub int32x4_t); @@ -216,11 +519,6 @@ impl I32x4 { I32x4::new(x, x, x, x) } - #[inline] - pub fn as_u8x16(self) -> U8x16 { - unsafe { U8x16(*mem::transmute::<&int32x4_t, &uint8x16_t>(&self.0)) } - } - #[inline] pub fn min(self, other: I32x4) -> I32x4 { unsafe { I32x4(simd_fmin(self.0, other.0)) } @@ -245,6 +543,33 @@ impl I32x4 { unsafe { I32x4(simd_shuffle4(self.0, other.0, [0, 1, 4, 5])) } } + // Swizzle conversions + + #[inline] + pub fn xy(self) -> I32x2 { + unsafe { I32x2(simd_shuffle2(self.0, self.0, [0, 1])) } + } + + #[inline] + pub fn yx(self) -> I32x2 { + unsafe { I32x2(simd_shuffle2(self.0, self.0, [1, 0])) } + } + + #[inline] + pub fn xw(self) -> I32x2 { + unsafe { I32x2(simd_shuffle2(self.0, self.0, [0, 3])) } + } + + #[inline] + pub fn zy(self) -> I32x2 { + unsafe { I32x2(simd_shuffle2(self.0, self.0, [2, 1])) } + } + + #[inline] + pub fn zw(self) -> I32x2 { + unsafe { I32x2(simd_shuffle2(self.0, self.0, [2, 3])) } + } + // Conversions /// Converts these packed integers to floats. 
@@ -315,7 +640,60 @@ impl PartialEq for I32x4 { } } -// 32-bit unsigned integers +impl BitAnd for I32x4 { + type Output = I32x4; + #[inline] + fn bitand(self, other: I32x4) -> I32x4 { + unsafe { I32x4(simd_and(self.0, other.0)) } + } +} + +impl BitOr for I32x4 { + type Output = I32x4; + #[inline] + fn bitor(self, other: I32x4) -> I32x4 { + unsafe { I32x4(simd_or(self.0, other.0)) } + } +} + +impl Shr for I32x4 { + type Output = I32x4; + #[inline] + fn shr(self, other: I32x4) -> I32x4 { + unsafe { I32x4(simd_shr(self.0, other.0)) } + } +} + +// Two 32-bit unsigned integers + +#[derive(Clone, Copy)] +pub struct U32x2(pub uint32x2_t); + +impl U32x2 { + #[inline] + pub fn is_all_ones(&self) -> bool { + unsafe { aarch64::vminv_u32(self.0) == !0 } + } + + #[inline] + pub fn is_all_zeroes(&self) -> bool { + unsafe { aarch64::vmaxv_u32(self.0) == 0 } + } +} + +impl Index for U32x2 { + type Output = u32; + #[inline] + fn index(&self, index: usize) -> &u32 { + unsafe { + assert!(index < 2); + let ptr = &self.0 as *const uint32x2_t as *const u32; + mem::transmute::<*const u32, &u32>(ptr.offset(index as isize)) + } + } +} + +// Four 32-bit unsigned integers #[derive(Clone, Copy)] pub struct U32x4(pub uint32x4_t); @@ -344,44 +722,6 @@ impl Index for U32x4 { } } -// 8-bit unsigned integers - -#[derive(Clone, Copy)] -pub struct U8x16(pub uint8x16_t); - -impl U8x16 { - #[inline] - pub fn as_i32x4(self) -> I32x4 { - unsafe { I32x4(*mem::transmute::<&uint8x16_t, &int32x4_t>(&self.0)) } - } - - #[inline] - pub fn shuffle(self, indices: U8x16) -> U8x16 { - unsafe { - let table = mem::transmute::(self.0); - let low = aarch64::vtbl2_u8(table, indices.extract_low()); - let high = aarch64::vtbl2_u8(table, indices.extract_high()); - U8x16(aarch64::vcombine_u8(low, high)) - } - } - - #[inline] - fn extract_low(self) -> uint8x8_t { - unsafe { - let low = simd_extract(mem::transmute::(self.0), 0); - mem::transmute::(low) - } - } - - #[inline] - fn extract_high(self) -> uint8x8_t { - unsafe { 
- let high = simd_extract(mem::transmute::(self.0), 1); - mem::transmute::(high) - } - } -} - // Intrinsics extern "platform-intrinsic" { @@ -389,6 +729,11 @@ extern "platform-intrinsic" { fn simd_mul(x: T, y: T) -> T; fn simd_sub(x: T, y: T) -> T; + fn simd_shr(x: T, y: T) -> T; + + fn simd_and(x: T, y: T) -> T; + fn simd_or(x: T, y: T) -> T; + fn simd_fmin(x: T, y: T) -> T; fn simd_fmax(x: T, y: T) -> T; @@ -397,15 +742,24 @@ extern "platform-intrinsic" { fn simd_le(x: T, y: T) -> U; fn simd_lt(x: T, y: T) -> U; + fn simd_shuffle2(x: T, y: T, idx: [u32; 2]) -> U; fn simd_shuffle4(x: T, y: T, idx: [u32; 4]) -> U; fn simd_cast(x: T) -> U; - - fn simd_insert(x: T, index: u32, value: U) -> T; - fn simd_extract(x: T, index: u32) -> U; } extern "C" { + #[link_name = "llvm.fabs.v2f32"] + fn fabs_v2f32(a: float32x2_t) -> float32x2_t; + #[link_name = "llvm.floor.v2f32"] + fn floor_v2f32(a: float32x2_t) -> float32x2_t; + #[link_name = "llvm.ceil.v2f32"] + fn ceil_v2f32(a: float32x2_t) -> float32x2_t; + #[link_name = "llvm.round.v2f32"] + fn round_v2f32(a: float32x2_t) -> float32x2_t; + #[link_name = "llvm.sqrt.v2f32"] + fn sqrt_v2f32(a: float32x2_t) -> float32x2_t; + #[link_name = "llvm.fabs.v4f32"] fn fabs_v4f32(a: float32x4_t) -> float32x4_t; #[link_name = "llvm.floor.v4f32"] @@ -417,6 +771,9 @@ extern "C" { #[link_name = "llvm.sqrt.v4f32"] fn sqrt_v4f32(a: float32x4_t) -> float32x4_t; + #[link_name = "llvm.aarch64.neon.frecpe.v2f32"] + fn vrecpe_v2f32(a: float32x2_t) -> float32x2_t; + #[link_name = "llvm.aarch64.neon.frecpe.v4f32"] fn vrecpe_v4f32(a: float32x4_t) -> float32x4_t; } diff --git a/simd/src/extras.rs b/simd/src/extras.rs index 8abea386..55e265cf 100644 --- a/simd/src/extras.rs +++ b/simd/src/extras.rs @@ -8,10 +8,84 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-use crate::default::{F32x4, I32x4}; +use crate::default::{F32x2, F32x4, I32x2, I32x4}; use std::ops::{AddAssign, MulAssign, Neg, SubAssign}; -// 32-bit floats +// Two 32-bit floats + +impl F32x2 { + // Constructors + + #[inline] + pub fn from_slice(slice: &[f32]) -> F32x2 { + F32x2::new(slice[0], slice[1]) + } + + // Accessors + + #[inline] + pub fn x(self) -> f32 { + self[0] + } + + #[inline] + pub fn y(self) -> f32 { + self[1] + } + + // Mutators + + #[inline] + pub fn set_x(&mut self, x: f32) { + self[0] = x + } + + #[inline] + pub fn set_y(&mut self, y: f32) { + self[1] = y + } + + // Comparisons + + #[inline] + pub fn approx_eq(self, other: F32x2, epsilon: f32) -> bool { + (self - other) + .abs() + .packed_gt(F32x2::splat(epsilon)) + .is_all_zeroes() + } +} + +impl AddAssign for F32x2 { + #[inline] + fn add_assign(&mut self, other: F32x2) { + *self = *self + other + } +} + +impl SubAssign for F32x2 { + #[inline] + fn sub_assign(&mut self, other: F32x2) { + *self = *self - other + } +} + +impl MulAssign for F32x2 { + #[inline] + fn mul_assign(&mut self, other: F32x2) { + *self = *self * other + } +} + +impl Neg for F32x2 { + type Output = F32x2; + #[inline] + fn neg(self) -> F32x2 { + F32x2::default() - self + } +} + +// Four 32-bit floats impl F32x4 { // Constructors @@ -105,7 +179,38 @@ impl Neg for F32x4 { } } -// 32-bit integers +// Two 32-bit integers + +impl AddAssign for I32x2 { + #[inline] + fn add_assign(&mut self, other: I32x2) { + *self = *self + other + } +} + +impl SubAssign for I32x2 { + #[inline] + fn sub_assign(&mut self, other: I32x2) { + *self = *self - other + } +} + +impl MulAssign for I32x2 { + #[inline] + fn mul_assign(&mut self, other: I32x2) { + *self = *self * other + } +} + +impl Neg for I32x2 { + type Output = I32x2; + #[inline] + fn neg(self) -> I32x2 { + I32x2::default() - self + } +} + +// Four 32-bit integers impl AddAssign for I32x4 { #[inline] diff --git a/simd/src/scalar/mod.rs b/simd/src/scalar/mod.rs index 19049d01..867dfe4b 
100644 --- a/simd/src/scalar/mod.rs +++ b/simd/src/scalar/mod.rs @@ -10,13 +10,182 @@ use std::f32; use std::fmt::{self, Debug, Formatter}; -use std::mem; -use std::ops::{Add, Index, IndexMut, Mul, Sub}; +use std::ops::{Add, BitAnd, BitOr, Index, IndexMut, Mul, Shr, Sub}; mod swizzle_f32x4; mod swizzle_i32x4; -// 32-bit floats +// Two 32-bit floats + +#[derive(Clone, Copy, Debug, Default, PartialEq)] +pub struct F32x2(pub [f32; 2]); + +impl F32x2 { + // Constructors + + #[inline] + pub fn new(a: f32, b: f32) -> F32x2 { + F32x2([a, b]) + } + + #[inline] + pub fn splat(x: f32) -> F32x2 { + F32x2([x, x]) + } + + // Basic operations + + #[inline] + pub fn approx_recip(self) -> F32x2 { + F32x2([1.0 / self[0], 1.0 / self[1]]) + } + + #[inline] + pub fn min(self, other: F32x2) -> F32x2 { + F32x2([f32::min(self[0], other[0]), f32::min(self[1], other[1])]) + } + + #[inline] + pub fn max(self, other: F32x2) -> F32x2 { + F32x2([f32::max(self[0], other[0]), f32::max(self[1], other[1])]) + } + + #[inline] + pub fn clamp(self, min: F32x2, max: F32x2) -> F32x2 { + self.max(min).min(max) + } + + #[inline] + pub fn abs(self) -> F32x2 { + F32x2([self[0].abs(), self[1].abs()]) + } + + #[inline] + pub fn floor(self) -> F32x2 { + F32x2([self[0].floor(), self[1].floor()]) + } + + #[inline] + pub fn ceil(self) -> F32x2 { + F32x2([self[0].ceil(), self[1].ceil()]) + } + + #[inline] + pub fn round(self) -> F32x2 { + F32x2([self[0].round(), self[1].round()]) + } + + #[inline] + pub fn sqrt(self) -> F32x2 { + F32x2([self[0].sqrt(), self[1].sqrt()]) + } + + // Packed comparisons + + #[inline] + pub fn packed_eq(self, other: F32x2) -> U32x2 { + U32x2([ + if self[0] == other[0] { !0 } else { 0 }, + if self[1] == other[1] { !0 } else { 0 }, + ]) + } + + #[inline] + pub fn packed_gt(self, other: F32x2) -> U32x2 { + U32x2([ + if self[0] > other[0] { !0 } else { 0 }, + if self[1] > other[1] { !0 } else { 0 }, + ]) + } + + #[inline] + pub fn packed_lt(self, other: F32x2) -> U32x2 { + U32x2([ + if 
self[0] < other[0] { !0 } else { 0 }, + if self[1] < other[1] { !0 } else { 0 }, + ]) + } + + #[inline] + pub fn packed_le(self, other: F32x2) -> U32x2 { + U32x2([ + if self[0] <= other[0] { !0 } else { 0 }, + if self[1] <= other[1] { !0 } else { 0 }, + ]) + } + + // Conversions + + #[inline] + pub fn to_f32x4(self) -> F32x4 { + F32x4([self[0] as f32, self[1] as f32, 0.0, 0.0]) + } + + #[inline] + pub fn to_i32x2(self) -> I32x2 { + I32x2([self[0] as i32, self[1] as i32]) + } + + #[inline] + pub fn to_i32x4(self) -> I32x4 { + I32x4([self[0] as i32, self[1] as i32, 0, 0]) + } + + // Swizzle + + #[inline] + pub fn yx(self) -> F32x2 { + F32x2([self[1], self[0]]) + } + + // Concatenations + + #[inline] + pub fn concat_xy_xy(self, other: F32x2) -> F32x4 { + F32x4([self[0], self[1], other[0], other[1]]) + } +} + +impl Index for F32x2 { + type Output = f32; + #[inline] + fn index(&self, index: usize) -> &f32 { + &self.0[index] + } +} + +impl IndexMut for F32x2 { + #[inline] + fn index_mut(&mut self, index: usize) -> &mut f32 { + &mut self.0[index] + } +} + +impl Add for F32x2 { + type Output = F32x2; + #[inline] + fn add(self, other: F32x2) -> F32x2 { + F32x2([self[0] + other[0], self[1] + other[1]]) + } +} + +impl Mul for F32x2 { + type Output = F32x2; + #[inline] + fn mul(self, other: F32x2) -> F32x2 { + F32x2([self[0] * other[0], self[1] * other[1]]) + } +} + +impl Sub for F32x2 { + type Output = F32x2; + #[inline] + fn sub(self, other: F32x2) -> F32x2 { + F32x2([self[0] - other[0], self[1] - other[1]]) + } +} + +// Four 32-bit floats #[derive(Clone, Copy, Default, PartialEq)] pub struct F32x4(pub [f32; 4]); @@ -162,6 +331,33 @@ impl F32x4 { ]) } + // Swizzle conversions + + #[inline] + pub fn xy(self) -> F32x2 { + F32x2([self[0], self[1]]) + } + + #[inline] + pub fn xw(self) -> F32x2 { + F32x2([self[0], self[3]]) + } + + #[inline] + pub fn yx(self) -> F32x2 { + F32x2([self[1], self[0]]) + } + + #[inline] + pub fn zy(self) -> F32x2 { + F32x2([self[2], self[1]]) + } + + 
#[inline] + pub fn zw(self) -> F32x2 { + F32x2([self[2], self[3]]) + } + // Concatenations #[inline] @@ -246,7 +442,84 @@ impl Sub for F32x4 { } } -// 32-bit signed integers +// Two 32-bit signed integers + +#[derive(Clone, Copy, Default, Debug, PartialEq)] +pub struct I32x2([i32; 2]); + +impl I32x2 { + #[inline] + pub fn new(x: i32, y: i32) -> I32x2 { + I32x2([x, y]) + } + + #[inline] + pub fn splat(x: i32) -> I32x2 { + I32x2([x, x]) + } + + #[inline] + pub fn packed_eq(self, other: I32x2) -> U32x2 { + U32x2([ + if self[0] == other[0] { !0 } else { 0 }, + if self[1] == other[1] { !0 } else { 0 }, + ]) + } + + #[inline] + pub fn concat_xy_xy(self, other: I32x2) -> I32x4 { + I32x4([self[0], self[1], other[0], other[1]]) + } + + // Conversions + + /// Converts these packed integers to floats. + #[inline] + pub fn to_f32x2(self) -> F32x2 { + F32x2([self[0] as f32, self[1] as f32]) + } +} + +impl Index for I32x2 { + type Output = i32; + #[inline] + fn index(&self, index: usize) -> &i32 { + &self.0[index] + } +} + +impl IndexMut for I32x2 { + #[inline] + fn index_mut(&mut self, index: usize) -> &mut i32 { + &mut self.0[index] + } +} + +impl Add for I32x2 { + type Output = I32x2; + #[inline] + fn add(self, other: I32x2) -> I32x2 { + I32x2([self[0] + other[0], self[1] + other[1]]) + } +} + +impl Sub for I32x2 { + type Output = I32x2; + #[inline] + fn sub(self, other: I32x2) -> I32x2 { + I32x2([self[0] - other[0], self[1] - other[1]]) + } +} + +impl Mul for I32x2 { + type Output = I32x2; + #[inline] + fn mul(self, other: I32x2) -> I32x2 { + I32x2([self[0] * other[0], self[1] * other[1]]) + } +} + +// Four 32-bit signed integers #[derive(Clone, Copy, Default, Debug, PartialEq)] pub struct I32x4([i32; 4]); @@ -263,10 +536,6 @@ impl I32x4 { } #[inline] - pub fn as_u8x16(self) -> U8x16 { - unsafe { U8x16(*mem::transmute::<&[i32; 4], &[u8; 16]>(&self.0)) } - } - #[inline] pub fn min(self, other: I32x4) -> I32x4 { I32x4([ @@ -306,6 +575,28 @@ impl I32x4 { I32x4([self[0], 
self[1], other[0], other[1]]) } + // Swizzle conversions + + #[inline] + pub fn xy(self) -> I32x2 { + I32x2([self[0], self[1]]) + } + + #[inline] + pub fn xw(self) -> I32x2 { + I32x2([self[0], self[3]]) + } + + #[inline] + pub fn zy(self) -> I32x2 { + I32x2([self[2], self[1]]) + } + + #[inline] + pub fn zw(self) -> I32x2 { + I32x2([self[2], self[3]]) + } + // Conversions /// Converts these packed integers to floats. @@ -374,7 +665,61 @@ impl Mul for I32x4 { } } -// 32-bit unsigned integers +impl BitAnd for I32x4 { + type Output = I32x4; + #[inline] + fn bitand(self, other: I32x4) -> I32x4 { + I32x4([self[0] & other[0], self[1] & other[1], self[2] & other[2], self[3] & other[3]]) + } +} + +impl BitOr for I32x4 { + type Output = I32x4; + #[inline] + fn bitor(self, other: I32x4) -> I32x4 { + I32x4([self[0] | other[0], self[1] | other[1], self[2] | other[2], self[3] | other[3]]) + } +} + +impl Shr for I32x4 { + type Output = I32x4; + #[inline] + fn shr(self, other: I32x4) -> I32x4 { + I32x4([ + self[0] >> other[0], + self[1] >> other[1], + self[2] >> other[2], + self[3] >> other[3], + ]) + } +} + +// Two 32-bit unsigned integers + +#[derive(Clone, Copy)] +pub struct U32x2(pub [u32; 2]); + +impl U32x2 { + #[inline] + pub fn is_all_ones(&self) -> bool { + self[0] == !0 && self[1] == !0 + } + + #[inline] + pub fn is_all_zeroes(&self) -> bool { + self[0] == 0 && self[1] == 0 + } +} + +impl Index for U32x2 { + type Output = u32; + #[inline] + fn index(&self, index: usize) -> &u32 { + &self.0[index] + } +} + +// Four 32-bit unsigned integers #[derive(Clone, Copy)] pub struct U32x4(pub [u32; 4]); @@ -398,24 +743,3 @@ impl Index for U32x4 { &self.0[index] } } - -// 8-bit unsigned integers - -#[derive(Clone, Copy)] -pub struct U8x16([u8; 16]); - -impl U8x16 { - #[inline] - pub fn as_i32x4(self) -> I32x4 { - unsafe { I32x4(*mem::transmute::<&[u8; 16], &[i32; 4]>(&self.0)) } - } - - #[inline] - pub fn shuffle(self, indices: U8x16) -> U8x16 { - let mut result = [0; 16]; - for 
index in 0..16 { - result[index] = self.0[(indices.0[index] & 0x0f) as usize] - } - U8x16(result) - } -} diff --git a/simd/src/x86/mod.rs b/simd/src/x86/mod.rs index d10e1230..41e86e99 100644 --- a/simd/src/x86/mod.rs +++ b/simd/src/x86/mod.rs @@ -12,12 +12,195 @@ use std::arch::x86_64::{self, __m128, __m128i, _MM_FROUND_TO_NEAREST_INT}; use std::cmp::PartialEq; use std::fmt::{self, Debug, Formatter}; use std::mem; -use std::ops::{Add, BitXor, Index, IndexMut, Mul, Not, Sub}; +use std::ops::{Add, BitAnd, BitOr, BitXor, Index, IndexMut, Mul, Not, Shr, Sub}; mod swizzle_f32x4; mod swizzle_i32x4; -// 32-bit floats +// Two 32-bit floats + +#[derive(Clone, Copy)] +pub struct F32x2(pub u64); + +impl F32x2 { + // Constructors + + #[inline] + pub fn new(a: f32, b: f32) -> F32x2 { + unsafe { + let a = mem::transmute::<*const f32, *const u32>(&a); + let b = mem::transmute::<*const f32, *const u32>(&b); + F32x2((*a as u64) | ((*b as u64) << 32)) + } + } + + #[inline] + pub fn splat(x: f32) -> F32x2 { + F32x2::new(x, x) + } + + // Basic operations + + #[inline] + pub fn approx_recip(self) -> F32x2 { + self.to_f32x4().approx_recip().xy() + } + + #[inline] + pub fn min(self, other: F32x2) -> F32x2 { + self.to_f32x4().min(other.to_f32x4()).xy() + } + + #[inline] + pub fn max(self, other: F32x2) -> F32x2 { + self.to_f32x4().max(other.to_f32x4()).xy() + } + + #[inline] + pub fn clamp(self, min: F32x2, max: F32x2) -> F32x2 { + self.to_f32x4().clamp(min.to_f32x4(), max.to_f32x4()).xy() + } + + #[inline] + pub fn abs(self) -> F32x2 { + self.to_f32x4().abs().xy() + } + + #[inline] + pub fn floor(self) -> F32x2 { + self.to_f32x4().floor().xy() + } + + #[inline] + pub fn ceil(self) -> F32x2 { + self.to_f32x4().ceil().xy() + } + + #[inline] + pub fn round(self) -> F32x2 { + self.to_f32x4().round().xy() + } + + #[inline] + pub fn sqrt(self) -> F32x2 { + self.to_f32x4().sqrt().xy() + } + + // Packed comparisons + + #[inline] + pub fn packed_eq(self, other: F32x2) -> U32x2 { + 
self.to_f32x4().packed_eq(other.to_f32x4()).xy() + } + + #[inline] + pub fn packed_gt(self, other: F32x2) -> U32x2 { + self.to_f32x4().packed_gt(other.to_f32x4()).xy() + } + + #[inline] + pub fn packed_lt(self, other: F32x2) -> U32x2 { + self.to_f32x4().packed_lt(other.to_f32x4()).xy() + } + + #[inline] + pub fn packed_le(self, other: F32x2) -> U32x2 { + self.to_f32x4().packed_le(other.to_f32x4()).xy() + } + + // Conversions + + #[inline] + pub fn to_f32x4(self) -> F32x4 { + unsafe { F32x4(x86_64::_mm_castsi128_ps(x86_64::_mm_cvtsi64_si128(self.0 as i64))) } + } + + #[inline] + pub fn to_i32x2(self) -> I32x2 { + self.to_i32x4().xy() + } + + #[inline] + pub fn to_i32x4(self) -> I32x4 { + self.to_f32x4().to_i32x4() + } + + // Swizzle + + #[inline] + pub fn yx(self) -> F32x2 { + self.to_f32x4().yx() + } + + // Concatenations + + #[inline] + pub fn concat_xy_xy(self, other: F32x2) -> F32x4 { + self.to_f32x4().concat_xy_xy(other.to_f32x4()) + } +} + +impl Default for F32x2 { + #[inline] + fn default() -> F32x2 { + F32x2(0) + } +} + +impl Index for F32x2 { + type Output = f32; + #[inline] + fn index(&self, index: usize) -> &f32 { + unsafe { &mem::transmute::<&u64, &[f32; 2]>(&self.0)[index] } + } +} + +impl IndexMut for F32x2 { + #[inline] + fn index_mut(&mut self, index: usize) -> &mut f32 { + unsafe { &mut mem::transmute::<&mut u64, &mut [f32; 2]>(&mut self.0)[index] } + } +} + +impl Debug for F32x2 { + #[inline] + fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> { + write!(f, "<{}, {}>", self[0], self[1]) + } +} + +impl PartialEq for F32x2 { + #[inline] + fn eq(&self, other: &F32x2) -> bool { + self.packed_eq(*other).is_all_ones() + } +} + +impl Add for F32x2 { + type Output = F32x2; + #[inline] + fn add(self, other: F32x2) -> F32x2 { + (self.to_f32x4() + other.to_f32x4()).xy() + } +} + +impl Mul for F32x2 { + type Output = F32x2; + #[inline] + fn mul(self, other: F32x2) -> F32x2 { + (self.to_f32x4() * other.to_f32x4()).xy() + } +} + +impl Sub for F32x2 { + 
type Output = F32x2; + #[inline] + fn sub(self, other: F32x2) -> F32x2 { + (self.to_f32x4() - other.to_f32x4()).xy() + } +} + +// Four 32-bit floats #[derive(Clone, Copy)] pub struct F32x4(pub __m128); @@ -126,6 +309,33 @@ impl F32x4 { unsafe { I32x4(x86_64::_mm_cvtps_epi32(self.0)) } } + // Extraction + + #[inline] + pub fn xy(self) -> F32x2 { + unsafe { F32x2(x86_64::_mm_cvtsi128_si64(x86_64::_mm_castps_si128(self.0)) as u64) } + } + + #[inline] + pub fn xw(self) -> F32x2 { + unsafe { F32x2(x86_64::_mm_cvtsi128_si64(x86_64::_mm_castps_si128(self.xwyz().0)) as u64) } + } + + #[inline] + pub fn yx(self) -> F32x2 { + unsafe { F32x2(x86_64::_mm_cvtsi128_si64(x86_64::_mm_castps_si128(self.yxwz().0)) as u64) } + } + + #[inline] + pub fn zy(self) -> F32x2 { + unsafe { F32x2(x86_64::_mm_cvtsi128_si64(x86_64::_mm_castps_si128(self.zyxw().0)) as u64) } + } + + #[inline] + pub fn zw(self) -> F32x2 { + unsafe { F32x2(x86_64::_mm_cvtsi128_si64(x86_64::_mm_castps_si128(self.zwxy().0)) as u64) } + } + // Concatenations #[inline] @@ -224,7 +434,140 @@ impl Sub for F32x4 { } } -// 32-bit signed integers +// Two 32-bit signed integers + +#[derive(Clone, Copy)] +pub struct I32x2(pub u64); + +impl I32x2 { + // Constructors + + #[inline] + pub fn new(a: i32, b: i32) -> I32x2 { + unsafe { + let a = mem::transmute::<*const i32, *const u32>(&a); + let b = mem::transmute::<*const i32, *const u32>(&b); + I32x2((*a as u64) | ((*b as u64) << 32)) + } + } + + #[inline] + pub fn splat(x: i32) -> I32x2 { + I32x2::new(x, x) + } + + // Concatenations + + #[inline] + pub fn concat_xy_xy(self, other: I32x2) -> I32x4 { + self.to_i32x4().concat_xy_xy(other.to_i32x4()) + } + + // Conversions + + #[inline] + pub fn to_i32x4(self) -> I32x4 { + unsafe { I32x4(x86_64::_mm_cvtsi64_si128(self.0 as i64)) } + } + + #[inline] + pub fn to_f32x4(self) -> F32x4 { + self.to_i32x4().to_f32x4() + } + + /// Converts these packed integers to floats. 
+ #[inline] + pub fn to_f32x2(self) -> F32x2 { + self.to_f32x4().xy() + } + + // Basic operations + + #[inline] + pub fn min(self, other: I32x2) -> I32x2 { + self.to_i32x4().min(other.to_i32x4()).xy() + } + + // Comparisons + + // TODO(pcwalton): Make a `U32x2` type and use that! + #[inline] + pub fn packed_eq(self, other: I32x2) -> U32x4 { + self.to_i32x4().packed_eq(other.to_i32x4()) + } + + #[inline] + pub fn packed_gt(self, other: I32x2) -> U32x4 { + self.to_i32x4().packed_gt(other.to_i32x4()) + } + + #[inline] + pub fn packed_le(self, other: I32x2) -> U32x4 { + self.to_i32x4().packed_le(other.to_i32x4()) + } +} + +impl Default for I32x2 { + #[inline] + fn default() -> I32x2 { + I32x2(0) + } +} + +impl Index for I32x2 { + type Output = i32; + #[inline] + fn index(&self, index: usize) -> &i32 { + unsafe { &mem::transmute::<&u64, &[i32; 2]>(&self.0)[index] } + } +} + +impl IndexMut for I32x2 { + #[inline] + fn index_mut(&mut self, index: usize) -> &mut i32 { + unsafe { &mut mem::transmute::<&mut u64, &mut [i32; 2]>(&mut self.0)[index] } + } +} + +impl Add for I32x2 { + type Output = I32x2; + #[inline] + fn add(self, other: I32x2) -> I32x2 { + (self.to_i32x4() + other.to_i32x4()).xy() + } +} + +impl Sub for I32x2 { + type Output = I32x2; + #[inline] + fn sub(self, other: I32x2) -> I32x2 { + (self.to_i32x4() - other.to_i32x4()).xy() + } +} + +impl Mul for I32x2 { + type Output = I32x2; + #[inline] + fn mul(self, other: I32x2) -> I32x2 { + (self.to_i32x4() * other.to_i32x4()).xy() + } +} + +impl Debug for I32x2 { + #[inline] + fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> { + write!(f, "<{}, {}>", self[0], self[1]) + } +} + +impl PartialEq for I32x2 { + #[inline] + fn eq(&self, other: &I32x2) -> bool { + self.packed_eq(*other).is_all_ones() + } +} + +// Four 32-bit signed integers #[derive(Clone, Copy)] pub struct I32x4(pub __m128i); @@ -245,6 +588,33 @@ impl I32x4 { unsafe { I32x4(x86_64::_mm_set1_epi32(x)) } } + // Extraction + + #[inline] + pub fn 
xy(self) -> I32x2 { + unsafe { I32x2(x86_64::_mm_cvtsi128_si64(self.0) as u64) } + } + + #[inline] + pub fn xw(self) -> I32x2 { + unsafe { I32x2(x86_64::_mm_cvtsi128_si64(self.xwyz().0) as u64) } + } + + #[inline] + pub fn yx(self) -> I32x2 { + unsafe { I32x2(x86_64::_mm_cvtsi128_si64(self.yxwz().0) as u64) } + } + + #[inline] + pub fn zy(self) -> I32x2 { + unsafe { I32x2(x86_64::_mm_cvtsi128_si64(self.zyxw().0) as u64) } + } + + #[inline] + pub fn zw(self) -> I32x2 { + unsafe { I32x2(x86_64::_mm_cvtsi128_si64(self.zwxy().0) as u64) } + } + // Concatenations #[inline] @@ -259,11 +629,6 @@ impl I32x4 { // Conversions - #[inline] - pub fn as_u8x16(self) -> U8x16 { - U8x16(self.0) - } - /// Converts these packed integers to floats. #[inline] pub fn to_f32x4(self) -> F32x4 { @@ -343,6 +708,30 @@ impl Mul for I32x4 { } } +impl BitAnd for I32x4 { + type Output = I32x4; + #[inline] + fn bitand(self, other: I32x4) -> I32x4 { + unsafe { I32x4(x86_64::_mm_and_si128(self.0, other.0)) } + } +} + +impl BitOr for I32x4 { + type Output = I32x4; + #[inline] + fn bitor(self, other: I32x4) -> I32x4 { + unsafe { I32x4(x86_64::_mm_or_si128(self.0, other.0)) } + } +} + +impl Shr for I32x4 { + type Output = I32x4; + #[inline] + fn shr(self, other: I32x4) -> I32x4 { + unsafe { I32x4(x86_64::_mm_srlv_epi32(self.0, other.0)) } + } +} + impl Debug for I32x4 { #[inline] fn fmt(&self, f: &mut Formatter) -> Result<(), fmt::Error> { @@ -357,7 +746,24 @@ impl PartialEq for I32x4 { } } -// 32-bit unsigned integers +// Two 32-bit unsigned integers + +#[derive(Clone, Copy)] +pub struct U32x2(pub u64); + +impl U32x2 { + #[inline] + pub fn is_all_ones(self) -> bool { + self.0 == !0 + } + + #[inline] + pub fn is_all_zeroes(self) -> bool { + self.0 == 0 + } +} + +// Four 32-bit unsigned integers #[derive(Clone, Copy)] pub struct U32x4(pub __m128i); @@ -390,6 +796,13 @@ impl U32x4 { unsafe { x86_64::_mm_test_all_zeros(self.0, self.0) != 0 } } + // Extraction + + #[inline] + pub fn xy(self) -> U32x2 { + 
unsafe { U32x2(x86_64::_mm_cvtsi128_si64(self.0) as u64) } + } + // Packed comparisons #[inline] @@ -435,20 +848,3 @@ impl BitXor for U32x4 { unsafe { U32x4(x86_64::_mm_xor_si128(self.0, other.0)) } } } - -// 8-bit unsigned integers - -#[derive(Clone, Copy)] -pub struct U8x16(pub __m128i); - -impl U8x16 { - #[inline] - pub fn as_i32x4(self) -> I32x4 { - I32x4(self.0) - } - - #[inline] - pub fn shuffle(self, indices: U8x16) -> U8x16 { - unsafe { U8x16(x86_64::_mm_shuffle_epi8(self.0, indices.0)) } - } -} diff --git a/svg/src/lib.rs b/svg/src/lib.rs index 2d9b6454..7347665b 100644 --- a/svg/src/lib.rs +++ b/svg/src/lib.rs @@ -318,7 +318,7 @@ where } UsvgPathSegment::LineTo { x, y } => { let to = Vector2F::new(x as f32, y as f32); - let mut segment = Segment::line(&LineSegment2F::new(self.last_subpath_point, to)); + let mut segment = Segment::line(LineSegment2F::new(self.last_subpath_point, to)); if self.just_moved { segment.flags.insert(SegmentFlags::FIRST_IN_SUBPATH); } @@ -338,8 +338,8 @@ where let ctrl1 = Vector2F::new(x2 as f32, y2 as f32); let to = Vector2F::new(x as f32, y as f32); let mut segment = Segment::cubic( - &LineSegment2F::new(self.last_subpath_point, to), - &LineSegment2F::new(ctrl0, ctrl1), + LineSegment2F::new(self.last_subpath_point, to), + LineSegment2F::new(ctrl0, ctrl1), ); if self.just_moved { segment.flags.insert(SegmentFlags::FIRST_IN_SUBPATH); @@ -349,7 +349,7 @@ where Some(segment) } UsvgPathSegment::ClosePath => { - let mut segment = Segment::line(&LineSegment2F::new( + let mut segment = Segment::line(LineSegment2F::new( self.last_subpath_point, self.first_subpath_point, )); diff --git a/swf/src/shapes.rs b/swf/src/shapes.rs index ff0a7b02..6dbe6a0e 100644 --- a/swf/src/shapes.rs +++ b/swf/src/shapes.rs @@ -93,13 +93,13 @@ impl Shape { } #[inline] - fn first(&self) -> &LineSegment { - &self.outline.first().unwrap() + fn first(&self) -> LineSegment { + self.outline.first().unwrap() } #[inline] - fn last(&self) -> &LineSegment { - 
&self.outline.last().unwrap() + fn last(&self) -> LineSegment { + self.outline.last().unwrap() } #[inline] diff --git a/ui/src/lib.rs b/ui/src/lib.rs index 7ae36ab3..377cb1ed 100644 --- a/ui/src/lib.rs +++ b/ui/src/lib.rs @@ -181,7 +181,7 @@ impl UIPresenter where D: Device { primitive, uniforms: &[ (&self.solid_program.framebuffer_size_uniform, - UniformData::Vec2(self.framebuffer_size.0.to_f32x4())), + UniformData::Vec2(self.framebuffer_size.0.to_f32x2())), (&self.solid_program.color_uniform, get_color_uniform(color)), ], textures: &[], @@ -414,11 +414,11 @@ impl UIPresenter where D: Device { textures: &[&texture], uniforms: &[ (&self.texture_program.framebuffer_size_uniform, - UniformData::Vec2(self.framebuffer_size.0.to_f32x4())), + UniformData::Vec2(self.framebuffer_size.0.to_f32x2())), (&self.texture_program.color_uniform, get_color_uniform(color)), (&self.texture_program.texture_uniform, UniformData::TextureUnit(0)), (&self.texture_program.texture_size_uniform, - UniformData::Vec2(device.texture_size(&texture).0.to_f32x4())) + UniformData::Vec2(device.texture_size(&texture).0.to_f32x2())) ], viewport: RectI::new(Vector2I::default(), self.framebuffer_size), options: RenderOptions {