From 3e6fb1157e0b9b2dbdacb2f5e410e83dd219761c Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Wed, 9 Jan 2019 19:19:25 -0800 Subject: [PATCH] Optimize `add_fill` a bit more --- utils/tile-svg/src/main.rs | 111 +++++++++++++++++++++++++------------ 1 file changed, 75 insertions(+), 36 deletions(-) diff --git a/utils/tile-svg/src/main.rs b/utils/tile-svg/src/main.rs index f62ca83a..173ed7f4 100644 --- a/utils/tile-svg/src/main.rs +++ b/utils/tile-svg/src/main.rs @@ -25,7 +25,7 @@ use fixedbitset::FixedBitSet; use hashbrown::HashMap; use jemallocator; use lyon_geom::math::Transform; -use lyon_geom::{CubicBezierSegment, LineSegment, QuadraticBezierSegment}; +use lyon_geom::{CubicBezierSegment, QuadraticBezierSegment}; use lyon_path::PathEvent; use lyon_path::iterator::PathIter; use pathfinder_path_utils::stroke::{StrokeStyle, StrokeToFillIter}; @@ -56,7 +56,7 @@ static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc; const SCALE_FACTOR: f32 = 1.0; // TODO(pcwalton): Make this configurable. -const FLATTENING_TOLERANCE: f32 = 0.333; +const FLATTENING_TOLERANCE: f32 = 0.1; const HAIRLINE_STROKE_WIDTH: f32 = 0.5; @@ -441,39 +441,32 @@ impl Contour { } } - // TODO(pcwalton): Optimize this more with SIMD? fn segment_after(&self, point_index: u32) -> Segment { debug_assert!(self.point_is_endpoint(point_index)); - let mut flags = SegmentFlags::HAS_ENDPOINTS; - let from = self.position_of(point_index); - let mut ctrl0 = Point2DF32::default(); - let mut ctrl1 = Point2DF32::default(); - let to; + let mut segment = Segment::new(); + segment.flags |= SegmentFlags::HAS_ENDPOINTS; + segment.baseline.set_from(&self.position_of(point_index)); let point1_index = self.add_to_point_index(point_index, 1); if self.point_is_endpoint(point1_index) { - to = self.position_of(point1_index); + segment.baseline.set_to(&self.position_of(point1_index)); } else { - ctrl0 = self.position_of(point1_index); - flags |= SegmentFlags::HAS_CONTROL_POINT_0; + segment.ctrl.set_from(&self.position_of(point1_index)); + segment.flags |= SegmentFlags::HAS_CONTROL_POINT_0; let point2_index = self.add_to_point_index(point_index, 2); if self.point_is_endpoint(point2_index) { - to = self.position_of(point2_index); + segment.baseline.set_to(&self.position_of(point2_index)); } else { - ctrl1 = self.position_of(point2_index); - flags |= SegmentFlags::HAS_CONTROL_POINT_1; + segment.ctrl.set_to(&self.position_of(point2_index)); + segment.flags |= SegmentFlags::HAS_CONTROL_POINT_1; let point3_index = self.add_to_point_index(point_index, 3); - to = self.position_of(point3_index); + segment.baseline.set_to(&self.position_of(point3_index)); } } - let mut segment = Segment::new(); - segment.baseline = LineSegmentF32::new(&from, &to); - segment.ctrl = LineSegmentF32::new(&ctrl0, &ctrl1); - segment.flags = flags; segment } @@ -1409,16 +1402,28 @@ impl BuiltObject { // TODO(pcwalton): SIMD-ify `tile_x` and `tile_y`. fn add_fill(&mut self, segment: &LineSegmentF32, tile_x: i16, tile_y: i16) { - let tile_origin = Point2DF32::new((i32::from(tile_x) * TILE_WIDTH as i32) as f32, - (i32::from(tile_y) * TILE_HEIGHT as i32) as f32); + let (px, subpx); + unsafe { + let mut segment = Sse41::cvtps_epi32(Sse41::mul_ps(segment.0, Sse41::set1_ps(256.0))); + + let mut tile_origin = Sse41::setzero_epi32(); + tile_origin[0] = (tile_x as i32) * (TILE_WIDTH as i32) * 256; + tile_origin[1] = (tile_y as i32) * (TILE_HEIGHT as i32) * 256; + tile_origin = Sse41::shuffle_epi32(tile_origin, 0b0100_0100); + + segment = Sse41::sub_epi32(segment, tile_origin); + segment = Sse41::min_epi32(segment, Sse41::set1_epi32(0x0fff)); + + let mut shuffle_mask = Sse41::setzero_epi32(); + shuffle_mask[0] = 0x0c08_0400; + shuffle_mask[1] = 0x0d05_0901; + segment = Sse41::shuffle_epi8(segment, shuffle_mask); + + px = LineSegmentU4((segment[1] | (segment[1] >> 12)) as u16); + subpx = LineSegmentU8(segment[0] as u32); + } + let tile_index = self.tile_coords_to_index(tile_x, tile_y); - let mut segment = *segment - tile_origin; - - let (tile_min, tile_max) = (Point2DF32::default(), Point2DF32::splat(16.0 - 1.0 / 256.0)); - segment = segment.clamp(&tile_min, &tile_max); - - let px = segment.to_line_segment_u4(); - let subpx = segment.fract().scale(256.0).to_line_segment_u8(); /* // TODO(pcwalton): Cull degenerate fills again. @@ -1430,7 +1435,6 @@ impl BuiltObject { */ self.fills.push(FillObjectPrimitive { px, subpx, tile_x, tile_y }); - self.solid_tiles.set(tile_index as usize, false); } @@ -2073,6 +2077,7 @@ impl LineSegmentF32 { Sse41::setzero_pd()))) } } + fn to(&self) -> Point2DF32 { unsafe { Point2DF32(Sse41::castpd_ps(Sse41::unpackhi_pd(Sse41::castps_pd(self.0), @@ -2080,6 +2085,22 @@ impl LineSegmentF32 { } } + fn set_from(&mut self, point: &Point2DF32) { + unsafe { + let (mut this, point) = (Sse41::castps_pd(self.0), Sse41::castps_pd(point.0)); + this[0] = point[0]; + self.0 = Sse41::castpd_ps(this); + } + } + + fn set_to(&mut self, point: &Point2DF32) { + unsafe { + let (mut this, point) = (Sse41::castps_pd(self.0), Sse41::castps_pd(point.0)); + this[1] = point[0]; + self.0 = Sse41::castpd_ps(this); + } + } + #[allow(clippy::wrong_self_convention)] fn from_x(&self) -> f32 { self.0[0] } #[allow(clippy::wrong_self_convention)] @@ -2088,6 +2109,14 @@ impl LineSegmentF32 { fn to_x(&self) -> f32 { self.0[2] } fn to_y(&self) -> f32 { self.0[3] } + fn min(&self, max: &Point2DF32) -> LineSegmentF32 { + unsafe { + let max_max = Sse41::castpd_ps(Sse41::unpacklo_pd(Sse41::castps_pd(max.0), + Sse41::castps_pd(max.0))); + LineSegmentF32(Sse41::min_ps(max_max, self.0)) + } + } + fn clamp(&self, min: &Point2DF32, max: &Point2DF32) -> LineSegmentF32 { unsafe { let min_min = Sse41::castpd_ps(Sse41::unpacklo_pd(Sse41::castps_pd(min.0), @@ -2104,7 +2133,11 @@ impl LineSegmentF32 { } } - fn floor(&self) -> LineSegmentF32 { unsafe { LineSegmentF32(Sse41::fastfloor_ps(self.0)) } } + fn floor(&self) -> LineSegmentF32 { + unsafe { + LineSegmentF32(Sse41::fastfloor_ps(self.0)) + } + } fn fract(&self) -> LineSegmentF32 { unsafe { @@ -2131,7 +2164,7 @@ impl LineSegmentF32 { // Returns the upper segment first, followed by the lower segment. fn split_at_y(&self, y: f32) -> (LineSegmentF32, LineSegmentF32) { - let (min_part, max_part) = self.split((y - self.from_y()) / (self.to_y() - self.from_y())); + let (min_part, max_part) = self.split(self.solve_t_for_y(y)); if min_part.from_y() < max_part.from_y() { (min_part, max_part) } else { @@ -2156,14 +2189,16 @@ impl LineSegmentF32 { } } - // FIXME(pcwalton): Eliminate all uses of this! - fn as_lyon_line_segment(&self) -> LineSegment { - LineSegment { from: self.from().as_euclid(), to: self.to().as_euclid() } + fn solve_t_for_x(&self, x: f32) -> f32 { + (x - self.from_x()) / (self.to_x() - self.from_x()) + } + + fn solve_t_for_y(&self, y: f32) -> f32 { + (y - self.from_y()) / (self.to_y() - self.from_y()) } - // FIXME(pcwalton): Optimize this! fn solve_y_for_x(&self, x: f32) -> f32 { - self.as_lyon_line_segment().solve_y_for_x(x) + lerp(self.from_y(), self.to_y(), self.solve_t_for_x(x)) } fn reversed(&self) -> LineSegmentF32 { @@ -2264,6 +2299,10 @@ impl SimdExt for Sse41 { // Trivial utilities +fn lerp(a: f32, b: f32, t: f32) -> f32 { + a + (b - a) * t +} + fn alignup_i32(a: i32, b: i32) -> i32 { (a + b - 1) / b }