From b198c0696822e509c410ef3741585fa91d831667 Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Tue, 7 May 2019 11:22:24 -0700 Subject: [PATCH] Properly account for concurrency in the performance measurement --- demo/common/src/lib.rs | 9 ++- renderer/src/gpu/debug.rs | 90 +++++++++++++++++++------- renderer/src/gpu/renderer.rs | 119 ++++++++++++++++++++++++++++------- 3 files changed, 167 insertions(+), 51 deletions(-) diff --git a/demo/common/src/lib.rs b/demo/common/src/lib.rs index 6595a4c9..2b6b0b0a 100644 --- a/demo/common/src/lib.rs +++ b/demo/common/src/lib.rs @@ -26,7 +26,7 @@ use pathfinder_gl::GLDevice; use pathfinder_gpu::Device; use pathfinder_gpu::resources::ResourceLoader; use pathfinder_renderer::concurrent::scene_proxy::{RenderCommandStream, SceneProxy}; -use pathfinder_renderer::gpu::renderer::{DestFramebuffer, RenderStats, Renderer}; +use pathfinder_renderer::gpu::renderer::{DestFramebuffer, RenderStats, RenderTime, Renderer}; use pathfinder_renderer::options::{RenderOptions, RenderTransform}; use pathfinder_renderer::post::STEM_DARKENING_FACTORS; use pathfinder_renderer::scene::Scene; @@ -473,7 +473,7 @@ impl DemoApp where W: Window { fn update_stats(&mut self) { let frame = self.current_frame.as_mut().unwrap(); - if let Some(rendering_time) = self.renderer.shift_timer_query() { + if let Some(rendering_time) = self.renderer.shift_rendering_time() { frame.scene_rendering_times.push(rendering_time); } @@ -488,12 +488,11 @@ impl DemoApp where W: Window { let total_rendering_time = if frame.scene_rendering_times.is_empty() { None } else { - let zero = Duration::new(0, 0); Some( frame .scene_rendering_times .iter() - .fold(zero, |sum, item| sum + *item), + .fold(RenderTime::default(), |sum, item| sum + *item), ) }; @@ -747,7 +746,7 @@ fn emit_message( struct Frame { transform: RenderTransform, ui_events: Vec, - scene_rendering_times: Vec, + scene_rendering_times: Vec, scene_stats: Vec, } diff --git a/renderer/src/gpu/debug.rs b/renderer/src/gpu/debug.rs index 35e518e1..247e39e6 100644 --- a/renderer/src/gpu/debug.rs +++ b/renderer/src/gpu/debug.rs @@ -15,7 +15,7 @@ //! //! The debug font atlas was generated using: https://evanw.github.io/font-texture-generator/ -use crate::gpu::renderer::RenderStats; +use crate::gpu::renderer::{RenderStats, RenderTime}; use pathfinder_geometry::basic::point::Point2DI32; use pathfinder_geometry::basic::rect::RectI32; use pathfinder_gpu::resources::ResourceLoader; @@ -27,8 +27,11 @@ use std::time::Duration; const SAMPLE_BUFFER_SIZE: usize = 60; -const PERF_WINDOW_WIDTH: i32 = 375; -const PERF_WINDOW_HEIGHT: i32 = LINE_HEIGHT * 6 + PADDING + 2; +const STATS_WINDOW_WIDTH: i32 = 325; +const STATS_WINDOW_HEIGHT: i32 = LINE_HEIGHT * 4 + PADDING + 2; + +const PERFORMANCE_WINDOW_WIDTH: i32 = 400; +const PERFORMANCE_WINDOW_HEIGHT: i32 = LINE_HEIGHT * 4 + PADDING + 2; pub struct DebugUIPresenter where @@ -61,34 +64,37 @@ where &mut self, stats: RenderStats, tile_time: Duration, - rendering_time: Option, + rendering_time: Option, ) { self.cpu_samples.push(CPUSample { stats, elapsed: tile_time, }); - if let Some(rendering_time) = rendering_time { - self.gpu_samples.push(GPUSample { - elapsed: rendering_time, - }); + if let Some(time) = rendering_time { + self.gpu_samples.push(GPUSample { time }) } } pub fn draw(&self, device: &D) { - // Draw performance window. + let mean_cpu_sample = self.cpu_samples.mean(); + self.draw_stats_window(device, &mean_cpu_sample); + self.draw_performance_window(device, &mean_cpu_sample); + } + + fn draw_stats_window(&self, device: &D, mean_cpu_sample: &CPUSample) { let framebuffer_size = self.ui_presenter.framebuffer_size(); let bottom = framebuffer_size.y() - PADDING; let window_rect = RectI32::new( Point2DI32::new( - framebuffer_size.x() - PADDING - PERF_WINDOW_WIDTH, - bottom - PERF_WINDOW_HEIGHT, + framebuffer_size.x() - PADDING - STATS_WINDOW_WIDTH, + bottom - PERFORMANCE_WINDOW_HEIGHT - PADDING - STATS_WINDOW_HEIGHT, ), - Point2DI32::new(PERF_WINDOW_WIDTH, PERF_WINDOW_HEIGHT), + Point2DI32::new(STATS_WINDOW_WIDTH, STATS_WINDOW_HEIGHT), ); - self.ui_presenter.draw_solid_rounded_rect(device, window_rect, WINDOW_COLOR); - let origin = window_rect.origin() + Point2DI32::new(PADDING, PADDING + FONT_ASCENT); - let mean_cpu_sample = self.cpu_samples.mean(); + self.ui_presenter.draw_solid_rounded_rect(device, window_rect, WINDOW_COLOR); + + let origin = window_rect.origin() + Point2DI32::new(PADDING, PADDING + FONT_ASCENT); self.ui_presenter.draw_text( device, &format!("Objects: {}", mean_cpu_sample.stats.object_count), @@ -113,14 +119,29 @@ where origin + Point2DI32::new(0, LINE_HEIGHT * 3), false, ); + } + fn draw_performance_window(&self, device: &D, mean_cpu_sample: &CPUSample) { + let framebuffer_size = self.ui_presenter.framebuffer_size(); + let bottom = framebuffer_size.y() - PADDING; + let window_rect = RectI32::new( + Point2DI32::new( + framebuffer_size.x() - PADDING - PERFORMANCE_WINDOW_WIDTH, + bottom - PERFORMANCE_WINDOW_HEIGHT, + ), + Point2DI32::new(PERFORMANCE_WINDOW_WIDTH, PERFORMANCE_WINDOW_HEIGHT), + ); + + self.ui_presenter.draw_solid_rounded_rect(device, window_rect, WINDOW_COLOR); + + let origin = window_rect.origin() + Point2DI32::new(PADDING, PADDING + FONT_ASCENT); self.ui_presenter.draw_text( device, &format!( - "CPU Time: {:.3} ms", + "Stage 0 CPU: {:.3} ms", duration_to_ms(mean_cpu_sample.elapsed) ), - origin + Point2DI32::new(0, LINE_HEIGHT * 4), + origin, false, ); @@ -128,13 +149,33 @@ where self.ui_presenter.draw_text( device, &format!( - "GPU Time: {:.3} ms", - duration_to_ms(mean_gpu_sample.elapsed) + "Stage 0 GPU: {:.3} ms", + duration_to_ms(mean_gpu_sample.time.stage_0) ), - origin + Point2DI32::new(0, LINE_HEIGHT * 5), + origin + Point2DI32::new(0, LINE_HEIGHT * 1), + false, + ); + self.ui_presenter.draw_text( + device, + &format!( + "Stage 1 GPU: {:.3} ms", + duration_to_ms(mean_gpu_sample.time.stage_1) + ), + origin + Point2DI32::new(0, LINE_HEIGHT * 2), + false, + ); + + let wallclock_time = f64::max(duration_to_ms(mean_gpu_sample.time.stage_0), + duration_to_ms(mean_cpu_sample.elapsed)) + + duration_to_ms(mean_gpu_sample.time.stage_1); + self.ui_presenter.draw_text( + device, + &format!("Wallclock: {:.3} ms", wallclock_time), + origin + Point2DI32::new(0, LINE_HEIGHT * 3), false, ); } + } struct SampleBuffer @@ -203,14 +244,14 @@ impl Div for CPUSample { #[derive(Clone, Default)] struct GPUSample { - elapsed: Duration, + time: RenderTime, } impl Add for GPUSample { type Output = GPUSample; fn add(self, other: GPUSample) -> GPUSample { GPUSample { - elapsed: self.elapsed + other.elapsed, + time: self.time + other.time, } } } @@ -219,7 +260,10 @@ impl Div for GPUSample { type Output = GPUSample; fn div(self, divisor: usize) -> GPUSample { GPUSample { - elapsed: self.elapsed / (divisor as u32), + time: RenderTime { + stage_0: self.time.stage_0 / (divisor as u32), + stage_1: self.time.stage_1 / (divisor as u32), + } } } } diff --git a/renderer/src/gpu/renderer.rs b/renderer/src/gpu/renderer.rs index 14686aec..bdc94011 100644 --- a/renderer/src/gpu/renderer.rs +++ b/renderer/src/gpu/renderer.rs @@ -90,8 +90,8 @@ where // Debug pub stats: RenderStats, - current_timer_query: Option, - pending_timer_queries: VecDeque, + current_timers: RenderTimers, + pending_timers: VecDeque>, free_timer_queries: Vec, pub debug_ui_presenter: DebugUIPresenter, @@ -209,8 +209,8 @@ where reprojection_vertex_array, stats: RenderStats::default(), - current_timer_query: None, - pending_timer_queries: VecDeque::new(), + current_timers: RenderTimers::new(), + pending_timers: VecDeque::new(), free_timer_queries: vec![], debug_ui_presenter, @@ -230,13 +230,6 @@ where pub fn begin_scene(&mut self) { self.init_postprocessing_framebuffer(); - let timer_query = self - .free_timer_queries - .pop() - .unwrap_or_else(|| self.device.create_timer_query()); - self.device.begin_timer_query(&timer_query); - self.current_timer_query = Some(timer_query); - self.mask_framebuffer_cleared = false; self.stats = RenderStats::default(); } @@ -251,7 +244,10 @@ where } RenderCommand::AddShaders(ref shaders) => self.upload_shaders(shaders), RenderCommand::AddFills(ref fills) => self.add_fills(fills), - RenderCommand::FlushFills => self.draw_buffered_fills(), + RenderCommand::FlushFills => { + self.begin_composite_timer_query(); + self.draw_buffered_fills(); + } RenderCommand::SolidTile(ref solid_tiles) => { let count = solid_tiles.len(); self.stats.solid_tile_count += count; @@ -273,9 +269,8 @@ where self.postprocess(); } - let timer_query = self.current_timer_query.take().unwrap(); - self.device.end_timer_query(&timer_query); - self.pending_timer_queries.push_back(timer_query); + self.end_composite_timer_query(); + self.pending_timers.push_back(mem::replace(&mut self.current_timers, RenderTimers::new())); } pub fn draw_debug_ui(&self) { @@ -283,15 +278,33 @@ where self.debug_ui_presenter.draw(&self.device); } - pub fn shift_timer_query(&mut self) -> Option { - let query = self.pending_timer_queries.front()?; - if !self.device.timer_query_is_available(&query) { - return None; + pub fn shift_rendering_time(&mut self) -> Option { + let timers = self.pending_timers.front()?; + + // Accumulate stage-0 time. + let mut total_stage_0_time = Duration::new(0, 0); + for timer_query in &timers.stage_0 { + if !self.device.timer_query_is_available(timer_query) { + return None; + } + total_stage_0_time += self.device.get_timer_query(timer_query); } - let query = self.pending_timer_queries.pop_front().unwrap(); - let result = self.device.get_timer_query(&query); - self.free_timer_queries.push(query); - Some(result) + + // Get stage-1 time. + let stage_1_time = { + let stage_1_timer_query = timers.stage_1.as_ref().unwrap(); + if !self.device.timer_query_is_available(&stage_1_timer_query) { + return None; + } + self.device.get_timer_query(stage_1_timer_query) + }; + + // Recycle all timer queries. + let timers = self.pending_timers.pop_front().unwrap(); + self.free_timer_queries.extend(timers.stage_0.into_iter()); + self.free_timer_queries.push(timers.stage_1.unwrap()); + + Some(RenderTime { stage_0: total_stage_0_time, stage_1: stage_1_time }) } #[inline] @@ -378,6 +391,9 @@ where return; } + let timer_query = self.allocate_timer_query(); + self.device.begin_timer_query(&timer_query); + self.stats.fill_count += fills.len(); while !fills.is_empty() { @@ -388,6 +404,9 @@ where self.draw_buffered_fills(); } } + + self.device.end_timer_query(&timer_query); + self.current_timers.stage_0.push(timer_query); } fn draw_buffered_fills(&mut self) { @@ -854,6 +873,24 @@ where } } } + + fn allocate_timer_query(&mut self) -> D::TimerQuery { + match self.free_timer_queries.pop() { + Some(query) => query, + None => self.device.create_timer_query(), + } + } + + fn begin_composite_timer_query(&mut self) { + let timer_query = self.allocate_timer_query(); + self.device.begin_timer_query(&timer_query); + self.current_timers.stage_1 = Some(timer_query); + } + + fn end_composite_timer_query(&mut self) { + let query = self.current_timers.stage_1.as_ref().expect("No stage 1 timer query yet?!"); + self.device.end_timer_query(&query); + } } struct FillVertexArray @@ -1537,3 +1574,39 @@ impl Div for RenderStats { } } } + +struct RenderTimers where D: Device { + stage_0: Vec, + stage_1: Option, +} + +impl RenderTimers where D: Device { + fn new() -> RenderTimers { + RenderTimers { stage_0: vec![], stage_1: None } + } +} + +#[derive(Clone, Copy, Debug)] +pub struct RenderTime { + pub stage_0: Duration, + pub stage_1: Duration, +} + +impl Default for RenderTime { + #[inline] + fn default() -> RenderTime { + RenderTime { stage_0: Duration::new(0, 0), stage_1: Duration::new(0, 0) } + } +} + +impl Add for RenderTime { + type Output = RenderTime; + + #[inline] + fn add(self, other: RenderTime) -> RenderTime { + RenderTime { + stage_0: self.stage_0 + other.stage_0, + stage_1: self.stage_1 + other.stage_1, + } + } +}