Properly account for concurrency in the performance measurement

This commit is contained in:
Patrick Walton 2019-05-07 11:22:24 -07:00
parent 15f63df844
commit b198c06968
3 changed files with 167 additions and 51 deletions

View File

@ -26,7 +26,7 @@ use pathfinder_gl::GLDevice;
use pathfinder_gpu::Device; use pathfinder_gpu::Device;
use pathfinder_gpu::resources::ResourceLoader; use pathfinder_gpu::resources::ResourceLoader;
use pathfinder_renderer::concurrent::scene_proxy::{RenderCommandStream, SceneProxy}; use pathfinder_renderer::concurrent::scene_proxy::{RenderCommandStream, SceneProxy};
use pathfinder_renderer::gpu::renderer::{DestFramebuffer, RenderStats, Renderer}; use pathfinder_renderer::gpu::renderer::{DestFramebuffer, RenderStats, RenderTime, Renderer};
use pathfinder_renderer::options::{RenderOptions, RenderTransform}; use pathfinder_renderer::options::{RenderOptions, RenderTransform};
use pathfinder_renderer::post::STEM_DARKENING_FACTORS; use pathfinder_renderer::post::STEM_DARKENING_FACTORS;
use pathfinder_renderer::scene::Scene; use pathfinder_renderer::scene::Scene;
@ -473,7 +473,7 @@ impl<W> DemoApp<W> where W: Window {
fn update_stats(&mut self) { fn update_stats(&mut self) {
let frame = self.current_frame.as_mut().unwrap(); let frame = self.current_frame.as_mut().unwrap();
if let Some(rendering_time) = self.renderer.shift_timer_query() { if let Some(rendering_time) = self.renderer.shift_rendering_time() {
frame.scene_rendering_times.push(rendering_time); frame.scene_rendering_times.push(rendering_time);
} }
@ -488,12 +488,11 @@ impl<W> DemoApp<W> where W: Window {
let total_rendering_time = if frame.scene_rendering_times.is_empty() { let total_rendering_time = if frame.scene_rendering_times.is_empty() {
None None
} else { } else {
let zero = Duration::new(0, 0);
Some( Some(
frame frame
.scene_rendering_times .scene_rendering_times
.iter() .iter()
.fold(zero, |sum, item| sum + *item), .fold(RenderTime::default(), |sum, item| sum + *item),
) )
}; };
@ -747,7 +746,7 @@ fn emit_message<W>(
struct Frame { struct Frame {
transform: RenderTransform, transform: RenderTransform,
ui_events: Vec<UIEvent>, ui_events: Vec<UIEvent>,
scene_rendering_times: Vec<Duration>, scene_rendering_times: Vec<RenderTime>,
scene_stats: Vec<RenderStats>, scene_stats: Vec<RenderStats>,
} }

View File

@ -15,7 +15,7 @@
//! //!
//! The debug font atlas was generated using: https://evanw.github.io/font-texture-generator/ //! The debug font atlas was generated using: https://evanw.github.io/font-texture-generator/
use crate::gpu::renderer::RenderStats; use crate::gpu::renderer::{RenderStats, RenderTime};
use pathfinder_geometry::basic::point::Point2DI32; use pathfinder_geometry::basic::point::Point2DI32;
use pathfinder_geometry::basic::rect::RectI32; use pathfinder_geometry::basic::rect::RectI32;
use pathfinder_gpu::resources::ResourceLoader; use pathfinder_gpu::resources::ResourceLoader;
@ -27,8 +27,11 @@ use std::time::Duration;
const SAMPLE_BUFFER_SIZE: usize = 60; const SAMPLE_BUFFER_SIZE: usize = 60;
const PERF_WINDOW_WIDTH: i32 = 375; const STATS_WINDOW_WIDTH: i32 = 325;
const PERF_WINDOW_HEIGHT: i32 = LINE_HEIGHT * 6 + PADDING + 2; const STATS_WINDOW_HEIGHT: i32 = LINE_HEIGHT * 4 + PADDING + 2;
const PERFORMANCE_WINDOW_WIDTH: i32 = 400;
const PERFORMANCE_WINDOW_HEIGHT: i32 = LINE_HEIGHT * 4 + PADDING + 2;
pub struct DebugUIPresenter<D> pub struct DebugUIPresenter<D>
where where
@ -61,34 +64,37 @@ where
&mut self, &mut self,
stats: RenderStats, stats: RenderStats,
tile_time: Duration, tile_time: Duration,
rendering_time: Option<Duration>, rendering_time: Option<RenderTime>,
) { ) {
self.cpu_samples.push(CPUSample { self.cpu_samples.push(CPUSample {
stats, stats,
elapsed: tile_time, elapsed: tile_time,
}); });
if let Some(rendering_time) = rendering_time { if let Some(time) = rendering_time {
self.gpu_samples.push(GPUSample { self.gpu_samples.push(GPUSample { time })
elapsed: rendering_time,
});
} }
} }
pub fn draw(&self, device: &D) { pub fn draw(&self, device: &D) {
// Draw performance window. let mean_cpu_sample = self.cpu_samples.mean();
self.draw_stats_window(device, &mean_cpu_sample);
self.draw_performance_window(device, &mean_cpu_sample);
}
fn draw_stats_window(&self, device: &D, mean_cpu_sample: &CPUSample) {
let framebuffer_size = self.ui_presenter.framebuffer_size(); let framebuffer_size = self.ui_presenter.framebuffer_size();
let bottom = framebuffer_size.y() - PADDING; let bottom = framebuffer_size.y() - PADDING;
let window_rect = RectI32::new( let window_rect = RectI32::new(
Point2DI32::new( Point2DI32::new(
framebuffer_size.x() - PADDING - PERF_WINDOW_WIDTH, framebuffer_size.x() - PADDING - STATS_WINDOW_WIDTH,
bottom - PERF_WINDOW_HEIGHT, bottom - PERFORMANCE_WINDOW_HEIGHT - PADDING - STATS_WINDOW_HEIGHT,
), ),
Point2DI32::new(PERF_WINDOW_WIDTH, PERF_WINDOW_HEIGHT), Point2DI32::new(STATS_WINDOW_WIDTH, STATS_WINDOW_HEIGHT),
); );
self.ui_presenter.draw_solid_rounded_rect(device, window_rect, WINDOW_COLOR);
let origin = window_rect.origin() + Point2DI32::new(PADDING, PADDING + FONT_ASCENT);
let mean_cpu_sample = self.cpu_samples.mean(); self.ui_presenter.draw_solid_rounded_rect(device, window_rect, WINDOW_COLOR);
let origin = window_rect.origin() + Point2DI32::new(PADDING, PADDING + FONT_ASCENT);
self.ui_presenter.draw_text( self.ui_presenter.draw_text(
device, device,
&format!("Objects: {}", mean_cpu_sample.stats.object_count), &format!("Objects: {}", mean_cpu_sample.stats.object_count),
@ -113,14 +119,29 @@ where
origin + Point2DI32::new(0, LINE_HEIGHT * 3), origin + Point2DI32::new(0, LINE_HEIGHT * 3),
false, false,
); );
}
fn draw_performance_window(&self, device: &D, mean_cpu_sample: &CPUSample) {
let framebuffer_size = self.ui_presenter.framebuffer_size();
let bottom = framebuffer_size.y() - PADDING;
let window_rect = RectI32::new(
Point2DI32::new(
framebuffer_size.x() - PADDING - PERFORMANCE_WINDOW_WIDTH,
bottom - PERFORMANCE_WINDOW_HEIGHT,
),
Point2DI32::new(PERFORMANCE_WINDOW_WIDTH, PERFORMANCE_WINDOW_HEIGHT),
);
self.ui_presenter.draw_solid_rounded_rect(device, window_rect, WINDOW_COLOR);
let origin = window_rect.origin() + Point2DI32::new(PADDING, PADDING + FONT_ASCENT);
self.ui_presenter.draw_text( self.ui_presenter.draw_text(
device, device,
&format!( &format!(
"CPU Time: {:.3} ms", "Stage 0 CPU: {:.3} ms",
duration_to_ms(mean_cpu_sample.elapsed) duration_to_ms(mean_cpu_sample.elapsed)
), ),
origin + Point2DI32::new(0, LINE_HEIGHT * 4), origin,
false, false,
); );
@ -128,13 +149,33 @@ where
self.ui_presenter.draw_text( self.ui_presenter.draw_text(
device, device,
&format!( &format!(
"GPU Time: {:.3} ms", "Stage 0 GPU: {:.3} ms",
duration_to_ms(mean_gpu_sample.elapsed) duration_to_ms(mean_gpu_sample.time.stage_0)
), ),
origin + Point2DI32::new(0, LINE_HEIGHT * 5), origin + Point2DI32::new(0, LINE_HEIGHT * 1),
false,
);
self.ui_presenter.draw_text(
device,
&format!(
"Stage 1 GPU: {:.3} ms",
duration_to_ms(mean_gpu_sample.time.stage_1)
),
origin + Point2DI32::new(0, LINE_HEIGHT * 2),
false,
);
let wallclock_time = f64::max(duration_to_ms(mean_gpu_sample.time.stage_0),
duration_to_ms(mean_cpu_sample.elapsed)) +
duration_to_ms(mean_gpu_sample.time.stage_1);
self.ui_presenter.draw_text(
device,
&format!("Wallclock: {:.3} ms", wallclock_time),
origin + Point2DI32::new(0, LINE_HEIGHT * 3),
false, false,
); );
} }
} }
struct SampleBuffer<S> struct SampleBuffer<S>
@ -203,14 +244,14 @@ impl Div<usize> for CPUSample {
#[derive(Clone, Default)] #[derive(Clone, Default)]
struct GPUSample { struct GPUSample {
elapsed: Duration, time: RenderTime,
} }
impl Add<GPUSample> for GPUSample { impl Add<GPUSample> for GPUSample {
type Output = GPUSample; type Output = GPUSample;
fn add(self, other: GPUSample) -> GPUSample { fn add(self, other: GPUSample) -> GPUSample {
GPUSample { GPUSample {
elapsed: self.elapsed + other.elapsed, time: self.time + other.time,
} }
} }
} }
@ -219,7 +260,10 @@ impl Div<usize> for GPUSample {
type Output = GPUSample; type Output = GPUSample;
fn div(self, divisor: usize) -> GPUSample { fn div(self, divisor: usize) -> GPUSample {
GPUSample { GPUSample {
elapsed: self.elapsed / (divisor as u32), time: RenderTime {
stage_0: self.time.stage_0 / (divisor as u32),
stage_1: self.time.stage_1 / (divisor as u32),
}
} }
} }
} }

View File

@ -90,8 +90,8 @@ where
// Debug // Debug
pub stats: RenderStats, pub stats: RenderStats,
current_timer_query: Option<D::TimerQuery>, current_timers: RenderTimers<D>,
pending_timer_queries: VecDeque<D::TimerQuery>, pending_timers: VecDeque<RenderTimers<D>>,
free_timer_queries: Vec<D::TimerQuery>, free_timer_queries: Vec<D::TimerQuery>,
pub debug_ui_presenter: DebugUIPresenter<D>, pub debug_ui_presenter: DebugUIPresenter<D>,
@ -209,8 +209,8 @@ where
reprojection_vertex_array, reprojection_vertex_array,
stats: RenderStats::default(), stats: RenderStats::default(),
current_timer_query: None, current_timers: RenderTimers::new(),
pending_timer_queries: VecDeque::new(), pending_timers: VecDeque::new(),
free_timer_queries: vec![], free_timer_queries: vec![],
debug_ui_presenter, debug_ui_presenter,
@ -230,13 +230,6 @@ where
pub fn begin_scene(&mut self) { pub fn begin_scene(&mut self) {
self.init_postprocessing_framebuffer(); self.init_postprocessing_framebuffer();
let timer_query = self
.free_timer_queries
.pop()
.unwrap_or_else(|| self.device.create_timer_query());
self.device.begin_timer_query(&timer_query);
self.current_timer_query = Some(timer_query);
self.mask_framebuffer_cleared = false; self.mask_framebuffer_cleared = false;
self.stats = RenderStats::default(); self.stats = RenderStats::default();
} }
@ -251,7 +244,10 @@ where
} }
RenderCommand::AddShaders(ref shaders) => self.upload_shaders(shaders), RenderCommand::AddShaders(ref shaders) => self.upload_shaders(shaders),
RenderCommand::AddFills(ref fills) => self.add_fills(fills), RenderCommand::AddFills(ref fills) => self.add_fills(fills),
RenderCommand::FlushFills => self.draw_buffered_fills(), RenderCommand::FlushFills => {
self.begin_composite_timer_query();
self.draw_buffered_fills();
}
RenderCommand::SolidTile(ref solid_tiles) => { RenderCommand::SolidTile(ref solid_tiles) => {
let count = solid_tiles.len(); let count = solid_tiles.len();
self.stats.solid_tile_count += count; self.stats.solid_tile_count += count;
@ -273,9 +269,8 @@ where
self.postprocess(); self.postprocess();
} }
let timer_query = self.current_timer_query.take().unwrap(); self.end_composite_timer_query();
self.device.end_timer_query(&timer_query); self.pending_timers.push_back(mem::replace(&mut self.current_timers, RenderTimers::new()));
self.pending_timer_queries.push_back(timer_query);
} }
pub fn draw_debug_ui(&self) { pub fn draw_debug_ui(&self) {
@ -283,15 +278,33 @@ where
self.debug_ui_presenter.draw(&self.device); self.debug_ui_presenter.draw(&self.device);
} }
pub fn shift_timer_query(&mut self) -> Option<Duration> { pub fn shift_rendering_time(&mut self) -> Option<RenderTime> {
let query = self.pending_timer_queries.front()?; let timers = self.pending_timers.front()?;
if !self.device.timer_query_is_available(&query) {
// Accumulate stage-0 time.
let mut total_stage_0_time = Duration::new(0, 0);
for timer_query in &timers.stage_0 {
if !self.device.timer_query_is_available(timer_query) {
return None; return None;
} }
let query = self.pending_timer_queries.pop_front().unwrap(); total_stage_0_time += self.device.get_timer_query(timer_query);
let result = self.device.get_timer_query(&query); }
self.free_timer_queries.push(query);
Some(result) // Get stage-1 time.
let stage_1_time = {
let stage_1_timer_query = timers.stage_1.as_ref().unwrap();
if !self.device.timer_query_is_available(&stage_1_timer_query) {
return None;
}
self.device.get_timer_query(stage_1_timer_query)
};
// Recycle all timer queries.
let timers = self.pending_timers.pop_front().unwrap();
self.free_timer_queries.extend(timers.stage_0.into_iter());
self.free_timer_queries.push(timers.stage_1.unwrap());
Some(RenderTime { stage_0: total_stage_0_time, stage_1: stage_1_time })
} }
#[inline] #[inline]
@ -378,6 +391,9 @@ where
return; return;
} }
let timer_query = self.allocate_timer_query();
self.device.begin_timer_query(&timer_query);
self.stats.fill_count += fills.len(); self.stats.fill_count += fills.len();
while !fills.is_empty() { while !fills.is_empty() {
@ -388,6 +404,9 @@ where
self.draw_buffered_fills(); self.draw_buffered_fills();
} }
} }
self.device.end_timer_query(&timer_query);
self.current_timers.stage_0.push(timer_query);
} }
fn draw_buffered_fills(&mut self) { fn draw_buffered_fills(&mut self) {
@ -854,6 +873,24 @@ where
} }
} }
} }
fn allocate_timer_query(&mut self) -> D::TimerQuery {
match self.free_timer_queries.pop() {
Some(query) => query,
None => self.device.create_timer_query(),
}
}
fn begin_composite_timer_query(&mut self) {
let timer_query = self.allocate_timer_query();
self.device.begin_timer_query(&timer_query);
self.current_timers.stage_1 = Some(timer_query);
}
fn end_composite_timer_query(&mut self) {
let query = self.current_timers.stage_1.as_ref().expect("No stage 1 timer query yet?!");
self.device.end_timer_query(&query);
}
} }
struct FillVertexArray<D> struct FillVertexArray<D>
@ -1537,3 +1574,39 @@ impl Div<usize> for RenderStats {
} }
} }
} }
struct RenderTimers<D> where D: Device {
stage_0: Vec<D::TimerQuery>,
stage_1: Option<D::TimerQuery>,
}
impl<D> RenderTimers<D> where D: Device {
fn new() -> RenderTimers<D> {
RenderTimers { stage_0: vec![], stage_1: None }
}
}
#[derive(Clone, Copy, Debug)]
pub struct RenderTime {
pub stage_0: Duration,
pub stage_1: Duration,
}
impl Default for RenderTime {
#[inline]
fn default() -> RenderTime {
RenderTime { stage_0: Duration::new(0, 0), stage_1: Duration::new(0, 0) }
}
}
impl Add<RenderTime> for RenderTime {
type Output = RenderTime;
#[inline]
fn add(self, other: RenderTime) -> RenderTime {
RenderTime {
stage_0: self.stage_0 + other.stage_0,
stage_1: self.stage_1 + other.stage_1,
}
}
}