Properly account for concurrency in the performance measurement
This commit is contained in:
parent
15f63df844
commit
b198c06968
|
@ -26,7 +26,7 @@ use pathfinder_gl::GLDevice;
|
|||
use pathfinder_gpu::Device;
|
||||
use pathfinder_gpu::resources::ResourceLoader;
|
||||
use pathfinder_renderer::concurrent::scene_proxy::{RenderCommandStream, SceneProxy};
|
||||
use pathfinder_renderer::gpu::renderer::{DestFramebuffer, RenderStats, Renderer};
|
||||
use pathfinder_renderer::gpu::renderer::{DestFramebuffer, RenderStats, RenderTime, Renderer};
|
||||
use pathfinder_renderer::options::{RenderOptions, RenderTransform};
|
||||
use pathfinder_renderer::post::STEM_DARKENING_FACTORS;
|
||||
use pathfinder_renderer::scene::Scene;
|
||||
|
@ -473,7 +473,7 @@ impl<W> DemoApp<W> where W: Window {
|
|||
|
||||
fn update_stats(&mut self) {
|
||||
let frame = self.current_frame.as_mut().unwrap();
|
||||
if let Some(rendering_time) = self.renderer.shift_timer_query() {
|
||||
if let Some(rendering_time) = self.renderer.shift_rendering_time() {
|
||||
frame.scene_rendering_times.push(rendering_time);
|
||||
}
|
||||
|
||||
|
@ -488,12 +488,11 @@ impl<W> DemoApp<W> where W: Window {
|
|||
let total_rendering_time = if frame.scene_rendering_times.is_empty() {
|
||||
None
|
||||
} else {
|
||||
let zero = Duration::new(0, 0);
|
||||
Some(
|
||||
frame
|
||||
.scene_rendering_times
|
||||
.iter()
|
||||
.fold(zero, |sum, item| sum + *item),
|
||||
.fold(RenderTime::default(), |sum, item| sum + *item),
|
||||
)
|
||||
};
|
||||
|
||||
|
@ -747,7 +746,7 @@ fn emit_message<W>(
|
|||
struct Frame {
|
||||
transform: RenderTransform,
|
||||
ui_events: Vec<UIEvent>,
|
||||
scene_rendering_times: Vec<Duration>,
|
||||
scene_rendering_times: Vec<RenderTime>,
|
||||
scene_stats: Vec<RenderStats>,
|
||||
}
|
||||
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
//!
|
||||
//! The debug font atlas was generated using: https://evanw.github.io/font-texture-generator/
|
||||
|
||||
use crate::gpu::renderer::RenderStats;
|
||||
use crate::gpu::renderer::{RenderStats, RenderTime};
|
||||
use pathfinder_geometry::basic::point::Point2DI32;
|
||||
use pathfinder_geometry::basic::rect::RectI32;
|
||||
use pathfinder_gpu::resources::ResourceLoader;
|
||||
|
@ -27,8 +27,11 @@ use std::time::Duration;
|
|||
|
||||
const SAMPLE_BUFFER_SIZE: usize = 60;
|
||||
|
||||
const PERF_WINDOW_WIDTH: i32 = 375;
|
||||
const PERF_WINDOW_HEIGHT: i32 = LINE_HEIGHT * 6 + PADDING + 2;
|
||||
const STATS_WINDOW_WIDTH: i32 = 325;
|
||||
const STATS_WINDOW_HEIGHT: i32 = LINE_HEIGHT * 4 + PADDING + 2;
|
||||
|
||||
const PERFORMANCE_WINDOW_WIDTH: i32 = 400;
|
||||
const PERFORMANCE_WINDOW_HEIGHT: i32 = LINE_HEIGHT * 4 + PADDING + 2;
|
||||
|
||||
pub struct DebugUIPresenter<D>
|
||||
where
|
||||
|
@ -61,34 +64,37 @@ where
|
|||
&mut self,
|
||||
stats: RenderStats,
|
||||
tile_time: Duration,
|
||||
rendering_time: Option<Duration>,
|
||||
rendering_time: Option<RenderTime>,
|
||||
) {
|
||||
self.cpu_samples.push(CPUSample {
|
||||
stats,
|
||||
elapsed: tile_time,
|
||||
});
|
||||
if let Some(rendering_time) = rendering_time {
|
||||
self.gpu_samples.push(GPUSample {
|
||||
elapsed: rendering_time,
|
||||
});
|
||||
if let Some(time) = rendering_time {
|
||||
self.gpu_samples.push(GPUSample { time })
|
||||
}
|
||||
}
|
||||
|
||||
pub fn draw(&self, device: &D) {
|
||||
// Draw performance window.
|
||||
let mean_cpu_sample = self.cpu_samples.mean();
|
||||
self.draw_stats_window(device, &mean_cpu_sample);
|
||||
self.draw_performance_window(device, &mean_cpu_sample);
|
||||
}
|
||||
|
||||
fn draw_stats_window(&self, device: &D, mean_cpu_sample: &CPUSample) {
|
||||
let framebuffer_size = self.ui_presenter.framebuffer_size();
|
||||
let bottom = framebuffer_size.y() - PADDING;
|
||||
let window_rect = RectI32::new(
|
||||
Point2DI32::new(
|
||||
framebuffer_size.x() - PADDING - PERF_WINDOW_WIDTH,
|
||||
bottom - PERF_WINDOW_HEIGHT,
|
||||
framebuffer_size.x() - PADDING - STATS_WINDOW_WIDTH,
|
||||
bottom - PERFORMANCE_WINDOW_HEIGHT - PADDING - STATS_WINDOW_HEIGHT,
|
||||
),
|
||||
Point2DI32::new(PERF_WINDOW_WIDTH, PERF_WINDOW_HEIGHT),
|
||||
Point2DI32::new(STATS_WINDOW_WIDTH, STATS_WINDOW_HEIGHT),
|
||||
);
|
||||
self.ui_presenter.draw_solid_rounded_rect(device, window_rect, WINDOW_COLOR);
|
||||
let origin = window_rect.origin() + Point2DI32::new(PADDING, PADDING + FONT_ASCENT);
|
||||
|
||||
let mean_cpu_sample = self.cpu_samples.mean();
|
||||
self.ui_presenter.draw_solid_rounded_rect(device, window_rect, WINDOW_COLOR);
|
||||
|
||||
let origin = window_rect.origin() + Point2DI32::new(PADDING, PADDING + FONT_ASCENT);
|
||||
self.ui_presenter.draw_text(
|
||||
device,
|
||||
&format!("Objects: {}", mean_cpu_sample.stats.object_count),
|
||||
|
@ -113,14 +119,29 @@ where
|
|||
origin + Point2DI32::new(0, LINE_HEIGHT * 3),
|
||||
false,
|
||||
);
|
||||
}
|
||||
|
||||
fn draw_performance_window(&self, device: &D, mean_cpu_sample: &CPUSample) {
|
||||
let framebuffer_size = self.ui_presenter.framebuffer_size();
|
||||
let bottom = framebuffer_size.y() - PADDING;
|
||||
let window_rect = RectI32::new(
|
||||
Point2DI32::new(
|
||||
framebuffer_size.x() - PADDING - PERFORMANCE_WINDOW_WIDTH,
|
||||
bottom - PERFORMANCE_WINDOW_HEIGHT,
|
||||
),
|
||||
Point2DI32::new(PERFORMANCE_WINDOW_WIDTH, PERFORMANCE_WINDOW_HEIGHT),
|
||||
);
|
||||
|
||||
self.ui_presenter.draw_solid_rounded_rect(device, window_rect, WINDOW_COLOR);
|
||||
|
||||
let origin = window_rect.origin() + Point2DI32::new(PADDING, PADDING + FONT_ASCENT);
|
||||
self.ui_presenter.draw_text(
|
||||
device,
|
||||
&format!(
|
||||
"CPU Time: {:.3} ms",
|
||||
"Stage 0 CPU: {:.3} ms",
|
||||
duration_to_ms(mean_cpu_sample.elapsed)
|
||||
),
|
||||
origin + Point2DI32::new(0, LINE_HEIGHT * 4),
|
||||
origin,
|
||||
false,
|
||||
);
|
||||
|
||||
|
@ -128,13 +149,33 @@ where
|
|||
self.ui_presenter.draw_text(
|
||||
device,
|
||||
&format!(
|
||||
"GPU Time: {:.3} ms",
|
||||
duration_to_ms(mean_gpu_sample.elapsed)
|
||||
"Stage 0 GPU: {:.3} ms",
|
||||
duration_to_ms(mean_gpu_sample.time.stage_0)
|
||||
),
|
||||
origin + Point2DI32::new(0, LINE_HEIGHT * 5),
|
||||
origin + Point2DI32::new(0, LINE_HEIGHT * 1),
|
||||
false,
|
||||
);
|
||||
self.ui_presenter.draw_text(
|
||||
device,
|
||||
&format!(
|
||||
"Stage 1 GPU: {:.3} ms",
|
||||
duration_to_ms(mean_gpu_sample.time.stage_1)
|
||||
),
|
||||
origin + Point2DI32::new(0, LINE_HEIGHT * 2),
|
||||
false,
|
||||
);
|
||||
|
||||
let wallclock_time = f64::max(duration_to_ms(mean_gpu_sample.time.stage_0),
|
||||
duration_to_ms(mean_cpu_sample.elapsed)) +
|
||||
duration_to_ms(mean_gpu_sample.time.stage_1);
|
||||
self.ui_presenter.draw_text(
|
||||
device,
|
||||
&format!("Wallclock: {:.3} ms", wallclock_time),
|
||||
origin + Point2DI32::new(0, LINE_HEIGHT * 3),
|
||||
false,
|
||||
);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
struct SampleBuffer<S>
|
||||
|
@ -203,14 +244,14 @@ impl Div<usize> for CPUSample {
|
|||
|
||||
#[derive(Clone, Default)]
|
||||
struct GPUSample {
|
||||
elapsed: Duration,
|
||||
time: RenderTime,
|
||||
}
|
||||
|
||||
impl Add<GPUSample> for GPUSample {
|
||||
type Output = GPUSample;
|
||||
fn add(self, other: GPUSample) -> GPUSample {
|
||||
GPUSample {
|
||||
elapsed: self.elapsed + other.elapsed,
|
||||
time: self.time + other.time,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -219,7 +260,10 @@ impl Div<usize> for GPUSample {
|
|||
type Output = GPUSample;
|
||||
fn div(self, divisor: usize) -> GPUSample {
|
||||
GPUSample {
|
||||
elapsed: self.elapsed / (divisor as u32),
|
||||
time: RenderTime {
|
||||
stage_0: self.time.stage_0 / (divisor as u32),
|
||||
stage_1: self.time.stage_1 / (divisor as u32),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -90,8 +90,8 @@ where
|
|||
|
||||
// Debug
|
||||
pub stats: RenderStats,
|
||||
current_timer_query: Option<D::TimerQuery>,
|
||||
pending_timer_queries: VecDeque<D::TimerQuery>,
|
||||
current_timers: RenderTimers<D>,
|
||||
pending_timers: VecDeque<RenderTimers<D>>,
|
||||
free_timer_queries: Vec<D::TimerQuery>,
|
||||
pub debug_ui_presenter: DebugUIPresenter<D>,
|
||||
|
||||
|
@ -209,8 +209,8 @@ where
|
|||
reprojection_vertex_array,
|
||||
|
||||
stats: RenderStats::default(),
|
||||
current_timer_query: None,
|
||||
pending_timer_queries: VecDeque::new(),
|
||||
current_timers: RenderTimers::new(),
|
||||
pending_timers: VecDeque::new(),
|
||||
free_timer_queries: vec![],
|
||||
debug_ui_presenter,
|
||||
|
||||
|
@ -230,13 +230,6 @@ where
|
|||
pub fn begin_scene(&mut self) {
|
||||
self.init_postprocessing_framebuffer();
|
||||
|
||||
let timer_query = self
|
||||
.free_timer_queries
|
||||
.pop()
|
||||
.unwrap_or_else(|| self.device.create_timer_query());
|
||||
self.device.begin_timer_query(&timer_query);
|
||||
self.current_timer_query = Some(timer_query);
|
||||
|
||||
self.mask_framebuffer_cleared = false;
|
||||
self.stats = RenderStats::default();
|
||||
}
|
||||
|
@ -251,7 +244,10 @@ where
|
|||
}
|
||||
RenderCommand::AddShaders(ref shaders) => self.upload_shaders(shaders),
|
||||
RenderCommand::AddFills(ref fills) => self.add_fills(fills),
|
||||
RenderCommand::FlushFills => self.draw_buffered_fills(),
|
||||
RenderCommand::FlushFills => {
|
||||
self.begin_composite_timer_query();
|
||||
self.draw_buffered_fills();
|
||||
}
|
||||
RenderCommand::SolidTile(ref solid_tiles) => {
|
||||
let count = solid_tiles.len();
|
||||
self.stats.solid_tile_count += count;
|
||||
|
@ -273,9 +269,8 @@ where
|
|||
self.postprocess();
|
||||
}
|
||||
|
||||
let timer_query = self.current_timer_query.take().unwrap();
|
||||
self.device.end_timer_query(&timer_query);
|
||||
self.pending_timer_queries.push_back(timer_query);
|
||||
self.end_composite_timer_query();
|
||||
self.pending_timers.push_back(mem::replace(&mut self.current_timers, RenderTimers::new()));
|
||||
}
|
||||
|
||||
pub fn draw_debug_ui(&self) {
|
||||
|
@ -283,15 +278,33 @@ where
|
|||
self.debug_ui_presenter.draw(&self.device);
|
||||
}
|
||||
|
||||
pub fn shift_timer_query(&mut self) -> Option<Duration> {
|
||||
let query = self.pending_timer_queries.front()?;
|
||||
if !self.device.timer_query_is_available(&query) {
|
||||
pub fn shift_rendering_time(&mut self) -> Option<RenderTime> {
|
||||
let timers = self.pending_timers.front()?;
|
||||
|
||||
// Accumulate stage-0 time.
|
||||
let mut total_stage_0_time = Duration::new(0, 0);
|
||||
for timer_query in &timers.stage_0 {
|
||||
if !self.device.timer_query_is_available(timer_query) {
|
||||
return None;
|
||||
}
|
||||
let query = self.pending_timer_queries.pop_front().unwrap();
|
||||
let result = self.device.get_timer_query(&query);
|
||||
self.free_timer_queries.push(query);
|
||||
Some(result)
|
||||
total_stage_0_time += self.device.get_timer_query(timer_query);
|
||||
}
|
||||
|
||||
// Get stage-1 time.
|
||||
let stage_1_time = {
|
||||
let stage_1_timer_query = timers.stage_1.as_ref().unwrap();
|
||||
if !self.device.timer_query_is_available(&stage_1_timer_query) {
|
||||
return None;
|
||||
}
|
||||
self.device.get_timer_query(stage_1_timer_query)
|
||||
};
|
||||
|
||||
// Recycle all timer queries.
|
||||
let timers = self.pending_timers.pop_front().unwrap();
|
||||
self.free_timer_queries.extend(timers.stage_0.into_iter());
|
||||
self.free_timer_queries.push(timers.stage_1.unwrap());
|
||||
|
||||
Some(RenderTime { stage_0: total_stage_0_time, stage_1: stage_1_time })
|
||||
}
|
||||
|
||||
#[inline]
|
||||
|
@ -378,6 +391,9 @@ where
|
|||
return;
|
||||
}
|
||||
|
||||
let timer_query = self.allocate_timer_query();
|
||||
self.device.begin_timer_query(&timer_query);
|
||||
|
||||
self.stats.fill_count += fills.len();
|
||||
|
||||
while !fills.is_empty() {
|
||||
|
@ -388,6 +404,9 @@ where
|
|||
self.draw_buffered_fills();
|
||||
}
|
||||
}
|
||||
|
||||
self.device.end_timer_query(&timer_query);
|
||||
self.current_timers.stage_0.push(timer_query);
|
||||
}
|
||||
|
||||
fn draw_buffered_fills(&mut self) {
|
||||
|
@ -854,6 +873,24 @@ where
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn allocate_timer_query(&mut self) -> D::TimerQuery {
|
||||
match self.free_timer_queries.pop() {
|
||||
Some(query) => query,
|
||||
None => self.device.create_timer_query(),
|
||||
}
|
||||
}
|
||||
|
||||
fn begin_composite_timer_query(&mut self) {
|
||||
let timer_query = self.allocate_timer_query();
|
||||
self.device.begin_timer_query(&timer_query);
|
||||
self.current_timers.stage_1 = Some(timer_query);
|
||||
}
|
||||
|
||||
fn end_composite_timer_query(&mut self) {
|
||||
let query = self.current_timers.stage_1.as_ref().expect("No stage 1 timer query yet?!");
|
||||
self.device.end_timer_query(&query);
|
||||
}
|
||||
}
|
||||
|
||||
struct FillVertexArray<D>
|
||||
|
@ -1537,3 +1574,39 @@ impl Div<usize> for RenderStats {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
struct RenderTimers<D> where D: Device {
|
||||
stage_0: Vec<D::TimerQuery>,
|
||||
stage_1: Option<D::TimerQuery>,
|
||||
}
|
||||
|
||||
impl<D> RenderTimers<D> where D: Device {
|
||||
fn new() -> RenderTimers<D> {
|
||||
RenderTimers { stage_0: vec![], stage_1: None }
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct RenderTime {
|
||||
pub stage_0: Duration,
|
||||
pub stage_1: Duration,
|
||||
}
|
||||
|
||||
impl Default for RenderTime {
|
||||
#[inline]
|
||||
fn default() -> RenderTime {
|
||||
RenderTime { stage_0: Duration::new(0, 0), stage_1: Duration::new(0, 0) }
|
||||
}
|
||||
}
|
||||
|
||||
impl Add<RenderTime> for RenderTime {
|
||||
type Output = RenderTime;
|
||||
|
||||
#[inline]
|
||||
fn add(self, other: RenderTime) -> RenderTime {
|
||||
RenderTime {
|
||||
stage_0: self.stage_0 + other.stage_0,
|
||||
stage_1: self.stage_1 + other.stage_1,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue