Turn off timer queries if the debug UI is off.

Closes #360.
This commit is contained in:
Patrick Walton 2020-06-25 11:06:17 -07:00
parent e21d79bf0f
commit e16f0a045e
4 changed files with 97 additions and 48 deletions

View File

@ -10,7 +10,7 @@
use crate::gpu::d3d11::shaders::{BOUND_WORKGROUP_SIZE, DICE_WORKGROUP_SIZE};
use crate::gpu::d3d11::shaders::{PROPAGATE_WORKGROUP_SIZE, ProgramsD3D11, SORT_WORKGROUP_SIZE};
use crate::gpu::perf::TimerFuture;
use crate::gpu::perf::TimeCategory;
use crate::gpu::renderer::{FramebufferFlags, RendererCore};
use crate::gpu_data::{AlphaTileD3D11, BackdropInfoD3D11, DiceMetadataD3D11, DrawTileBatchD3D11};
use crate::gpu_data::{Fill, FirstTileD3D11, MicrolineD3D11, PathSource, PropagateMetadataD3D11};
@ -78,8 +78,8 @@ impl<D> RendererD3D11<D> where D: Device {
let tiles_buffer = core.allocator.get_buffer(tiles_d3d11_buffer_id);
let timer_query = core.timer_query_cache.alloc(&core.device);
core.device.begin_timer_query(&timer_query);
let timer_query = core.timer_query_cache.start_timing_draw_call(&core.device,
&core.options);
let compute_dimensions = ComputeDimensions {
x: (tile_count + BOUND_WORKGROUP_SIZE - 1) / BOUND_WORKGROUP_SIZE,
@ -100,9 +100,9 @@ impl<D> RendererD3D11<D> where D: Device {
],
});
core.device.end_timer_query(&timer_query);
core.current_timer.as_mut().unwrap().other_times.push(TimerFuture::new(timer_query));
core.stats.drawcall_count += 1;
core.finish_timing_draw_call(&timer_query);
core.current_timer.as_mut().unwrap().push_query(TimeCategory::Other, timer_query);
core.allocator.free_buffer(path_info_buffer_id);
}
@ -174,8 +174,8 @@ impl<D> RendererD3D11<D> where D: Device {
&indirect_draw_params,
BufferTarget::Storage);
let timer_query = core.timer_query_cache.alloc(&core.device);
core.device.begin_timer_query(&timer_query);
let timer_query = core.timer_query_cache.start_timing_draw_call(&core.device,
&core.options);
let compute_dimensions = ComputeDimensions {
x: (microlines_storage.count + 63) / 64,
@ -204,9 +204,9 @@ impl<D> RendererD3D11<D> where D: Device {
],
});
core.device.end_timer_query(&timer_query);
core.current_timer.as_mut().unwrap().bin_times.push(TimerFuture::new(timer_query));
core.stats.drawcall_count += 1;
core.finish_timing_draw_call(&timer_query);
core.current_timer.as_mut().unwrap().push_query(TimeCategory::Bin, timer_query);
let indirect_draw_params_receiver =
core.device.read_buffer(fill_indirect_draw_params_buffer,
@ -294,8 +294,8 @@ impl<D> RendererD3D11<D> where D: Device {
dice_metadata,
BufferTarget::Storage);
let timer_query = core.timer_query_cache.alloc(&core.device);
core.device.begin_timer_query(&timer_query);
let timer_query = core.timer_query_cache.start_timing_draw_call(&core.device,
&core.options);
let workgroup_count = (batch_segment_count + DICE_WORKGROUP_SIZE - 1) /
DICE_WORKGROUP_SIZE;
@ -325,9 +325,9 @@ impl<D> RendererD3D11<D> where D: Device {
],
});
core.device.end_timer_query(&timer_query);
core.current_timer.as_mut().unwrap().dice_times.push(TimerFuture::new(timer_query));
core.stats.drawcall_count += 1;
core.finish_timing_draw_call(&timer_query);
core.current_timer.as_mut().unwrap().push_query(TimeCategory::Dice, timer_query);
let indirect_compute_params_receiver =
core.device.read_buffer(&dice_indirect_draw_params_buffer,
@ -374,8 +374,8 @@ impl<D> RendererD3D11<D> where D: Device {
let area_lut_texture = core.allocator.get_texture(core.area_lut_texture_id);
let timer_query = core.timer_query_cache.alloc(&core.device);
core.device.begin_timer_query(&timer_query);
let timer_query = core.timer_query_cache.start_timing_draw_call(&core.device,
&core.options);
// This setup is an annoying workaround for the 64K limit of compute invocation in OpenGL.
let alpha_tile_count = alpha_tile_range.end - alpha_tile_range.start;
@ -401,9 +401,9 @@ impl<D> RendererD3D11<D> where D: Device {
],
});
core.device.end_timer_query(&timer_query);
core.current_timer.as_mut().unwrap().fill_times.push(TimerFuture::new(timer_query));
core.stats.drawcall_count += 1;
core.finish_timing_draw_call(&timer_query);
core.current_timer.as_mut().unwrap().push_query(TimeCategory::Fill, timer_query);
core.framebuffer_flags.insert(FramebufferFlags::MASK_FRAMEBUFFER_IS_DIRTY);
}
@ -596,8 +596,8 @@ impl<D> RendererD3D11<D> where D: Device {
storage_buffers.push((&propagate_program.clip_tiles_storage_buffer, clip_tile_buffer));
}
let timer_query = core.timer_query_cache.alloc(&core.device);
core.device.begin_timer_query(&timer_query);
let timer_query = core.timer_query_cache.start_timing_draw_call(&core.device,
&core.options);
let dimensions = ComputeDimensions {
x: (column_count + PROPAGATE_WORKGROUP_SIZE - 1) / PROPAGATE_WORKGROUP_SIZE,
@ -618,9 +618,9 @@ impl<D> RendererD3D11<D> where D: Device {
storage_buffers: &storage_buffers,
});
core.device.end_timer_query(&timer_query);
core.current_timer.as_mut().unwrap().other_times.push(TimerFuture::new(timer_query));
core.stats.drawcall_count += 1;
core.finish_timing_draw_call(&timer_query);
core.current_timer.as_mut().unwrap().push_query(TimeCategory::Other, timer_query);
let fill_indirect_draw_params_receiver =
core.device.read_buffer(&fill_indirect_draw_params_buffer,
@ -654,8 +654,8 @@ impl<D> RendererD3D11<D> where D: Device {
let tile_count = core.framebuffer_tile_size().area();
let timer_query = core.timer_query_cache.alloc(&core.device);
core.device.begin_timer_query(&timer_query);
let timer_query = core.timer_query_cache.start_timing_draw_call(&core.device,
&core.options);
let dimensions = ComputeDimensions {
x: (tile_count as u32 + SORT_WORKGROUP_SIZE - 1) / SORT_WORKGROUP_SIZE,
@ -674,9 +674,9 @@ impl<D> RendererD3D11<D> where D: Device {
],
});
core.device.end_timer_query(&timer_query);
core.current_timer.as_mut().unwrap().other_times.push(TimerFuture::new(timer_query));
core.stats.drawcall_count += 1;
core.finish_timing_draw_call(&timer_query);
core.current_timer.as_mut().unwrap().push_query(TimeCategory::Other, timer_query);
}
fn allocate_first_tile_map(&mut self, core: &mut RendererCore<D>) -> BufferID {
@ -703,8 +703,8 @@ impl<D> RendererD3D11<D> where D: Device {
tiles_d3d11_buffer_id: BufferID,
first_tile_map_buffer_id: BufferID,
color_texture_0: Option<TileBatchTexture>) {
let timer_query = core.timer_query_cache.alloc(&core.device);
core.device.begin_timer_query(&timer_query);
let timer_query = core.timer_query_cache.start_timing_draw_call(&core.device,
&core.options);
let tile_program = &self.programs.tile_program;
@ -763,9 +763,9 @@ impl<D> RendererD3D11<D> where D: Device {
uniforms: &uniforms,
});
core.device.end_timer_query(&timer_query);
core.current_timer.as_mut().unwrap().composite_times.push(TimerFuture::new(timer_query));
core.stats.drawcall_count += 1;
core.finish_timing_draw_call(&timer_query);
core.current_timer.as_mut().unwrap().push_query(TimeCategory::Composite, timer_query);
core.preserve_draw_framebuffer();
}

View File

@ -9,7 +9,7 @@
// except according to those terms.
use crate::gpu::blend::{BlendModeExt, ToBlendState};
use crate::gpu::perf::TimerFuture;
use crate::gpu::perf::TimeCategory;
use crate::gpu::renderer::{FramebufferFlags, MASK_FRAMEBUFFER_HEIGHT, MASK_FRAMEBUFFER_WIDTH};
use crate::gpu::renderer::{RendererCore, RendererFlags};
use crate::gpu::d3d9::shaders::{ClipTileCombineVertexArrayD3D9, ClipTileCopyVertexArrayD3D9};
@ -225,8 +225,8 @@ impl<D> RendererD3D9<D> where D: Device {
clear_color = Some(ColorF::default());
};
let timer_query = core.timer_query_cache.alloc(&core.device);
core.device.begin_timer_query(&timer_query);
let timer_query = core.timer_query_cache.start_timing_draw_call(&core.device,
&core.options);
core.device.draw_elements_instanced(6, fill_count, &RenderState {
target: &RenderTarget::Framebuffer(mask_framebuffer),
@ -256,9 +256,9 @@ impl<D> RendererD3D9<D> where D: Device {
},
});
core.device.end_timer_query(&timer_query);
core.current_timer.as_mut().unwrap().fill_times.push(TimerFuture::new(timer_query));
core.stats.drawcall_count += 1;
core.finish_timing_draw_call(&timer_query);
core.current_timer.as_mut().unwrap().push_query(TimeCategory::Fill, timer_query);
core.framebuffer_flags.insert(FramebufferFlags::MASK_FRAMEBUFFER_IS_DIRTY);
}
@ -297,8 +297,8 @@ impl<D> RendererD3D9<D> where D: Device {
quad_vertex_positions_buffer,
quad_vertex_indices_buffer);
let timer_query = core.timer_query_cache.alloc(&core.device);
core.device.begin_timer_query(&timer_query);
let timer_query = core.timer_query_cache.start_timing_draw_call(&core.device,
&core.options);
// Copy out tiles.
//
@ -322,6 +322,12 @@ impl<D> RendererD3D9<D> where D: Device {
options: RenderOptions::default(),
});
core.stats.drawcall_count += 1;
core.finish_timing_draw_call(&timer_query);
core.current_timer.as_mut().unwrap().push_query(TimeCategory::Other, timer_query);
let timer_query = core.timer_query_cache.start_timing_draw_call(&core.device,
&core.options);
// Combine clip tiles.
core.device.draw_elements_instanced(6, clip_buffer_info.clip_count, &RenderState {
target: &RenderTarget::Framebuffer(mask_framebuffer),
@ -342,9 +348,9 @@ impl<D> RendererD3D9<D> where D: Device {
options: RenderOptions::default(),
});
core.device.end_timer_query(&timer_query);
core.current_timer.as_mut().unwrap().other_times.push(TimerFuture::new(timer_query));
core.stats.drawcall_count += 2;
core.stats.drawcall_count += 1;
core.finish_timing_draw_call(&timer_query);
core.current_timer.as_mut().unwrap().push_query(TimeCategory::Other, timer_query);
core.allocator.free_framebuffer(mask_temp_framebuffer_id);
}
@ -397,8 +403,8 @@ impl<D> RendererD3D9<D> where D: Device {
let clear_color = core.clear_color_for_draw_operation();
let draw_viewport = core.draw_viewport();
let timer_query = core.timer_query_cache.alloc(&core.device);
core.device.begin_timer_query(&timer_query);
let timer_query = core.timer_query_cache.start_timing_draw_call(&core.device,
&core.options);
let tile_raster_program = &self.programs.tile_program;
@ -454,9 +460,9 @@ impl<D> RendererD3D9<D> where D: Device {
},
});
core.device.end_timer_query(&timer_query);
core.current_timer.as_mut().unwrap().composite_times.push(TimerFuture::new(timer_query));
core.stats.drawcall_count += 1;
core.finish_timing_draw_call(&timer_query);
core.current_timer.as_mut().unwrap().push_query(TimeCategory::Composite, timer_query);
core.preserve_draw_framebuffer();
}

View File

@ -10,6 +10,7 @@
//! Performance monitoring infrastructure.
use crate::gpu::options::RendererOptions;
use pathfinder_gpu::Device;
use std::mem;
use std::ops::{Add, Div};
@ -76,6 +77,15 @@ pub(crate) enum TimerFuture<D> where D: Device {
Resolved(Duration),
}
/// Selects which per-category list of a `PendingTimer` a timer query is recorded in.
///
/// `PendingTimer::push_query` maps each variant onto the matching `*_times` list.
// NOTE(review): the variants presumably name the rendering stage being timed — confirm
// against the debug UI that consumes these timings.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum TimeCategory {
    /// Recorded in `dice_times`.
    Dice,
    /// Recorded in `bin_times`.
    Bin,
    /// Recorded in `fill_times`.
    Fill,
    /// Recorded in `composite_times`.
    Composite,
    /// Recorded in `other_times`.
    Other,
}
impl<D> TimerQueryCache<D> where D: Device {
pub(crate) fn new() -> TimerQueryCache<D> {
TimerQueryCache { free_queries: vec![] }
@ -88,6 +98,17 @@ impl<D> TimerQueryCache<D> where D: Device {
pub(crate) fn free(&mut self, old_query: D::TimerQuery) {
self.free_queries.push(old_query);
}
/// Begins timing a draw call, but only when the debug UI is enabled.
///
/// When `options.show_debug_ui` is set, a timer query is obtained via `self.alloc`,
/// started on `device` with `begin_timer_query`, and returned wrapped in `Some`.
/// Otherwise no query is allocated or started and `None` is returned.
pub(crate) fn start_timing_draw_call(&mut self, device: &D, options: &RendererOptions<D>)
                                     -> Option<D::TimerQuery> {
    if options.show_debug_ui {
        let query = self.alloc(device);
        device.begin_timer_query(&query);
        Some(query)
    } else {
        None
    }
}
}
impl<D> PendingTimer<D> where D: Device {
@ -131,6 +152,22 @@ impl<D> PendingTimer<D> where D: Device {
_ => None,
}
}
/// Records `timer_query` under the list selected by `time_category`.
///
/// A `None` query is ignored, so nothing is pushed. A `Some` query is wrapped with
/// `TimerFuture::new` and appended to the list matching the category.
pub(crate) fn push_query(&mut self,
                         time_category: TimeCategory,
                         timer_query: Option<D::TimerQuery>) {
    if let Some(query) = timer_query {
        let pending = TimerFuture::new(query);
        match time_category {
            TimeCategory::Dice => self.dice_times.push(pending),
            TimeCategory::Bin => self.bin_times.push(pending),
            TimeCategory::Fill => self.fill_times.push(pending),
            TimeCategory::Composite => self.composite_times.push(pending),
            TimeCategory::Other => self.other_times.push(pending),
        }
    }
}
}
impl<D> TimerFuture<D> where D: Device {

View File

@ -13,7 +13,7 @@ use crate::gpu::d3d9::renderer::RendererD3D9;
use crate::gpu::d3d11::renderer::RendererD3D11;
use crate::gpu::debug::DebugUIPresenter;
use crate::gpu::options::{DestFramebuffer, RendererLevel, RendererMode, RendererOptions};
use crate::gpu::perf::{PendingTimer, RenderStats, RenderTime, TimerFuture, TimerQueryCache};
use crate::gpu::perf::{PendingTimer, RenderStats, RenderTime, TimeCategory, TimerQueryCache};
use crate::gpu::shaders::{BlitProgram, BlitVertexArray, ClearProgram, ClearVertexArray};
use crate::gpu::shaders::{ProgramsCore, ReprojectionProgram, ReprojectionVertexArray};
use crate::gpu::shaders::{StencilProgram, StencilVertexArray, TileProgramCommon, VertexArraysCore};
@ -934,8 +934,8 @@ impl<D> RendererCore<D> where D: Device {
let old_mask_texture = self.device.framebuffer_texture(old_mask_framebuffer);
let old_size = self.device.texture_size(old_mask_texture);
let timer_query = self.timer_query_cache.alloc(&self.device);
self.device.begin_timer_query(&timer_query);
let timer_query = self.timer_query_cache.start_timing_draw_call(&self.device,
&self.options);
self.device.draw_elements(6, &RenderState {
target: &RenderTarget::Framebuffer(mask_framebuffer),
@ -961,9 +961,9 @@ impl<D> RendererCore<D> where D: Device {
},
});
self.device.end_timer_query(&timer_query);
self.current_timer.as_mut().unwrap().other_times.push(TimerFuture::new(timer_query));
self.stats.drawcall_count += 1;
self.finish_timing_draw_call(&timer_query);
self.current_timer.as_mut().unwrap().push_query(TimeCategory::Other, timer_query);
}
pub(crate) fn set_uniforms_for_drawing_tiles<'a>(
@ -1128,6 +1128,12 @@ impl<D> RendererCore<D> where D: Device {
fn render_target_location(&self, render_target_id: RenderTargetId) -> TextureLocation {
self.render_targets[render_target_id.render_target as usize].location
}
/// Ends the timer query for a draw call, if one was supplied.
///
/// With `Some(query)`, calls `end_timer_query` on this renderer's device; with `None`,
/// does nothing. The query is only borrowed, so the caller keeps ownership of it.
pub(crate) fn finish_timing_draw_call(&self, timer_query: &Option<D::TimerQuery>) {
    match timer_query.as_ref() {
        Some(query) => self.device.end_timer_query(query),
        None => {}
    }
}
}
impl<D> Frame<D> where D: Device {