Make timer queries and framebuffer readback async

This commit is contained in:
Patrick Walton 2020-01-07 13:03:15 -08:00
parent eb66459ef4
commit ce3b40cd5c
5 changed files with 319 additions and 126 deletions

View File

@ -296,7 +296,9 @@ impl<W> DemoApp<W> where W: Window {
pub fn take_raster_screenshot(&mut self, path: PathBuf) {
let drawable_size = self.window_size.device_size();
let viewport = RectI::new(Vector2I::default(), drawable_size);
let pixels = match self.renderer.device.read_pixels(&RenderTarget::Default, viewport) {
let texture_data_receiver =
self.renderer.device.read_pixels(&RenderTarget::Default, viewport);
let pixels = match self.renderer.device.recv_texture_data(&texture_data_receiver) {
TextureData::U8(pixels) => pixels,
_ => panic!("Unexpected pixel format for default framebuffer!"),
};

View File

@ -13,13 +13,14 @@
#[macro_use]
extern crate log;
use gl::types::{GLboolean, GLchar, GLenum, GLfloat, GLint, GLsizei, GLsizeiptr, GLuint, GLvoid};
use gl::types::{GLboolean, GLchar, GLenum, GLfloat, GLint, GLsizei, GLsizeiptr, GLsync};
use gl::types::{GLuint, GLvoid};
use half::f16;
use pathfinder_geometry::rect::RectI;
use pathfinder_geometry::vector::Vector2I;
use pathfinder_gpu::resources::ResourceLoader;
use pathfinder_gpu::{RenderTarget, BlendFunc, BlendOp, BufferData, BufferTarget, BufferUploadMode};
use pathfinder_gpu::{ClearOps, DepthFunc, Device, Primitive, RenderOptions, RenderState};
use pathfinder_gpu::{BlendFunc, BlendOp, BufferData, BufferTarget, BufferUploadMode, ClearOps};
use pathfinder_gpu::{DepthFunc, Device, Primitive, RenderOptions, RenderState, RenderTarget};
use pathfinder_gpu::{ShaderKind, StencilFunc, TextureData, TextureDataRef, TextureFormat};
use pathfinder_gpu::{UniformData, VertexAttrClass, VertexAttrDescriptor, VertexAttrType};
use pathfinder_simd::default::F32x4;
@ -230,6 +231,7 @@ impl Device for GLDevice {
type Program = GLProgram;
type Shader = GLShader;
type Texture = GLTexture;
type TextureDataReceiver = GLTextureDataReceiver;
type TimerQuery = GLTimerQuery;
type Uniform = GLUniform;
type VertexArray = GLVertexArray;
@ -508,58 +510,33 @@ impl Device for GLDevice {
self.set_texture_parameters(texture);
}
fn read_pixels(&self, render_target: &RenderTarget<GLDevice>, viewport: RectI) -> TextureData {
fn read_pixels(&self, render_target: &RenderTarget<GLDevice>, viewport: RectI)
-> GLTextureDataReceiver {
let (origin, size) = (viewport.origin(), viewport.size());
let format = self.render_target_format(render_target);
self.bind_render_target(render_target);
let byte_size = size.x() as usize * size.y() as usize * format.bytes_per_pixel() as usize;
match format {
TextureFormat::R8 | TextureFormat::RGBA8 => {
let channels = format.channels();
let mut pixels = vec![0; size.x() as usize * size.y() as usize * channels];
unsafe {
gl::ReadPixels(origin.x(),
origin.y(),
size.x() as GLsizei,
size.y() as GLsizei,
format.gl_format(),
format.gl_type(),
pixels.as_mut_ptr() as *mut GLvoid); ck();
}
flip_y(&mut pixels, size, channels);
TextureData::U8(pixels)
}
TextureFormat::R16F | TextureFormat::RGBA16F => {
let channels = format.channels();
let mut pixels =
vec![f16::default(); size.x() as usize * size.y() as usize * channels];
unsafe {
gl::ReadPixels(origin.x(),
origin.y(),
size.x() as GLsizei,
size.y() as GLsizei,
format.gl_format(),
format.gl_type(),
pixels.as_mut_ptr() as *mut GLvoid); ck();
}
flip_y(&mut pixels, size, channels);
TextureData::F16(pixels)
}
TextureFormat::RGBA32F => {
let channels = format.channels();
let mut pixels = vec![0.0; size.x() as usize * size.y() as usize * channels];
unsafe {
gl::ReadPixels(origin.x(),
origin.y(),
size.x() as GLsizei,
size.y() as GLsizei,
format.gl_format(),
format.gl_type(),
pixels.as_mut_ptr() as *mut GLvoid); ck();
}
flip_y(&mut pixels, size, channels);
TextureData::F32(pixels)
}
unsafe {
let mut gl_pixel_buffer = 0;
gl::GenBuffers(1, &mut gl_pixel_buffer); ck();
gl::BindBuffer(gl::PIXEL_PACK_BUFFER, gl_pixel_buffer); ck();
gl::BufferData(gl::PIXEL_PACK_BUFFER,
byte_size as GLsizeiptr,
ptr::null(),
gl::STATIC_READ); ck();
gl::ReadPixels(origin.x(),
origin.y(),
size.x() as GLsizei,
size.y() as GLsizei,
format.gl_format(),
format.gl_type(),
0 as *mut GLvoid); ck();
let gl_sync = gl::FenceSync(gl::SYNC_GPU_COMMANDS_COMPLETE, 0);
GLTextureDataReceiver { gl_pixel_buffer, gl_sync, size, format }
}
}
@ -631,17 +608,46 @@ impl Device for GLDevice {
}
}
#[inline]
fn get_timer_query(&self, query: &Self::TimerQuery) -> Option<Duration> {
fn try_recv_timer_query(&self, query: &Self::TimerQuery) -> Option<Duration> {
unsafe {
let mut result = 0;
gl::GetQueryObjectiv(query.gl_query, gl::QUERY_RESULT_AVAILABLE, &mut result); ck();
if result == gl::FALSE as GLint {
return None;
None
} else {
Some(self.recv_timer_query(query))
}
}
}
fn recv_timer_query(&self, query: &Self::TimerQuery) -> Duration {
unsafe {
let mut result = 0;
gl::GetQueryObjectui64v(query.gl_query, gl::QUERY_RESULT, &mut result); ck();
Some(Duration::from_nanos(result))
Duration::from_nanos(result)
}
}
/// Non-blocking poll for the pixels requested by an earlier `read_pixels` call.
///
/// Waits on the receiver's fence with a zero timeout. Returns `None` when the wait reports
/// `TIMEOUT_EXPIRED` or `WAIT_FAILED`; otherwise copies the pixel buffer contents out and
/// returns them.
fn try_recv_texture_data(&self, receiver: &Self::TextureDataReceiver) -> Option<TextureData> {
    let wait_status = unsafe {
        // A timeout of 0 makes this a status check rather than a wait.
        let status = gl::ClientWaitSync(receiver.gl_sync, gl::SYNC_FLUSH_COMMANDS_BIT, 0);
        ck();
        status
    };
    match wait_status {
        gl::TIMEOUT_EXPIRED | gl::WAIT_FAILED => None,
        _ => Some(self.get_texture_data(receiver)),
    }
}
/// Blocking counterpart of `try_recv_texture_data`.
///
/// Waits on the receiver's fence with the maximum timeout (`!0`) and then copies the pixel
/// buffer contents out. In debug builds, asserts that the wait neither timed out nor failed.
fn recv_texture_data(&self, receiver: &Self::TextureDataReceiver) -> TextureData {
    let result = unsafe {
        let status = gl::ClientWaitSync(receiver.gl_sync, gl::SYNC_FLUSH_COMMANDS_BIT, !0);
        ck();
        status
    };
    debug_assert!(result != gl::TIMEOUT_EXPIRED && result != gl::WAIT_FAILED);
    self.get_texture_data(receiver)
}
@ -782,6 +788,52 @@ impl GLDevice {
}
}
}
fn get_texture_data(&self, receiver: &GLTextureDataReceiver) -> TextureData {
unsafe {
let (format, size) = (receiver.format, receiver.size);
let channels = format.channels();
let (mut texture_data, texture_data_ptr, texture_data_len);
match format {
TextureFormat::R8 | TextureFormat::RGBA8 => {
let mut pixels: Vec<u8> =
vec![0; size.x() as usize * size.y() as usize * channels];
texture_data_ptr = pixels.as_mut_ptr();
texture_data_len = pixels.len() * mem::size_of::<u8>();
texture_data = TextureData::U8(pixels);
}
TextureFormat::R16F | TextureFormat::RGBA16F => {
let mut pixels: Vec<f16> =
vec![f16::default(); size.x() as usize * size.y() as usize * channels];
texture_data_ptr = pixels.as_mut_ptr() as *mut u8;
texture_data_len = pixels.len() * mem::size_of::<f16>();
texture_data = TextureData::F16(pixels);
}
TextureFormat::RGBA32F => {
let mut pixels = vec![0.0; size.x() as usize * size.y() as usize * channels];
texture_data_ptr = pixels.as_mut_ptr() as *mut u8;
texture_data_len = pixels.len() * mem::size_of::<f32>();
texture_data = TextureData::F32(pixels);
}
}
gl::BindBuffer(gl::PIXEL_PACK_BUFFER, receiver.gl_pixel_buffer); ck();
gl::GetBufferSubData(gl::PIXEL_PACK_BUFFER,
0,
texture_data_len as GLsizeiptr,
texture_data_ptr as *mut GLvoid); ck();
gl::BindBuffer(gl::PIXEL_PACK_BUFFER, 0); ck();
match texture_data {
TextureData::U8(ref mut pixels) => flip_y(pixels, size, channels),
TextureData::U16(ref mut pixels) => flip_y(pixels, size, channels),
TextureData::F16(ref mut pixels) => flip_y(pixels, size, channels),
TextureData::F32(ref mut pixels) => flip_y(pixels, size, channels),
}
texture_data
}
}
}
pub struct GLVertexArray {
@ -1030,6 +1082,22 @@ impl VertexAttrTypeExt for VertexAttrType {
}
}
/// Handle to a pending framebuffer readback, as returned by `GLDevice::read_pixels`.
///
/// The GL objects held here are deleted when the receiver is dropped.
pub struct GLTextureDataReceiver {
// Name of the pixel pack buffer that `glReadPixels` was directed into.
gl_pixel_buffer: GLuint,
// Fence created right after the `glReadPixels` call; waited on before the buffer is read.
gl_sync: GLsync,
// Size of the region that was read.
size: Vector2I,
// Format of the render target that was read; selects the element type of the result.
format: TextureFormat,
}
impl Drop for GLTextureDataReceiver {
fn drop(&mut self) {
unsafe {
gl::DeleteBuffers(1, &mut self.gl_pixel_buffer); ck();
gl::DeleteSync(self.gl_sync); ck();
}
}
}
/// The version/dialect of OpenGL we should render with.
#[derive(Clone, Copy)]
#[repr(u32)]

View File

@ -29,6 +29,7 @@ pub trait Device: Sized {
type Program;
type Shader;
type Texture;
type TextureDataReceiver;
type TimerQuery;
type Uniform;
type VertexArray;
@ -71,7 +72,8 @@ pub trait Device: Sized {
fn framebuffer_texture<'f>(&self, framebuffer: &'f Self::Framebuffer) -> &'f Self::Texture;
fn texture_size(&self, texture: &Self::Texture) -> Vector2I;
fn upload_to_texture(&self, texture: &Self::Texture, rect: RectI, data: TextureDataRef);
fn read_pixels(&self, target: &RenderTarget<Self>, viewport: RectI) -> TextureData;
fn read_pixels(&self, target: &RenderTarget<Self>, viewport: RectI)
-> Self::TextureDataReceiver;
fn begin_commands(&self);
fn end_commands(&self);
fn draw_arrays(&self, index_count: u32, render_state: &RenderState<Self>);
@ -83,7 +85,10 @@ pub trait Device: Sized {
fn create_timer_query(&self) -> Self::TimerQuery;
fn begin_timer_query(&self, query: &Self::TimerQuery);
fn end_timer_query(&self, query: &Self::TimerQuery);
fn get_timer_query(&self, query: &Self::TimerQuery) -> Option<Duration>;
fn try_recv_timer_query(&self, query: &Self::TimerQuery) -> Option<Duration>;
fn recv_timer_query(&self, query: &Self::TimerQuery) -> Duration;
fn try_recv_texture_data(&self, receiver: &Self::TextureDataReceiver) -> Option<TextureData>;
fn recv_texture_data(&self, receiver: &Self::TextureDataReceiver) -> TextureData;
fn create_texture_from_png(&self, resources: &dyn ResourceLoader, name: &str) -> Self::Texture {
let data = resources.slurp(&format!("textures/{}.png", name)).unwrap();

View File

@ -54,7 +54,7 @@ use std::mem;
use std::ptr;
use std::rc::Rc;
use std::slice;
use std::sync::Arc;
use std::sync::{Arc, Condvar, Mutex, MutexGuard};
use std::time::{Duration, Instant};
const FIRST_VERTEX_BUFFER_INDEX: u64 = 1;
@ -148,10 +148,34 @@ pub struct MetalTexture {
dirty: Cell<bool>,
}
pub struct MetalTimerQuery {
/// Handle to a pending texture readback on the Metal backend.
///
/// The state lives behind an `Arc`, so clones of this handle refer to the same readback.
#[derive(Clone)]
pub struct MetalTextureDataReceiver(Arc<MetalTextureDataReceiverInfo>);
/// State behind a `MetalTextureDataReceiver` handle.
struct MetalTextureDataReceiverInfo {
// Progress of the readback, including the downloaded data once it is available.
mutex: Mutex<MetalTextureDataReceiverState>,
// Notified by `download` after the data has been stored in `mutex`.
cond: Condvar,
// Texture whose bytes `download` copies out.
texture: Texture,
// Region of `texture` to read.
viewport: RectI,
}
/// Progress of a texture readback.
enum MetalTextureDataReceiverState {
// Initial state: no data has been downloaded yet.
Pending,
// `download` has copied the pixels out; they have not been handed to a caller yet.
Downloaded(TextureData),
// The downloaded data has been taken by a `recv`/`try_recv` call.
Finished,
}
/// Handle to a timer query on the Metal backend.
///
/// The state lives behind an `Arc`, so clones of this handle refer to the same query.
#[derive(Clone)]
pub struct MetalTimerQuery(Arc<MetalTimerQueryInfo>);
struct MetalTimerQueryInfo {
mutex: Mutex<MetalTimerQueryData>,
cond: Condvar,
event_value: u64,
start_time: Cell<Option<Instant>>,
end_time: Cell<Option<Instant>>,
}
/// Timestamps recorded for a timer query; both are `None` until the corresponding event
/// listener block has run.
struct MetalTimerQueryData {
// Set to `Instant::now()` by the start listener block.
start_time: Option<Instant>,
// Set to `Instant::now()` by the end listener block.
end_time: Option<Instant>,
}
#[derive(Clone)]
@ -184,7 +208,8 @@ impl Device for MetalDevice {
type Program = MetalProgram;
type Shader = MetalShader;
type Texture = MetalTexture;
type TimerQuery = Arc<MetalTimerQuery>;
type TextureDataReceiver = MetalTextureDataReceiver;
type TimerQuery = MetalTimerQuery;
type Uniform = MetalUniform;
type VertexArray = MetalVertexArray;
type VertexAttr = VertexAttribute;
@ -439,46 +464,24 @@ impl Device for MetalDevice {
texture.dirty.set(true);
}
fn read_pixels(&self, target: &RenderTarget<MetalDevice>, viewport: RectI) -> TextureData {
fn read_pixels(&self, target: &RenderTarget<MetalDevice>, viewport: RectI)
-> MetalTextureDataReceiver {
let texture = self.render_target_color_texture(target);
self.synchronize_texture(&texture);
let texture_data_receiver =
MetalTextureDataReceiver(Arc::new(MetalTextureDataReceiverInfo {
mutex: Mutex::new(MetalTextureDataReceiverState::Pending),
cond: Condvar::new(),
texture,
viewport,
}));
let (origin, size) = (viewport.origin(), viewport.size());
let metal_origin = MTLOrigin { x: origin.x() as u64, y: origin.y() as u64, z: 0 };
let metal_size = MTLSize { width: size.x() as u64, height: size.y() as u64, depth: 1 };
let metal_region = MTLRegion { origin: metal_origin, size: metal_size };
let texture_data_receiver_for_block = texture_data_receiver.clone();
let block = ConcreteBlock::new(move |_| {
texture_data_receiver_for_block.download();
});
let format = self.texture_format(&texture)
.expect("Unexpected framebuffer texture format!");
match format {
TextureFormat::R8 | TextureFormat::RGBA8 => {
let channels = format.channels();
let stride = size.x() as usize * channels;
let mut pixels = vec![0; stride * size.y() as usize];
texture.get_bytes(pixels.as_mut_ptr() as *mut _, metal_region, 0, stride as u64);
TextureData::U8(pixels)
}
TextureFormat::R16F | TextureFormat::RGBA16F => {
let channels = format.channels();
let stride = size.x() as usize * channels;
let mut pixels = vec![f16::default(); stride * size.y() as usize];
texture.get_bytes(pixels.as_mut_ptr() as *mut _,
metal_region,
0,
stride as u64 * 2);
TextureData::F16(pixels)
}
TextureFormat::RGBA32F => {
let channels = format.channels();
let stride = size.x() as usize * channels;
let mut pixels = vec![0.0; stride * size.y() as usize];
texture.get_bytes(pixels.as_mut_ptr() as *mut _,
metal_region,
0,
stride as u64 * 4);
TextureData::F32(pixels)
}
}
self.synchronize_texture(&texture_data_receiver.0.texture, block.copy());
texture_data_receiver
}
fn begin_commands(&self) {
@ -534,23 +537,26 @@ impl Device for MetalDevice {
encoder.end_encoding();
}
fn create_timer_query(&self) -> Arc<MetalTimerQuery> {
fn create_timer_query(&self) -> MetalTimerQuery {
let event_value = self.next_timer_query_event_value.get();
self.next_timer_query_event_value.set(event_value + 2);
let query = Arc::new(MetalTimerQuery {
let query = MetalTimerQuery(Arc::new(MetalTimerQueryInfo {
event_value,
start_time: Cell::new(None),
end_time: Cell::new(None),
});
mutex: Mutex::new(MetalTimerQueryData { start_time: None, end_time: None }),
cond: Condvar::new(),
}));
let captured_query = query.clone();
let start_block = ConcreteBlock::new(move |_: *mut Object, _: u64| {
captured_query.start_time.set(Some(Instant::now()))
let mut guard = captured_query.0.mutex.lock().unwrap();
guard.start_time = Some(Instant::now());
});
let captured_query = query.clone();
let end_block = ConcreteBlock::new(move |_: *mut Object, _: u64| {
captured_query.end_time.set(Some(Instant::now()))
let mut guard = captured_query.0.mutex.lock().unwrap();
guard.end_time = Some(Instant::now());
captured_query.0.cond.notify_all();
});
self.shared_event.notify_listener_at_value(&self.shared_event_listener,
event_value,
@ -562,26 +568,49 @@ impl Device for MetalDevice {
query
}
fn begin_timer_query(&self, query: &Arc<MetalTimerQuery>) {
fn begin_timer_query(&self, query: &MetalTimerQuery) {
self.command_buffers
.borrow_mut()
.last()
.unwrap()
.encode_signal_event(&self.shared_event, query.event_value);
.encode_signal_event(&self.shared_event, query.0.event_value);
}
fn end_timer_query(&self, query: &Arc<MetalTimerQuery>) {
fn end_timer_query(&self, query: &MetalTimerQuery) {
self.command_buffers
.borrow_mut()
.last()
.unwrap()
.encode_signal_event(&self.shared_event, query.event_value + 1);
.encode_signal_event(&self.shared_event, query.0.event_value + 1);
}
fn get_timer_query(&self, query: &Arc<MetalTimerQuery>) -> Option<Duration> {
match (query.start_time.get(), query.end_time.get()) {
(Some(start_time), Some(end_time)) => Some(end_time - start_time),
_ => None,
/// Returns the elapsed time of `query` if both its start and end timestamps have been
/// recorded, or `None` otherwise. Does not wait.
fn try_recv_timer_query(&self, query: &MetalTimerQuery) -> Option<Duration> {
    let mut guard = query.0.mutex.lock().unwrap();
    try_recv_timer_query_with_guard(&mut guard)
}
/// Blocks until both timestamps of `query` have been recorded, then returns the elapsed
/// time between them.
fn recv_timer_query(&self, query: &MetalTimerQuery) -> Duration {
    let info = &query.0;
    let mut guard = info.mutex.lock().unwrap();
    loop {
        match try_recv_timer_query_with_guard(&mut guard) {
            Some(duration) => return duration,
            // Not ready yet: sleep on the condition variable and re-check on wakeup.
            None => guard = info.cond.wait(guard).unwrap(),
        }
    }
}
/// Takes the downloaded texture data out of `receiver` if it is available, or returns
/// `None` if the download is still pending or the data was already taken. Does not wait.
fn try_recv_texture_data(&self, receiver: &MetalTextureDataReceiver) -> Option<TextureData> {
    let mut guard = receiver.0.mutex.lock().unwrap();
    try_recv_texture_data_with_guard(&mut guard)
}
/// Blocks until the texture data for `receiver` has been downloaded, then takes and
/// returns it.
///
/// NOTE(review): once the data has been taken the state is `Finished`, for which the helper
/// returns `None`; a second call on the same receiver therefore looks like it would wait
/// indefinitely. Confirm callers only receive once.
fn recv_texture_data(&self, receiver: &MetalTextureDataReceiver) -> TextureData {
    let info = &receiver.0;
    let mut guard = info.mutex.lock().unwrap();
    loop {
        match try_recv_texture_data_with_guard(&mut guard) {
            Some(texture_data) => return texture_data,
            // Nothing to take yet: sleep on the condition variable and re-check on wakeup.
            None => guard = info.cond.wait(guard).unwrap(),
        }
    }
}
@ -1096,12 +1125,7 @@ impl MetalDevice {
}
fn texture_format(&self, texture: &Texture) -> Option<TextureFormat> {
match texture.pixel_format() {
MTLPixelFormat::R8Unorm => Some(TextureFormat::R8),
MTLPixelFormat::R16Float => Some(TextureFormat::R16F),
MTLPixelFormat::RGBA8Unorm => Some(TextureFormat::RGBA8),
_ => None,
}
TextureFormat::from_metal_pixel_format(texture.pixel_format())
}
fn set_viewport(&self, encoder: &RenderCommandEncoderRef, viewport: &RectI) {
@ -1115,11 +1139,13 @@ impl MetalDevice {
})
}
fn synchronize_texture(&self, texture: &Texture) {
{
fn synchronize_texture(&self, texture: &Texture, block: RcBlock<(*mut Object,), ()>) {
unsafe {
let command_buffers = self.command_buffers.borrow();
let encoder = command_buffers.last().unwrap().new_blit_command_encoder();
let command_buffer = command_buffers.last().unwrap();
let encoder = command_buffer.new_blit_command_encoder();
encoder.synchronize_resource(&texture);
let () = msg_send![*command_buffer, addCompletedHandler:&*block];
encoder.end_encoding();
}
@ -1218,6 +1244,98 @@ impl UniformDataExt for UniformData {
}
}
/// Backend-local extension for converting Metal pixel formats into the implementing type.
trait TextureFormatExt: Sized {
/// Returns the value corresponding to `metal_pixel_format`, or `None` if there is no
/// mapping for it.
fn from_metal_pixel_format(metal_pixel_format: MTLPixelFormat) -> Option<Self>;
}
impl TextureFormatExt for TextureFormat {
    /// Maps a Metal pixel format to the corresponding `TextureFormat`.
    ///
    /// Returns `None` for pixel formats that have no `TextureFormat` equivalent. The RGBA
    /// float formats are included so that reading back such textures reaches the matching
    /// arms in `MetalTextureDataReceiver::download` instead of failing its format check.
    fn from_metal_pixel_format(metal_pixel_format: MTLPixelFormat) -> Option<TextureFormat> {
        match metal_pixel_format {
            MTLPixelFormat::R8Unorm => Some(TextureFormat::R8),
            MTLPixelFormat::R16Float => Some(TextureFormat::R16F),
            MTLPixelFormat::RGBA8Unorm => Some(TextureFormat::RGBA8),
            MTLPixelFormat::BGRA8Unorm => {
                // FIXME(pcwalton): This is wrong! But it prevents a crash for now.
                Some(TextureFormat::RGBA8)
            }
            MTLPixelFormat::RGBA16Float => Some(TextureFormat::RGBA16F),
            MTLPixelFormat::RGBA32Float => Some(TextureFormat::RGBA32F),
            _ => None,
        }
    }
}
// Synchronization helpers
/// Returns `end_time - start_time` for the locked timer query data, or `None` if either
/// timestamp has not been recorded yet.
fn try_recv_timer_query_with_guard(guard: &mut MutexGuard<MetalTimerQueryData>)
                                   -> Option<Duration> {
    let start_time = guard.start_time?;
    let end_time = guard.end_time?;
    Some(end_time - start_time)
}
impl MetalTextureDataReceiver {
    /// Copies the receiver's viewport region out of its texture, stores the result as
    /// `Downloaded` in the shared state, and wakes every thread waiting on the condition
    /// variable.
    ///
    /// Panics with "Unexpected framebuffer texture format!" if the texture's pixel format has
    /// no `TextureFormat` mapping.
    fn download(&self) {
        let info = &*self.0;
        let (origin, size) = (info.viewport.origin(), info.viewport.size());
        let metal_region = MTLRegion {
            origin: MTLOrigin { x: origin.x() as u64, y: origin.y() as u64, z: 0 },
            size: MTLSize { width: size.x() as u64, height: size.y() as u64, depth: 1 },
        };

        let format = TextureFormat::from_metal_pixel_format(info.texture.pixel_format())
            .expect("Unexpected framebuffer texture format!");

        // Row length and total length, both in elements (not bytes).
        let channels = format.channels();
        let stride = size.x() as usize * channels;
        let element_count = stride * size.y() as usize;

        let texture_data = match format {
            TextureFormat::R8 | TextureFormat::RGBA8 => {
                let mut pixels = vec![0; element_count];
                info.texture.get_bytes(pixels.as_mut_ptr() as *mut _,
                                       metal_region,
                                       0,
                                       stride as u64);
                TextureData::U8(pixels)
            }
            TextureFormat::R16F | TextureFormat::RGBA16F => {
                let mut pixels = vec![f16::default(); element_count];
                // Two bytes per half-float element.
                info.texture.get_bytes(pixels.as_mut_ptr() as *mut _,
                                       metal_region,
                                       0,
                                       stride as u64 * 2);
                TextureData::F16(pixels)
            }
            TextureFormat::RGBA32F => {
                let mut pixels = vec![0.0; element_count];
                // Four bytes per float element.
                info.texture.get_bytes(pixels.as_mut_ptr() as *mut _,
                                       metal_region,
                                       0,
                                       stride as u64 * 4);
                TextureData::F32(pixels)
            }
        };

        // Publish the result only after the copy is complete, then wake any waiters.
        let mut state = info.mutex.lock().unwrap();
        *state = MetalTextureDataReceiverState::Downloaded(texture_data);
        info.cond.notify_all();
    }
}
/// Takes the downloaded texture data out of the locked state, leaving `Finished` behind.
///
/// Returns `None`, and leaves the state untouched, when it is `Pending` or `Finished`.
fn try_recv_texture_data_with_guard(guard: &mut MutexGuard<MetalTextureDataReceiverState>)
                                    -> Option<TextureData> {
    let is_downloaded = match **guard {
        MetalTextureDataReceiverState::Downloaded(_) => true,
        MetalTextureDataReceiverState::Pending | MetalTextureDataReceiverState::Finished => false,
    };
    if !is_downloaded {
        return None;
    }

    // Swap in `Finished` so the data is handed out exactly once.
    let previous_state = mem::replace(&mut **guard, MetalTextureDataReceiverState::Finished);
    match previous_state {
        MetalTextureDataReceiverState::Downloaded(texture_data) => Some(texture_data),
        _ => unreachable!(),
    }
}
// Extra structs missing from `metal-rs`
bitflags! {

View File

@ -296,7 +296,7 @@ where
// Accumulate stage-0 time.
let mut total_stage_0_time = Duration::new(0, 0);
for timer_query in &timers.stage_0 {
match self.device.get_timer_query(timer_query) {
match self.device.try_recv_timer_query(timer_query) {
None => return None,
Some(stage_0_time) => total_stage_0_time += stage_0_time,
}
@ -305,7 +305,7 @@ where
// Get stage-1 time.
let stage_1_time = {
let stage_1_timer_query = timers.stage_1.as_ref().unwrap();
match self.device.get_timer_query(stage_1_timer_query) {
match self.device.try_recv_timer_query(stage_1_timer_query) {
None => return None,
Some(query) => query,
}