diff --git a/renderer/src/gpu/d3d11/renderer.rs b/renderer/src/gpu/d3d11/renderer.rs index 9bfa66d9..0e4c32c6 100644 --- a/renderer/src/gpu/d3d11/renderer.rs +++ b/renderer/src/gpu/d3d11/renderer.rs @@ -29,6 +29,7 @@ use vec_map::VecMap; const FILL_INDIRECT_DRAW_PARAMS_INSTANCE_COUNT_INDEX: usize = 1; const FILL_INDIRECT_DRAW_PARAMS_ALPHA_TILE_COUNT_INDEX: usize = 4; +const FILL_INDIRECT_DRAW_PARAMS_SIZE: usize = 8; const BIN_INDIRECT_DRAW_PARAMS_MICROLINE_COUNT_INDEX: usize = 3; @@ -146,7 +147,8 @@ impl RendererD3D11 where D: Device { core: &mut RendererCore, microlines_storage: &MicrolinesBufferIDsD3D11, propagate_metadata_buffer_ids: &PropagateMetadataBufferIDsD3D11, - tiles_d3d11_buffer_id: BufferID) + tiles_d3d11_buffer_id: BufferID, + z_buffer_id: BufferID) -> Option { let bin_program = &self.programs.bin_program; @@ -154,10 +156,6 @@ impl RendererD3D11 where D: Device { core.allocator.allocate_buffer::(&core.device, self.allocated_fill_count as u64, BufferTag("Fill")); - let fill_indirect_draw_params_buffer_id = - core.allocator.allocate_buffer::(&core.device, - 8, - BufferTag("FillIndirectDrawParamsD3D11")); let fill_vertex_buffer = core.allocator.get_buffer(fill_vertex_buffer_id); let microlines_buffer = core.allocator.get_buffer(microlines_storage.buffer_id); @@ -166,10 +164,13 @@ impl RendererD3D11 where D: Device { core.allocator.get_buffer(propagate_metadata_buffer_ids.propagate_metadata); let backdrops_buffer = core.allocator.get_buffer(propagate_metadata_buffer_ids.backdrops); - let fill_indirect_draw_params_buffer = - core.allocator.get_buffer(fill_indirect_draw_params_buffer_id); + // Upload fill indirect draw params to header of the Z-buffer. + // + // This is in the Z-buffer, not its own buffer, to work around the 8 SSBO limitation on + // some drivers (#373). 
+ let z_buffer = core.allocator.get_buffer(z_buffer_id); let indirect_draw_params = [6, 0, 0, 0, 0, microlines_storage.count, 0, 0]; - core.device.upload_to_buffer::(&fill_indirect_draw_params_buffer, + core.device.upload_to_buffer::(&z_buffer, 0, &indirect_draw_params, BufferTarget::Storage); @@ -196,8 +197,7 @@ impl RendererD3D11 where D: Device { storage_buffers: &[ (&bin_program.microlines_storage_buffer, microlines_buffer), (&bin_program.metadata_storage_buffer, propagate_metadata_buffer), - (&bin_program.indirect_draw_params_storage_buffer, - fill_indirect_draw_params_buffer), + (&bin_program.indirect_draw_params_storage_buffer, z_buffer), (&bin_program.fills_storage_buffer, fill_vertex_buffer), (&bin_program.tiles_storage_buffer, tiles_buffer), (&bin_program.backdrops_storage_buffer, backdrops_buffer), @@ -208,10 +208,9 @@ impl RendererD3D11 where D: Device { core.finish_timing_draw_call(&timer_query); core.current_timer.as_mut().unwrap().push_query(TimeCategory::Bin, timer_query); - let indirect_draw_params_receiver = - core.device.read_buffer(fill_indirect_draw_params_buffer, - BufferTarget::Storage, - 0..32); + let indirect_draw_params_receiver = core.device.read_buffer(z_buffer, + BufferTarget::Storage, + 0..32); let indirect_draw_params = core.device.recv_buffer(&indirect_draw_params_receiver); let indirect_draw_params: &[u32] = indirect_draw_params.as_slice_of().unwrap(); @@ -224,7 +223,7 @@ impl RendererD3D11 where D: Device { core.stats.fill_count += needed_fill_count as usize; - Some(FillBufferInfoD3D11 { fill_vertex_buffer_id, fill_indirect_draw_params_buffer_id }) + Some(FillBufferInfoD3D11 { fill_vertex_buffer_id }) } pub(crate) fn upload_scene(&mut self, @@ -355,10 +354,7 @@ impl RendererD3D11 where D: Device { tiles_d3d11_buffer_id: BufferID, alpha_tiles_buffer_id: BufferID, propagate_tiles_info: &PropagateTilesInfoD3D11) { - let &FillBufferInfoD3D11 { - fill_vertex_buffer_id, - fill_indirect_draw_params_buffer_id: _, - } = fill_storage_info; + 
let &FillBufferInfoD3D11 { fill_vertex_buffer_id } = fill_storage_info; let &PropagateTilesInfoD3D11 { ref alpha_tile_range } = propagate_tiles_info; let fill_program = &self.programs.fill_program; @@ -486,7 +482,8 @@ impl RendererD3D11 where D: Device { fill_buffer_info = self.bin_segments(core, &microlines_storage, &propagate_metadata_buffer_ids, - tiles_d3d11_buffer_id); + tiles_d3d11_buffer_id, + z_buffer_id); if fill_buffer_info.is_some() { break; } @@ -505,7 +502,6 @@ impl RendererD3D11 where D: Device { self.propagate_tiles(core, batch.prepare_info.backdrops.len() as u32, tiles_d3d11_buffer_id, - fill_buffer_info.fill_indirect_draw_params_buffer_id, z_buffer_id, first_tile_map_buffer_id, alpha_tiles_buffer_id, @@ -523,7 +519,6 @@ impl RendererD3D11 where D: Device { &propagate_tiles_info); core.allocator.free_buffer(fill_buffer_info.fill_vertex_buffer_id); - core.allocator.free_buffer(fill_buffer_info.fill_indirect_draw_params_buffer_id); core.allocator.free_buffer(alpha_tiles_buffer_id); // FIXME(pcwalton): This seems like the wrong place to do this... 
@@ -543,7 +538,6 @@ impl RendererD3D11 where D: Device { core: &mut RendererCore, column_count: u32, tiles_d3d11_buffer_id: BufferID, - fill_indirect_draw_params_buffer_id: BufferID, z_buffer_id: BufferID, first_tile_map_buffer_id: BufferID, alpha_tiles_buffer_id: BufferID, @@ -572,8 +566,6 @@ impl RendererD3D11 where D: Device { BufferTarget::Storage); let alpha_tiles_storage_buffer = core.allocator.get_buffer(alpha_tiles_buffer_id); - let fill_indirect_draw_params_buffer = - core.allocator.get_buffer(fill_indirect_draw_params_buffer_id); let mut storage_buffers = vec![ (&propagate_program.draw_metadata_storage_buffer, propagate_metadata_storage_buffer), @@ -581,8 +573,6 @@ impl RendererD3D11 where D: Device { (&propagate_program.draw_tiles_storage_buffer, tiles_d3d11_buffer), (&propagate_program.z_buffer_storage_buffer, z_buffer), (&propagate_program.first_tile_map_storage_buffer, first_tile_map_storage_buffer), - (&propagate_program.indirect_draw_params_storage_buffer, - fill_indirect_draw_params_buffer), (&propagate_program.alpha_tiles_storage_buffer, alpha_tiles_storage_buffer), ]; @@ -633,9 +623,7 @@ impl RendererD3D11 where D: Device { core.current_timer.as_mut().unwrap().push_query(TimeCategory::Other, timer_query); let fill_indirect_draw_params_receiver = - core.device.read_buffer(&fill_indirect_draw_params_buffer, - BufferTarget::Storage, - 0..32); + core.device.read_buffer(&z_buffer, BufferTarget::Storage, 0..32); let fill_indirect_draw_params = core.device .recv_buffer(&fill_indirect_draw_params_receiver); let fill_indirect_draw_params: &[u32] = fill_indirect_draw_params.as_slice_of().unwrap(); @@ -703,9 +691,10 @@ impl RendererD3D11 where D: Device { } fn allocate_z_buffer(&mut self, core: &mut RendererCore) -> BufferID { - core.allocator.allocate_buffer::(&core.device, - core.tile_size().area() as u64, - BufferTag("ZBufferD3D11")) + // This includes the fill indirect draw params because some drivers limit the number of + // SSBOs to 8 (#373). 
+ let size = core.tile_size().area() as u64 + FILL_INDIRECT_DRAW_PARAMS_SIZE as u64; + core.allocator.allocate_buffer::( &core.device, size, BufferTag("ZBufferD3D11")) } pub(crate) fn draw_tiles(&mut self, @@ -808,7 +797,6 @@ struct TileBatchInfoD3D11 { #[derive(Clone)] struct FillBufferInfoD3D11 { fill_vertex_buffer_id: BufferID, - fill_indirect_draw_params_buffer_id: BufferID, } #[derive(Debug)] diff --git a/renderer/src/gpu/d3d11/shaders.rs b/renderer/src/gpu/d3d11/shaders.rs index cd8249c7..52fe5f24 100644 --- a/renderer/src/gpu/d3d11/shaders.rs +++ b/renderer/src/gpu/d3d11/shaders.rs @@ -55,7 +55,6 @@ pub struct PropagateProgramD3D11 where D: Device { pub clip_tiles_storage_buffer: D::StorageBuffer, pub z_buffer_storage_buffer: D::StorageBuffer, pub first_tile_map_storage_buffer: D::StorageBuffer, - pub indirect_draw_params_storage_buffer: D::StorageBuffer, pub alpha_tiles_storage_buffer: D::StorageBuffer, } @@ -75,9 +74,7 @@ impl PropagateProgramD3D11 where D: Device { let clip_tiles_storage_buffer = device.get_storage_buffer(&program, "ClipTiles", 4); let z_buffer_storage_buffer = device.get_storage_buffer(&program, "ZBuffer", 5); let first_tile_map_storage_buffer = device.get_storage_buffer(&program, "FirstTileMap", 6); - let indirect_draw_params_storage_buffer = - device.get_storage_buffer(&program, "IndirectDrawParams", 7); - let alpha_tiles_storage_buffer = device.get_storage_buffer(&program, "AlphaTiles", 8); + let alpha_tiles_storage_buffer = device.get_storage_buffer(&program, "AlphaTiles", 7); PropagateProgramD3D11 { program, @@ -91,7 +88,6 @@ impl PropagateProgramD3D11 where D: Device { clip_tiles_storage_buffer, z_buffer_storage_buffer, first_tile_map_storage_buffer, - indirect_draw_params_storage_buffer, alpha_tiles_storage_buffer, } } diff --git a/resources/shaders/gl4/d3d11/propagate.cs.glsl b/resources/shaders/gl4/d3d11/propagate.cs.glsl index 538e7532..744eaff5 100644 --- a/resources/shaders/gl4/d3d11/propagate.cs.glsl +++ 
b/resources/shaders/gl4/d3d11/propagate.cs.glsl @@ -29,6 +29,9 @@ layout(local_size_x = 64)in; + + + uniform ivec2 uFramebufferTileSize; uniform int uColumnCount; uniform int uFirstAlphaTileIndex; @@ -76,6 +79,12 @@ layout(std430, binding = 4)buffer bClipTiles { }; layout(std430, binding = 5)buffer bZBuffer { + + + + + + restrict int iZBuffer[]; }; @@ -83,16 +92,7 @@ layout(std430, binding = 6)buffer bFirstTileMap { restrict int iFirstTileMap[]; }; -layout(std430, binding = 7)buffer bIndirectDrawParams { - - - - - - restrict uint iIndirectDrawParams[]; -}; - -layout(std430, binding = 8)buffer bAlphaTiles { +layout(std430, binding = 7)buffer bAlphaTiles { restrict uint iAlphaTiles[]; @@ -191,7 +191,8 @@ void main(){ } if(needNewAlphaTile){ - uint drawBatchAlphaTileIndex = atomicAdd(iIndirectDrawParams[4], 1); + uint drawBatchAlphaTileIndex = + atomicAdd(iZBuffer[4], 1); iAlphaTiles[drawBatchAlphaTileIndex * 2 + 0]= drawTileIndex; iAlphaTiles[drawBatchAlphaTileIndex * 2 + 1]= clipAlphaTileIndex; drawAlphaTileIndex = int(drawBatchAlphaTileIndex)+ uFirstAlphaTileIndex; @@ -206,7 +207,7 @@ void main(){ ivec2 tileCoord = ivec2(tileX, tileY)+ ivec2(drawTileRect . xy); int tileMapIndex = tileCoord . y * uFramebufferTileSize . x + tileCoord . 
x; if(zWrite && drawTileBackdrop != 0 && drawAlphaTileIndex < 0) - atomicMax(iZBuffer[tileMapIndex], int(drawTileIndex)); + atomicMax(iZBuffer[tileMapIndex + 8], int(drawTileIndex)); if(drawTileBackdrop != 0 || drawAlphaTileIndex >= 0){ diff --git a/resources/shaders/gl4/d3d11/sort.cs.glsl b/resources/shaders/gl4/d3d11/sort.cs.glsl index 42de2d39..5ecbcdb4 100644 --- a/resources/shaders/gl4/d3d11/sort.cs.glsl +++ b/resources/shaders/gl4/d3d11/sort.cs.glsl @@ -25,6 +25,8 @@ precision highp float; + + uniform int uTileCount; layout(std430, binding = 0)buffer bTiles { @@ -62,7 +64,7 @@ void main(){ if(globalTileIndex >= uint(uTileCount)) return; - int zValue = iZBuffer[globalTileIndex]; + int zValue = iZBuffer[8 + globalTileIndex]; int unsortedFirstTileIndex = getFirst(globalTileIndex); int sortedFirstTileIndex = - 1; diff --git a/resources/shaders/metal/d3d11/propagate.cs.metal b/resources/shaders/metal/d3d11/propagate.cs.metal index 60af665d..92d18dab 100644 --- a/resources/shaders/metal/d3d11/propagate.cs.metal +++ b/resources/shaders/metal/d3d11/propagate.cs.metal @@ -33,9 +33,9 @@ struct bClipTiles uint iClipTiles[1]; }; -struct bIndirectDrawParams +struct bZBuffer { - uint iIndirectDrawParams[1]; + int iZBuffer[1]; }; struct bAlphaTiles @@ -43,11 +43,6 @@ struct bAlphaTiles uint iAlphaTiles[1]; }; -struct bZBuffer -{ - int iZBuffer[1]; -}; - struct bFirstTileMap { int iFirstTileMap[1]; @@ -61,7 +56,7 @@ uint calculateTileIndex(thread const uint& bufferOffset, thread const uint4& til return (bufferOffset + (tileCoord.y * (tileRect.z - tileRect.x))) + tileCoord.x; } -kernel void main0(constant int& uColumnCount [[buffer(0)]], constant int& uFirstAlphaTileIndex [[buffer(8)]], constant int2& uFramebufferTileSize [[buffer(9)]], const device bBackdrops& _59 [[buffer(1)]], const device bDrawMetadata& _85 [[buffer(2)]], const device bClipMetadata& _126 [[buffer(3)]], device bDrawTiles& _175 [[buffer(4)]], device bClipTiles& _252 [[buffer(5)]], device 
bIndirectDrawParams& _302 [[buffer(6)]], device bAlphaTiles& _309 [[buffer(7)]], device bZBuffer& _380 [[buffer(10)]], device bFirstTileMap& _397 [[buffer(11)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +kernel void main0(constant int& uColumnCount [[buffer(0)]], constant int& uFirstAlphaTileIndex [[buffer(8)]], constant int2& uFramebufferTileSize [[buffer(9)]], const device bBackdrops& _59 [[buffer(1)]], const device bDrawMetadata& _85 [[buffer(2)]], const device bClipMetadata& _126 [[buffer(3)]], device bDrawTiles& _175 [[buffer(4)]], device bClipTiles& _252 [[buffer(5)]], device bZBuffer& _302 [[buffer(6)]], device bAlphaTiles& _310 [[buffer(7)]], device bFirstTileMap& _395 [[buffer(10)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) { uint columnIndex = gl_GlobalInvocationID.x; if (int(columnIndex) >= uColumnCount) @@ -154,10 +149,10 @@ kernel void main0(constant int& uColumnCount [[buffer(0)]], constant int& uFirst } if (needNewAlphaTile) { - uint _305 = atomic_fetch_add_explicit((device atomic_uint*)&_302.iIndirectDrawParams[4], 1u, memory_order_relaxed); - uint drawBatchAlphaTileIndex = _305; - _309.iAlphaTiles[(drawBatchAlphaTileIndex * 2u) + 0u] = drawTileIndex; - _309.iAlphaTiles[(drawBatchAlphaTileIndex * 2u) + 1u] = uint(clipAlphaTileIndex); + int _305 = atomic_fetch_add_explicit((device atomic_int*)&_302.iZBuffer[4], 1, memory_order_relaxed); + uint drawBatchAlphaTileIndex = uint(_305); + _310.iAlphaTiles[(drawBatchAlphaTileIndex * 2u) + 0u] = drawTileIndex; + _310.iAlphaTiles[(drawBatchAlphaTileIndex * 2u) + 1u] = uint(clipAlphaTileIndex); drawAlphaTileIndex = int(drawBatchAlphaTileIndex) + uFirstAlphaTileIndex; } _175.iDrawTiles[(drawTileIndex * 4u) + 2u] = (uint(drawAlphaTileIndex) & 16777215u) | (uint(drawBackdropDelta) << uint(24)); @@ -166,12 +161,12 @@ kernel void main0(constant int& uColumnCount [[buffer(0)]], constant int& uFirst int tileMapIndex = (tileCoord_1.y * uFramebufferTileSize.x) + tileCoord_1.x; if 
((zWrite && (drawTileBackdrop != 0)) && (drawAlphaTileIndex < 0)) { - int _385 = atomic_fetch_max_explicit((device atomic_int*)&_380.iZBuffer[tileMapIndex], int(drawTileIndex), memory_order_relaxed); + int _383 = atomic_fetch_max_explicit((device atomic_int*)&_302.iZBuffer[tileMapIndex + 8], int(drawTileIndex), memory_order_relaxed); } if ((drawTileBackdrop != 0) || (drawAlphaTileIndex >= 0)) { - int _402 = atomic_exchange_explicit((device atomic_int*)&_397.iFirstTileMap[tileMapIndex], int(drawTileIndex), memory_order_relaxed); - int nextTileIndex = _402; + int _400 = atomic_exchange_explicit((device atomic_int*)&_395.iFirstTileMap[tileMapIndex], int(drawTileIndex), memory_order_relaxed); + int nextTileIndex = _400; _175.iDrawTiles[(drawTileIndex * 4u) + 0u] = uint(nextTileIndex); } currentBackdrop += drawBackdropDelta; diff --git a/resources/shaders/metal/d3d11/sort.cs.metal b/resources/shaders/metal/d3d11/sort.cs.metal index ae01b505..0d91a330 100644 --- a/resources/shaders/metal/d3d11/sort.cs.metal +++ b/resources/shaders/metal/d3d11/sort.cs.metal @@ -48,7 +48,7 @@ kernel void main0(constant int& uTileCount [[buffer(2)]], device bFirstTileMap& { return; } - int zValue = _76.iZBuffer[globalTileIndex]; + int zValue = _76.iZBuffer[8u + globalTileIndex]; uint param = globalTileIndex; int unsortedFirstTileIndex = getFirst(param, v_26); int sortedFirstTileIndex = -1; diff --git a/shaders/d3d11/propagate.cs.glsl b/shaders/d3d11/propagate.cs.glsl index 543440f3..5a382c79 100644 --- a/shaders/d3d11/propagate.cs.glsl +++ b/shaders/d3d11/propagate.cs.glsl @@ -27,6 +27,9 @@ layout(local_size_x = 64) in; #define TILE_FIELD_BACKDROP_ALPHA_TILE_ID 2 #define TILE_FIELD_CONTROL 3 +#define FILL_INDIRECT_DRAW_PARAMS_ALPHA_TILE_COUNT_INDEX 4 +#define FILL_INDIRECT_DRAW_PARAMS_SIZE 8 + uniform ivec2 uFramebufferTileSize; uniform int uColumnCount; uniform int uFirstAlphaTileIndex; @@ -74,6 +77,12 @@ layout(std430, binding = 4) buffer bClipTiles { }; layout(std430, binding = 5) buffer 
bZBuffer { + // [0]: vertexCount (6) + // [1]: instanceCount (of fills) + // [2]: vertexStart (0) + // [3]: baseInstance (0) + // [4]: alpha tile count + // [8..]: z-buffer restrict int iZBuffer[]; }; @@ -81,16 +90,7 @@ layout(std430, binding = 6) buffer bFirstTileMap { restrict int iFirstTileMap[]; }; -layout(std430, binding = 7) buffer bIndirectDrawParams { - // [0]: vertexCount (6) - // [1]: instanceCount (of fills) - // [2]: vertexStart (0) - // [3]: baseInstance (0) - // [4]: alpha tile count - restrict uint iIndirectDrawParams[]; -}; - -layout(std430, binding = 8) buffer bAlphaTiles { +layout(std430, binding = 7) buffer bAlphaTiles { // [0]: alpha tile index // [1]: clip tile index restrict uint iAlphaTiles[]; @@ -189,7 +189,8 @@ void main() { } if (needNewAlphaTile) { - uint drawBatchAlphaTileIndex = atomicAdd(iIndirectDrawParams[4], 1); + uint drawBatchAlphaTileIndex = + atomicAdd(iZBuffer[FILL_INDIRECT_DRAW_PARAMS_ALPHA_TILE_COUNT_INDEX], 1); iAlphaTiles[drawBatchAlphaTileIndex * 2 + 0] = drawTileIndex; iAlphaTiles[drawBatchAlphaTileIndex * 2 + 1] = clipAlphaTileIndex; drawAlphaTileIndex = int(drawBatchAlphaTileIndex) + uFirstAlphaTileIndex; @@ -204,7 +205,7 @@ void main() { ivec2 tileCoord = ivec2(tileX, tileY) + ivec2(drawTileRect.xy); int tileMapIndex = tileCoord.y * uFramebufferTileSize.x + tileCoord.x; if (zWrite && drawTileBackdrop != 0 && drawAlphaTileIndex < 0) - atomicMax(iZBuffer[tileMapIndex], int(drawTileIndex)); + atomicMax(iZBuffer[tileMapIndex + FILL_INDIRECT_DRAW_PARAMS_SIZE], int(drawTileIndex)); // Stitch into the linked list if necessary. 
if (drawTileBackdrop != 0 || drawAlphaTileIndex >= 0) { diff --git a/shaders/d3d11/sort.cs.glsl b/shaders/d3d11/sort.cs.glsl index b89f1664..a7436cb1 100644 --- a/shaders/d3d11/sort.cs.glsl +++ b/shaders/d3d11/sort.cs.glsl @@ -23,6 +23,8 @@ precision highp sampler2D; #define TILE_FIELD_BACKDROP_ALPHA_TILE_ID 2 #define TILE_FIELD_CONTROL 3 +#define FILL_INDIRECT_DRAW_PARAMS_SIZE 8 + uniform int uTileCount; layout(std430, binding = 0) buffer bTiles { @@ -60,7 +62,7 @@ void main() { if (globalTileIndex >= uint(uTileCount)) return; - int zValue = iZBuffer[globalTileIndex]; + int zValue = iZBuffer[FILL_INDIRECT_DRAW_PARAMS_SIZE + globalTileIndex]; int unsortedFirstTileIndex = getFirst(globalTileIndex); int sortedFirstTileIndex = -1;