Combine the Z-buffer and fill indirect draw params buffers to reduce the number of SSBO bindings.

Apparently Mesa RadeonSI drivers have a limit of 8 SSBOs.

Closes #373.
This commit is contained in:
Patrick Walton 2020-07-02 11:55:02 -07:00
parent 90eeebcae6
commit 13ae83d6c5
8 changed files with 66 additions and 81 deletions

View File

@ -29,6 +29,7 @@ use vec_map::VecMap;
const FILL_INDIRECT_DRAW_PARAMS_INSTANCE_COUNT_INDEX: usize = 1;
const FILL_INDIRECT_DRAW_PARAMS_ALPHA_TILE_COUNT_INDEX: usize = 4;
const FILL_INDIRECT_DRAW_PARAMS_SIZE: usize = 8;
const BIN_INDIRECT_DRAW_PARAMS_MICROLINE_COUNT_INDEX: usize = 3;
@ -146,7 +147,8 @@ impl<D> RendererD3D11<D> where D: Device {
core: &mut RendererCore<D>,
microlines_storage: &MicrolinesBufferIDsD3D11,
propagate_metadata_buffer_ids: &PropagateMetadataBufferIDsD3D11,
tiles_d3d11_buffer_id: BufferID)
tiles_d3d11_buffer_id: BufferID,
z_buffer_id: BufferID)
-> Option<FillBufferInfoD3D11> {
let bin_program = &self.programs.bin_program;
@ -154,10 +156,6 @@ impl<D> RendererD3D11<D> where D: Device {
core.allocator.allocate_buffer::<Fill>(&core.device,
self.allocated_fill_count as u64,
BufferTag("Fill"));
let fill_indirect_draw_params_buffer_id =
core.allocator.allocate_buffer::<u32>(&core.device,
8,
BufferTag("FillIndirectDrawParamsD3D11"));
let fill_vertex_buffer = core.allocator.get_buffer(fill_vertex_buffer_id);
let microlines_buffer = core.allocator.get_buffer(microlines_storage.buffer_id);
@ -166,10 +164,13 @@ impl<D> RendererD3D11<D> where D: Device {
core.allocator.get_buffer(propagate_metadata_buffer_ids.propagate_metadata);
let backdrops_buffer = core.allocator.get_buffer(propagate_metadata_buffer_ids.backdrops);
let fill_indirect_draw_params_buffer =
core.allocator.get_buffer(fill_indirect_draw_params_buffer_id);
// Upload fill indirect draw params to header of the Z-buffer.
//
// This is in the Z-buffer, not its own buffer, to work around the 8 SSBO limitation on
// some drivers (#373).
let z_buffer = core.allocator.get_buffer(z_buffer_id);
let indirect_draw_params = [6, 0, 0, 0, 0, microlines_storage.count, 0, 0];
core.device.upload_to_buffer::<u32>(&fill_indirect_draw_params_buffer,
core.device.upload_to_buffer::<u32>(&z_buffer,
0,
&indirect_draw_params,
BufferTarget::Storage);
@ -196,8 +197,7 @@ impl<D> RendererD3D11<D> where D: Device {
storage_buffers: &[
(&bin_program.microlines_storage_buffer, microlines_buffer),
(&bin_program.metadata_storage_buffer, propagate_metadata_buffer),
(&bin_program.indirect_draw_params_storage_buffer,
fill_indirect_draw_params_buffer),
(&bin_program.indirect_draw_params_storage_buffer, z_buffer),
(&bin_program.fills_storage_buffer, fill_vertex_buffer),
(&bin_program.tiles_storage_buffer, tiles_buffer),
(&bin_program.backdrops_storage_buffer, backdrops_buffer),
@ -208,8 +208,7 @@ impl<D> RendererD3D11<D> where D: Device {
core.finish_timing_draw_call(&timer_query);
core.current_timer.as_mut().unwrap().push_query(TimeCategory::Bin, timer_query);
let indirect_draw_params_receiver =
core.device.read_buffer(fill_indirect_draw_params_buffer,
let indirect_draw_params_receiver = core.device.read_buffer(z_buffer,
BufferTarget::Storage,
0..32);
let indirect_draw_params = core.device.recv_buffer(&indirect_draw_params_receiver);
@ -224,7 +223,7 @@ impl<D> RendererD3D11<D> where D: Device {
core.stats.fill_count += needed_fill_count as usize;
Some(FillBufferInfoD3D11 { fill_vertex_buffer_id, fill_indirect_draw_params_buffer_id })
Some(FillBufferInfoD3D11 { fill_vertex_buffer_id })
}
pub(crate) fn upload_scene(&mut self,
@ -355,10 +354,7 @@ impl<D> RendererD3D11<D> where D: Device {
tiles_d3d11_buffer_id: BufferID,
alpha_tiles_buffer_id: BufferID,
propagate_tiles_info: &PropagateTilesInfoD3D11) {
let &FillBufferInfoD3D11 {
fill_vertex_buffer_id,
fill_indirect_draw_params_buffer_id: _,
} = fill_storage_info;
let &FillBufferInfoD3D11 { fill_vertex_buffer_id } = fill_storage_info;
let &PropagateTilesInfoD3D11 { ref alpha_tile_range } = propagate_tiles_info;
let fill_program = &self.programs.fill_program;
@ -486,7 +482,8 @@ impl<D> RendererD3D11<D> where D: Device {
fill_buffer_info = self.bin_segments(core,
&microlines_storage,
&propagate_metadata_buffer_ids,
tiles_d3d11_buffer_id);
tiles_d3d11_buffer_id,
z_buffer_id);
if fill_buffer_info.is_some() {
break;
}
@ -505,7 +502,6 @@ impl<D> RendererD3D11<D> where D: Device {
self.propagate_tiles(core,
batch.prepare_info.backdrops.len() as u32,
tiles_d3d11_buffer_id,
fill_buffer_info.fill_indirect_draw_params_buffer_id,
z_buffer_id,
first_tile_map_buffer_id,
alpha_tiles_buffer_id,
@ -523,7 +519,6 @@ impl<D> RendererD3D11<D> where D: Device {
&propagate_tiles_info);
core.allocator.free_buffer(fill_buffer_info.fill_vertex_buffer_id);
core.allocator.free_buffer(fill_buffer_info.fill_indirect_draw_params_buffer_id);
core.allocator.free_buffer(alpha_tiles_buffer_id);
// FIXME(pcwalton): This seems like the wrong place to do this...
@ -543,7 +538,6 @@ impl<D> RendererD3D11<D> where D: Device {
core: &mut RendererCore<D>,
column_count: u32,
tiles_d3d11_buffer_id: BufferID,
fill_indirect_draw_params_buffer_id: BufferID,
z_buffer_id: BufferID,
first_tile_map_buffer_id: BufferID,
alpha_tiles_buffer_id: BufferID,
@ -572,8 +566,6 @@ impl<D> RendererD3D11<D> where D: Device {
BufferTarget::Storage);
let alpha_tiles_storage_buffer = core.allocator.get_buffer(alpha_tiles_buffer_id);
let fill_indirect_draw_params_buffer =
core.allocator.get_buffer(fill_indirect_draw_params_buffer_id);
let mut storage_buffers = vec![
(&propagate_program.draw_metadata_storage_buffer, propagate_metadata_storage_buffer),
@ -581,8 +573,6 @@ impl<D> RendererD3D11<D> where D: Device {
(&propagate_program.draw_tiles_storage_buffer, tiles_d3d11_buffer),
(&propagate_program.z_buffer_storage_buffer, z_buffer),
(&propagate_program.first_tile_map_storage_buffer, first_tile_map_storage_buffer),
(&propagate_program.indirect_draw_params_storage_buffer,
fill_indirect_draw_params_buffer),
(&propagate_program.alpha_tiles_storage_buffer, alpha_tiles_storage_buffer),
];
@ -633,9 +623,7 @@ impl<D> RendererD3D11<D> where D: Device {
core.current_timer.as_mut().unwrap().push_query(TimeCategory::Other, timer_query);
let fill_indirect_draw_params_receiver =
core.device.read_buffer(&fill_indirect_draw_params_buffer,
BufferTarget::Storage,
0..32);
core.device.read_buffer(&z_buffer, BufferTarget::Storage, 0..32);
let fill_indirect_draw_params = core.device
.recv_buffer(&fill_indirect_draw_params_receiver);
let fill_indirect_draw_params: &[u32] = fill_indirect_draw_params.as_slice_of().unwrap();
@ -703,9 +691,10 @@ impl<D> RendererD3D11<D> where D: Device {
}
fn allocate_z_buffer(&mut self, core: &mut RendererCore<D>) -> BufferID {
core.allocator.allocate_buffer::<i32>(&core.device,
core.tile_size().area() as u64,
BufferTag("ZBufferD3D11"))
// This includes the fill indirect draw params because some drivers limit the number of
// SSBOs to 8 (#373).
let size = core.tile_size().area() as u64 + FILL_INDIRECT_DRAW_PARAMS_SIZE as u64;
core.allocator.allocate_buffer::<i32>( &core.device, size, BufferTag("ZBufferD3D11"))
}
pub(crate) fn draw_tiles(&mut self,
@ -808,7 +797,6 @@ struct TileBatchInfoD3D11 {
#[derive(Clone)]
struct FillBufferInfoD3D11 {
fill_vertex_buffer_id: BufferID,
fill_indirect_draw_params_buffer_id: BufferID,
}
#[derive(Debug)]

View File

@ -55,7 +55,6 @@ pub struct PropagateProgramD3D11<D> where D: Device {
pub clip_tiles_storage_buffer: D::StorageBuffer,
pub z_buffer_storage_buffer: D::StorageBuffer,
pub first_tile_map_storage_buffer: D::StorageBuffer,
pub indirect_draw_params_storage_buffer: D::StorageBuffer,
pub alpha_tiles_storage_buffer: D::StorageBuffer,
}
@ -75,9 +74,7 @@ impl<D> PropagateProgramD3D11<D> where D: Device {
let clip_tiles_storage_buffer = device.get_storage_buffer(&program, "ClipTiles", 4);
let z_buffer_storage_buffer = device.get_storage_buffer(&program, "ZBuffer", 5);
let first_tile_map_storage_buffer = device.get_storage_buffer(&program, "FirstTileMap", 6);
let indirect_draw_params_storage_buffer =
device.get_storage_buffer(&program, "IndirectDrawParams", 7);
let alpha_tiles_storage_buffer = device.get_storage_buffer(&program, "AlphaTiles", 8);
let alpha_tiles_storage_buffer = device.get_storage_buffer(&program, "AlphaTiles", 7);
PropagateProgramD3D11 {
program,
@ -91,7 +88,6 @@ impl<D> PropagateProgramD3D11<D> where D: Device {
clip_tiles_storage_buffer,
z_buffer_storage_buffer,
first_tile_map_storage_buffer,
indirect_draw_params_storage_buffer,
alpha_tiles_storage_buffer,
}
}

View File

@ -29,6 +29,9 @@ layout(local_size_x = 64)in;
uniform ivec2 uFramebufferTileSize;
uniform int uColumnCount;
uniform int uFirstAlphaTileIndex;
@ -76,6 +79,12 @@ layout(std430, binding = 4)buffer bClipTiles {
};
layout(std430, binding = 5)buffer bZBuffer {
restrict int iZBuffer[];
};
@ -83,16 +92,7 @@ layout(std430, binding = 6)buffer bFirstTileMap {
restrict int iFirstTileMap[];
};
layout(std430, binding = 7)buffer bIndirectDrawParams {
restrict uint iIndirectDrawParams[];
};
layout(std430, binding = 8)buffer bAlphaTiles {
layout(std430, binding = 7)buffer bAlphaTiles {
restrict uint iAlphaTiles[];
@ -191,7 +191,8 @@ void main(){
}
if(needNewAlphaTile){
uint drawBatchAlphaTileIndex = atomicAdd(iIndirectDrawParams[4], 1);
uint drawBatchAlphaTileIndex =
atomicAdd(iZBuffer[4], 1);
iAlphaTiles[drawBatchAlphaTileIndex * 2 + 0]= drawTileIndex;
iAlphaTiles[drawBatchAlphaTileIndex * 2 + 1]= clipAlphaTileIndex;
drawAlphaTileIndex = int(drawBatchAlphaTileIndex)+ uFirstAlphaTileIndex;
@ -206,7 +207,7 @@ void main(){
ivec2 tileCoord = ivec2(tileX, tileY)+ ivec2(drawTileRect . xy);
int tileMapIndex = tileCoord . y * uFramebufferTileSize . x + tileCoord . x;
if(zWrite && drawTileBackdrop != 0 && drawAlphaTileIndex < 0)
atomicMax(iZBuffer[tileMapIndex], int(drawTileIndex));
atomicMax(iZBuffer[tileMapIndex + 8], int(drawTileIndex));
if(drawTileBackdrop != 0 || drawAlphaTileIndex >= 0){

View File

@ -25,6 +25,8 @@ precision highp float;
uniform int uTileCount;
layout(std430, binding = 0)buffer bTiles {
@ -62,7 +64,7 @@ void main(){
if(globalTileIndex >= uint(uTileCount))
return;
int zValue = iZBuffer[globalTileIndex];
int zValue = iZBuffer[8 + globalTileIndex];
int unsortedFirstTileIndex = getFirst(globalTileIndex);
int sortedFirstTileIndex = - 1;

View File

@ -33,9 +33,9 @@ struct bClipTiles
uint iClipTiles[1];
};
struct bIndirectDrawParams
struct bZBuffer
{
uint iIndirectDrawParams[1];
int iZBuffer[1];
};
struct bAlphaTiles
@ -43,11 +43,6 @@ struct bAlphaTiles
uint iAlphaTiles[1];
};
struct bZBuffer
{
int iZBuffer[1];
};
struct bFirstTileMap
{
int iFirstTileMap[1];
@ -61,7 +56,7 @@ uint calculateTileIndex(thread const uint& bufferOffset, thread const uint4& til
return (bufferOffset + (tileCoord.y * (tileRect.z - tileRect.x))) + tileCoord.x;
}
kernel void main0(constant int& uColumnCount [[buffer(0)]], constant int& uFirstAlphaTileIndex [[buffer(8)]], constant int2& uFramebufferTileSize [[buffer(9)]], const device bBackdrops& _59 [[buffer(1)]], const device bDrawMetadata& _85 [[buffer(2)]], const device bClipMetadata& _126 [[buffer(3)]], device bDrawTiles& _175 [[buffer(4)]], device bClipTiles& _252 [[buffer(5)]], device bIndirectDrawParams& _302 [[buffer(6)]], device bAlphaTiles& _309 [[buffer(7)]], device bZBuffer& _380 [[buffer(10)]], device bFirstTileMap& _397 [[buffer(11)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
kernel void main0(constant int& uColumnCount [[buffer(0)]], constant int& uFirstAlphaTileIndex [[buffer(8)]], constant int2& uFramebufferTileSize [[buffer(9)]], const device bBackdrops& _59 [[buffer(1)]], const device bDrawMetadata& _85 [[buffer(2)]], const device bClipMetadata& _126 [[buffer(3)]], device bDrawTiles& _175 [[buffer(4)]], device bClipTiles& _252 [[buffer(5)]], device bZBuffer& _302 [[buffer(6)]], device bAlphaTiles& _310 [[buffer(7)]], device bFirstTileMap& _395 [[buffer(10)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{
uint columnIndex = gl_GlobalInvocationID.x;
if (int(columnIndex) >= uColumnCount)
@ -154,10 +149,10 @@ kernel void main0(constant int& uColumnCount [[buffer(0)]], constant int& uFirst
}
if (needNewAlphaTile)
{
uint _305 = atomic_fetch_add_explicit((device atomic_uint*)&_302.iIndirectDrawParams[4], 1u, memory_order_relaxed);
uint drawBatchAlphaTileIndex = _305;
_309.iAlphaTiles[(drawBatchAlphaTileIndex * 2u) + 0u] = drawTileIndex;
_309.iAlphaTiles[(drawBatchAlphaTileIndex * 2u) + 1u] = uint(clipAlphaTileIndex);
int _305 = atomic_fetch_add_explicit((device atomic_int*)&_302.iZBuffer[4], 1, memory_order_relaxed);
uint drawBatchAlphaTileIndex = uint(_305);
_310.iAlphaTiles[(drawBatchAlphaTileIndex * 2u) + 0u] = drawTileIndex;
_310.iAlphaTiles[(drawBatchAlphaTileIndex * 2u) + 1u] = uint(clipAlphaTileIndex);
drawAlphaTileIndex = int(drawBatchAlphaTileIndex) + uFirstAlphaTileIndex;
}
_175.iDrawTiles[(drawTileIndex * 4u) + 2u] = (uint(drawAlphaTileIndex) & 16777215u) | (uint(drawBackdropDelta) << uint(24));
@ -166,12 +161,12 @@ kernel void main0(constant int& uColumnCount [[buffer(0)]], constant int& uFirst
int tileMapIndex = (tileCoord_1.y * uFramebufferTileSize.x) + tileCoord_1.x;
if ((zWrite && (drawTileBackdrop != 0)) && (drawAlphaTileIndex < 0))
{
int _385 = atomic_fetch_max_explicit((device atomic_int*)&_380.iZBuffer[tileMapIndex], int(drawTileIndex), memory_order_relaxed);
int _383 = atomic_fetch_max_explicit((device atomic_int*)&_302.iZBuffer[tileMapIndex + 8], int(drawTileIndex), memory_order_relaxed);
}
if ((drawTileBackdrop != 0) || (drawAlphaTileIndex >= 0))
{
int _402 = atomic_exchange_explicit((device atomic_int*)&_397.iFirstTileMap[tileMapIndex], int(drawTileIndex), memory_order_relaxed);
int nextTileIndex = _402;
int _400 = atomic_exchange_explicit((device atomic_int*)&_395.iFirstTileMap[tileMapIndex], int(drawTileIndex), memory_order_relaxed);
int nextTileIndex = _400;
_175.iDrawTiles[(drawTileIndex * 4u) + 0u] = uint(nextTileIndex);
}
currentBackdrop += drawBackdropDelta;

View File

@ -48,7 +48,7 @@ kernel void main0(constant int& uTileCount [[buffer(2)]], device bFirstTileMap&
{
return;
}
int zValue = _76.iZBuffer[globalTileIndex];
int zValue = _76.iZBuffer[8u + globalTileIndex];
uint param = globalTileIndex;
int unsortedFirstTileIndex = getFirst(param, v_26);
int sortedFirstTileIndex = -1;

View File

@ -27,6 +27,9 @@ layout(local_size_x = 64) in;
#define TILE_FIELD_BACKDROP_ALPHA_TILE_ID 2
#define TILE_FIELD_CONTROL 3
#define FILL_INDIRECT_DRAW_PARAMS_ALPHA_TILE_COUNT_INDEX 4
#define FILL_INDIRECT_DRAW_PARAMS_SIZE 8
uniform ivec2 uFramebufferTileSize;
uniform int uColumnCount;
uniform int uFirstAlphaTileIndex;
@ -74,6 +77,12 @@ layout(std430, binding = 4) buffer bClipTiles {
};
layout(std430, binding = 5) buffer bZBuffer {
// [0]: vertexCount (6)
// [1]: instanceCount (of fills)
// [2]: vertexStart (0)
// [3]: baseInstance (0)
// [4]: alpha tile count
// [8..]: z-buffer
restrict int iZBuffer[];
};
@ -81,16 +90,7 @@ layout(std430, binding = 6) buffer bFirstTileMap {
restrict int iFirstTileMap[];
};
layout(std430, binding = 7) buffer bIndirectDrawParams {
// [0]: vertexCount (6)
// [1]: instanceCount (of fills)
// [2]: vertexStart (0)
// [3]: baseInstance (0)
// [4]: alpha tile count
restrict uint iIndirectDrawParams[];
};
layout(std430, binding = 8) buffer bAlphaTiles {
layout(std430, binding = 7) buffer bAlphaTiles {
// [0]: alpha tile index
// [1]: clip tile index
restrict uint iAlphaTiles[];
@ -189,7 +189,8 @@ void main() {
}
if (needNewAlphaTile) {
uint drawBatchAlphaTileIndex = atomicAdd(iIndirectDrawParams[4], 1);
uint drawBatchAlphaTileIndex =
atomicAdd(iZBuffer[FILL_INDIRECT_DRAW_PARAMS_ALPHA_TILE_COUNT_INDEX], 1);
iAlphaTiles[drawBatchAlphaTileIndex * 2 + 0] = drawTileIndex;
iAlphaTiles[drawBatchAlphaTileIndex * 2 + 1] = clipAlphaTileIndex;
drawAlphaTileIndex = int(drawBatchAlphaTileIndex) + uFirstAlphaTileIndex;
@ -204,7 +205,7 @@ void main() {
ivec2 tileCoord = ivec2(tileX, tileY) + ivec2(drawTileRect.xy);
int tileMapIndex = tileCoord.y * uFramebufferTileSize.x + tileCoord.x;
if (zWrite && drawTileBackdrop != 0 && drawAlphaTileIndex < 0)
atomicMax(iZBuffer[tileMapIndex], int(drawTileIndex));
atomicMax(iZBuffer[tileMapIndex + FILL_INDIRECT_DRAW_PARAMS_SIZE], int(drawTileIndex));
// Stitch into the linked list if necessary.
if (drawTileBackdrop != 0 || drawAlphaTileIndex >= 0) {

View File

@ -23,6 +23,8 @@ precision highp sampler2D;
#define TILE_FIELD_BACKDROP_ALPHA_TILE_ID 2
#define TILE_FIELD_CONTROL 3
#define FILL_INDIRECT_DRAW_PARAMS_SIZE 8
uniform int uTileCount;
layout(std430, binding = 0) buffer bTiles {
@ -60,7 +62,7 @@ void main() {
if (globalTileIndex >= uint(uTileCount))
return;
int zValue = iZBuffer[globalTileIndex];
int zValue = iZBuffer[FILL_INDIRECT_DRAW_PARAMS_SIZE + globalTileIndex];
int unsortedFirstTileIndex = getFirst(globalTileIndex);
int sortedFirstTileIndex = -1;