Auto merge of #382 - pcwalton:ssbo-cap, r=pcwalton

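Combine the Z-buffer and the fill indirect draw params buffer into a single buffer to reduce the number of SSBO bindings. The fill indirect draw params now occupy the first 8 elements of the Z-buffer, and the Z values follow them.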

Apparently Mesa RadeonSI drivers have a limit of 8 SSBOs, and the propagate shader previously used SSBO binding indices up to 8.

Closes #373.
This commit is contained in:
bors-servo 2020-07-02 14:59:46 -04:00 committed by GitHub
commit 9b85b077f7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 66 additions and 81 deletions

View File

@ -29,6 +29,7 @@ use vec_map::VecMap;
const FILL_INDIRECT_DRAW_PARAMS_INSTANCE_COUNT_INDEX: usize = 1; const FILL_INDIRECT_DRAW_PARAMS_INSTANCE_COUNT_INDEX: usize = 1;
const FILL_INDIRECT_DRAW_PARAMS_ALPHA_TILE_COUNT_INDEX: usize = 4; const FILL_INDIRECT_DRAW_PARAMS_ALPHA_TILE_COUNT_INDEX: usize = 4;
const FILL_INDIRECT_DRAW_PARAMS_SIZE: usize = 8;
const BIN_INDIRECT_DRAW_PARAMS_MICROLINE_COUNT_INDEX: usize = 3; const BIN_INDIRECT_DRAW_PARAMS_MICROLINE_COUNT_INDEX: usize = 3;
@ -146,7 +147,8 @@ impl<D> RendererD3D11<D> where D: Device {
core: &mut RendererCore<D>, core: &mut RendererCore<D>,
microlines_storage: &MicrolinesBufferIDsD3D11, microlines_storage: &MicrolinesBufferIDsD3D11,
propagate_metadata_buffer_ids: &PropagateMetadataBufferIDsD3D11, propagate_metadata_buffer_ids: &PropagateMetadataBufferIDsD3D11,
tiles_d3d11_buffer_id: BufferID) tiles_d3d11_buffer_id: BufferID,
z_buffer_id: BufferID)
-> Option<FillBufferInfoD3D11> { -> Option<FillBufferInfoD3D11> {
let bin_program = &self.programs.bin_program; let bin_program = &self.programs.bin_program;
@ -154,10 +156,6 @@ impl<D> RendererD3D11<D> where D: Device {
core.allocator.allocate_buffer::<Fill>(&core.device, core.allocator.allocate_buffer::<Fill>(&core.device,
self.allocated_fill_count as u64, self.allocated_fill_count as u64,
BufferTag("Fill")); BufferTag("Fill"));
let fill_indirect_draw_params_buffer_id =
core.allocator.allocate_buffer::<u32>(&core.device,
8,
BufferTag("FillIndirectDrawParamsD3D11"));
let fill_vertex_buffer = core.allocator.get_buffer(fill_vertex_buffer_id); let fill_vertex_buffer = core.allocator.get_buffer(fill_vertex_buffer_id);
let microlines_buffer = core.allocator.get_buffer(microlines_storage.buffer_id); let microlines_buffer = core.allocator.get_buffer(microlines_storage.buffer_id);
@ -166,10 +164,13 @@ impl<D> RendererD3D11<D> where D: Device {
core.allocator.get_buffer(propagate_metadata_buffer_ids.propagate_metadata); core.allocator.get_buffer(propagate_metadata_buffer_ids.propagate_metadata);
let backdrops_buffer = core.allocator.get_buffer(propagate_metadata_buffer_ids.backdrops); let backdrops_buffer = core.allocator.get_buffer(propagate_metadata_buffer_ids.backdrops);
let fill_indirect_draw_params_buffer = // Upload fill indirect draw params to header of the Z-buffer.
core.allocator.get_buffer(fill_indirect_draw_params_buffer_id); //
// This is in the Z-buffer, not its own buffer, to work around the 8 SSBO limitation on
// some drivers (#373).
let z_buffer = core.allocator.get_buffer(z_buffer_id);
let indirect_draw_params = [6, 0, 0, 0, 0, microlines_storage.count, 0, 0]; let indirect_draw_params = [6, 0, 0, 0, 0, microlines_storage.count, 0, 0];
core.device.upload_to_buffer::<u32>(&fill_indirect_draw_params_buffer, core.device.upload_to_buffer::<u32>(&z_buffer,
0, 0,
&indirect_draw_params, &indirect_draw_params,
BufferTarget::Storage); BufferTarget::Storage);
@ -196,8 +197,7 @@ impl<D> RendererD3D11<D> where D: Device {
storage_buffers: &[ storage_buffers: &[
(&bin_program.microlines_storage_buffer, microlines_buffer), (&bin_program.microlines_storage_buffer, microlines_buffer),
(&bin_program.metadata_storage_buffer, propagate_metadata_buffer), (&bin_program.metadata_storage_buffer, propagate_metadata_buffer),
(&bin_program.indirect_draw_params_storage_buffer, (&bin_program.indirect_draw_params_storage_buffer, z_buffer),
fill_indirect_draw_params_buffer),
(&bin_program.fills_storage_buffer, fill_vertex_buffer), (&bin_program.fills_storage_buffer, fill_vertex_buffer),
(&bin_program.tiles_storage_buffer, tiles_buffer), (&bin_program.tiles_storage_buffer, tiles_buffer),
(&bin_program.backdrops_storage_buffer, backdrops_buffer), (&bin_program.backdrops_storage_buffer, backdrops_buffer),
@ -208,8 +208,7 @@ impl<D> RendererD3D11<D> where D: Device {
core.finish_timing_draw_call(&timer_query); core.finish_timing_draw_call(&timer_query);
core.current_timer.as_mut().unwrap().push_query(TimeCategory::Bin, timer_query); core.current_timer.as_mut().unwrap().push_query(TimeCategory::Bin, timer_query);
let indirect_draw_params_receiver = let indirect_draw_params_receiver = core.device.read_buffer(z_buffer,
core.device.read_buffer(fill_indirect_draw_params_buffer,
BufferTarget::Storage, BufferTarget::Storage,
0..32); 0..32);
let indirect_draw_params = core.device.recv_buffer(&indirect_draw_params_receiver); let indirect_draw_params = core.device.recv_buffer(&indirect_draw_params_receiver);
@ -224,7 +223,7 @@ impl<D> RendererD3D11<D> where D: Device {
core.stats.fill_count += needed_fill_count as usize; core.stats.fill_count += needed_fill_count as usize;
Some(FillBufferInfoD3D11 { fill_vertex_buffer_id, fill_indirect_draw_params_buffer_id }) Some(FillBufferInfoD3D11 { fill_vertex_buffer_id })
} }
pub(crate) fn upload_scene(&mut self, pub(crate) fn upload_scene(&mut self,
@ -355,10 +354,7 @@ impl<D> RendererD3D11<D> where D: Device {
tiles_d3d11_buffer_id: BufferID, tiles_d3d11_buffer_id: BufferID,
alpha_tiles_buffer_id: BufferID, alpha_tiles_buffer_id: BufferID,
propagate_tiles_info: &PropagateTilesInfoD3D11) { propagate_tiles_info: &PropagateTilesInfoD3D11) {
let &FillBufferInfoD3D11 { let &FillBufferInfoD3D11 { fill_vertex_buffer_id } = fill_storage_info;
fill_vertex_buffer_id,
fill_indirect_draw_params_buffer_id: _,
} = fill_storage_info;
let &PropagateTilesInfoD3D11 { ref alpha_tile_range } = propagate_tiles_info; let &PropagateTilesInfoD3D11 { ref alpha_tile_range } = propagate_tiles_info;
let fill_program = &self.programs.fill_program; let fill_program = &self.programs.fill_program;
@ -486,7 +482,8 @@ impl<D> RendererD3D11<D> where D: Device {
fill_buffer_info = self.bin_segments(core, fill_buffer_info = self.bin_segments(core,
&microlines_storage, &microlines_storage,
&propagate_metadata_buffer_ids, &propagate_metadata_buffer_ids,
tiles_d3d11_buffer_id); tiles_d3d11_buffer_id,
z_buffer_id);
if fill_buffer_info.is_some() { if fill_buffer_info.is_some() {
break; break;
} }
@ -505,7 +502,6 @@ impl<D> RendererD3D11<D> where D: Device {
self.propagate_tiles(core, self.propagate_tiles(core,
batch.prepare_info.backdrops.len() as u32, batch.prepare_info.backdrops.len() as u32,
tiles_d3d11_buffer_id, tiles_d3d11_buffer_id,
fill_buffer_info.fill_indirect_draw_params_buffer_id,
z_buffer_id, z_buffer_id,
first_tile_map_buffer_id, first_tile_map_buffer_id,
alpha_tiles_buffer_id, alpha_tiles_buffer_id,
@ -523,7 +519,6 @@ impl<D> RendererD3D11<D> where D: Device {
&propagate_tiles_info); &propagate_tiles_info);
core.allocator.free_buffer(fill_buffer_info.fill_vertex_buffer_id); core.allocator.free_buffer(fill_buffer_info.fill_vertex_buffer_id);
core.allocator.free_buffer(fill_buffer_info.fill_indirect_draw_params_buffer_id);
core.allocator.free_buffer(alpha_tiles_buffer_id); core.allocator.free_buffer(alpha_tiles_buffer_id);
// FIXME(pcwalton): This seems like the wrong place to do this... // FIXME(pcwalton): This seems like the wrong place to do this...
@ -543,7 +538,6 @@ impl<D> RendererD3D11<D> where D: Device {
core: &mut RendererCore<D>, core: &mut RendererCore<D>,
column_count: u32, column_count: u32,
tiles_d3d11_buffer_id: BufferID, tiles_d3d11_buffer_id: BufferID,
fill_indirect_draw_params_buffer_id: BufferID,
z_buffer_id: BufferID, z_buffer_id: BufferID,
first_tile_map_buffer_id: BufferID, first_tile_map_buffer_id: BufferID,
alpha_tiles_buffer_id: BufferID, alpha_tiles_buffer_id: BufferID,
@ -572,8 +566,6 @@ impl<D> RendererD3D11<D> where D: Device {
BufferTarget::Storage); BufferTarget::Storage);
let alpha_tiles_storage_buffer = core.allocator.get_buffer(alpha_tiles_buffer_id); let alpha_tiles_storage_buffer = core.allocator.get_buffer(alpha_tiles_buffer_id);
let fill_indirect_draw_params_buffer =
core.allocator.get_buffer(fill_indirect_draw_params_buffer_id);
let mut storage_buffers = vec![ let mut storage_buffers = vec![
(&propagate_program.draw_metadata_storage_buffer, propagate_metadata_storage_buffer), (&propagate_program.draw_metadata_storage_buffer, propagate_metadata_storage_buffer),
@ -581,8 +573,6 @@ impl<D> RendererD3D11<D> where D: Device {
(&propagate_program.draw_tiles_storage_buffer, tiles_d3d11_buffer), (&propagate_program.draw_tiles_storage_buffer, tiles_d3d11_buffer),
(&propagate_program.z_buffer_storage_buffer, z_buffer), (&propagate_program.z_buffer_storage_buffer, z_buffer),
(&propagate_program.first_tile_map_storage_buffer, first_tile_map_storage_buffer), (&propagate_program.first_tile_map_storage_buffer, first_tile_map_storage_buffer),
(&propagate_program.indirect_draw_params_storage_buffer,
fill_indirect_draw_params_buffer),
(&propagate_program.alpha_tiles_storage_buffer, alpha_tiles_storage_buffer), (&propagate_program.alpha_tiles_storage_buffer, alpha_tiles_storage_buffer),
]; ];
@ -633,9 +623,7 @@ impl<D> RendererD3D11<D> where D: Device {
core.current_timer.as_mut().unwrap().push_query(TimeCategory::Other, timer_query); core.current_timer.as_mut().unwrap().push_query(TimeCategory::Other, timer_query);
let fill_indirect_draw_params_receiver = let fill_indirect_draw_params_receiver =
core.device.read_buffer(&fill_indirect_draw_params_buffer, core.device.read_buffer(&z_buffer, BufferTarget::Storage, 0..32);
BufferTarget::Storage,
0..32);
let fill_indirect_draw_params = core.device let fill_indirect_draw_params = core.device
.recv_buffer(&fill_indirect_draw_params_receiver); .recv_buffer(&fill_indirect_draw_params_receiver);
let fill_indirect_draw_params: &[u32] = fill_indirect_draw_params.as_slice_of().unwrap(); let fill_indirect_draw_params: &[u32] = fill_indirect_draw_params.as_slice_of().unwrap();
@ -703,9 +691,10 @@ impl<D> RendererD3D11<D> where D: Device {
} }
fn allocate_z_buffer(&mut self, core: &mut RendererCore<D>) -> BufferID { fn allocate_z_buffer(&mut self, core: &mut RendererCore<D>) -> BufferID {
core.allocator.allocate_buffer::<i32>(&core.device, // This includes the fill indirect draw params because some drivers limit the number of
core.tile_size().area() as u64, // SSBOs to 8 (#373).
BufferTag("ZBufferD3D11")) let size = core.tile_size().area() as u64 + FILL_INDIRECT_DRAW_PARAMS_SIZE as u64;
core.allocator.allocate_buffer::<i32>( &core.device, size, BufferTag("ZBufferD3D11"))
} }
pub(crate) fn draw_tiles(&mut self, pub(crate) fn draw_tiles(&mut self,
@ -808,7 +797,6 @@ struct TileBatchInfoD3D11 {
#[derive(Clone)] #[derive(Clone)]
struct FillBufferInfoD3D11 { struct FillBufferInfoD3D11 {
fill_vertex_buffer_id: BufferID, fill_vertex_buffer_id: BufferID,
fill_indirect_draw_params_buffer_id: BufferID,
} }
#[derive(Debug)] #[derive(Debug)]

View File

@ -55,7 +55,6 @@ pub struct PropagateProgramD3D11<D> where D: Device {
pub clip_tiles_storage_buffer: D::StorageBuffer, pub clip_tiles_storage_buffer: D::StorageBuffer,
pub z_buffer_storage_buffer: D::StorageBuffer, pub z_buffer_storage_buffer: D::StorageBuffer,
pub first_tile_map_storage_buffer: D::StorageBuffer, pub first_tile_map_storage_buffer: D::StorageBuffer,
pub indirect_draw_params_storage_buffer: D::StorageBuffer,
pub alpha_tiles_storage_buffer: D::StorageBuffer, pub alpha_tiles_storage_buffer: D::StorageBuffer,
} }
@ -75,9 +74,7 @@ impl<D> PropagateProgramD3D11<D> where D: Device {
let clip_tiles_storage_buffer = device.get_storage_buffer(&program, "ClipTiles", 4); let clip_tiles_storage_buffer = device.get_storage_buffer(&program, "ClipTiles", 4);
let z_buffer_storage_buffer = device.get_storage_buffer(&program, "ZBuffer", 5); let z_buffer_storage_buffer = device.get_storage_buffer(&program, "ZBuffer", 5);
let first_tile_map_storage_buffer = device.get_storage_buffer(&program, "FirstTileMap", 6); let first_tile_map_storage_buffer = device.get_storage_buffer(&program, "FirstTileMap", 6);
let indirect_draw_params_storage_buffer = let alpha_tiles_storage_buffer = device.get_storage_buffer(&program, "AlphaTiles", 7);
device.get_storage_buffer(&program, "IndirectDrawParams", 7);
let alpha_tiles_storage_buffer = device.get_storage_buffer(&program, "AlphaTiles", 8);
PropagateProgramD3D11 { PropagateProgramD3D11 {
program, program,
@ -91,7 +88,6 @@ impl<D> PropagateProgramD3D11<D> where D: Device {
clip_tiles_storage_buffer, clip_tiles_storage_buffer,
z_buffer_storage_buffer, z_buffer_storage_buffer,
first_tile_map_storage_buffer, first_tile_map_storage_buffer,
indirect_draw_params_storage_buffer,
alpha_tiles_storage_buffer, alpha_tiles_storage_buffer,
} }
} }

View File

@ -29,6 +29,9 @@ layout(local_size_x = 64)in;
uniform ivec2 uFramebufferTileSize; uniform ivec2 uFramebufferTileSize;
uniform int uColumnCount; uniform int uColumnCount;
uniform int uFirstAlphaTileIndex; uniform int uFirstAlphaTileIndex;
@ -76,6 +79,12 @@ layout(std430, binding = 4)buffer bClipTiles {
}; };
layout(std430, binding = 5)buffer bZBuffer { layout(std430, binding = 5)buffer bZBuffer {
restrict int iZBuffer[]; restrict int iZBuffer[];
}; };
@ -83,16 +92,7 @@ layout(std430, binding = 6)buffer bFirstTileMap {
restrict int iFirstTileMap[]; restrict int iFirstTileMap[];
}; };
layout(std430, binding = 7)buffer bIndirectDrawParams { layout(std430, binding = 7)buffer bAlphaTiles {
restrict uint iIndirectDrawParams[];
};
layout(std430, binding = 8)buffer bAlphaTiles {
restrict uint iAlphaTiles[]; restrict uint iAlphaTiles[];
@ -191,7 +191,8 @@ void main(){
} }
if(needNewAlphaTile){ if(needNewAlphaTile){
uint drawBatchAlphaTileIndex = atomicAdd(iIndirectDrawParams[4], 1); uint drawBatchAlphaTileIndex =
atomicAdd(iZBuffer[4], 1);
iAlphaTiles[drawBatchAlphaTileIndex * 2 + 0]= drawTileIndex; iAlphaTiles[drawBatchAlphaTileIndex * 2 + 0]= drawTileIndex;
iAlphaTiles[drawBatchAlphaTileIndex * 2 + 1]= clipAlphaTileIndex; iAlphaTiles[drawBatchAlphaTileIndex * 2 + 1]= clipAlphaTileIndex;
drawAlphaTileIndex = int(drawBatchAlphaTileIndex)+ uFirstAlphaTileIndex; drawAlphaTileIndex = int(drawBatchAlphaTileIndex)+ uFirstAlphaTileIndex;
@ -206,7 +207,7 @@ void main(){
ivec2 tileCoord = ivec2(tileX, tileY)+ ivec2(drawTileRect . xy); ivec2 tileCoord = ivec2(tileX, tileY)+ ivec2(drawTileRect . xy);
int tileMapIndex = tileCoord . y * uFramebufferTileSize . x + tileCoord . x; int tileMapIndex = tileCoord . y * uFramebufferTileSize . x + tileCoord . x;
if(zWrite && drawTileBackdrop != 0 && drawAlphaTileIndex < 0) if(zWrite && drawTileBackdrop != 0 && drawAlphaTileIndex < 0)
atomicMax(iZBuffer[tileMapIndex], int(drawTileIndex)); atomicMax(iZBuffer[tileMapIndex + 8], int(drawTileIndex));
if(drawTileBackdrop != 0 || drawAlphaTileIndex >= 0){ if(drawTileBackdrop != 0 || drawAlphaTileIndex >= 0){

View File

@ -25,6 +25,8 @@ precision highp float;
uniform int uTileCount; uniform int uTileCount;
layout(std430, binding = 0)buffer bTiles { layout(std430, binding = 0)buffer bTiles {
@ -62,7 +64,7 @@ void main(){
if(globalTileIndex >= uint(uTileCount)) if(globalTileIndex >= uint(uTileCount))
return; return;
int zValue = iZBuffer[globalTileIndex]; int zValue = iZBuffer[8 + globalTileIndex];
int unsortedFirstTileIndex = getFirst(globalTileIndex); int unsortedFirstTileIndex = getFirst(globalTileIndex);
int sortedFirstTileIndex = - 1; int sortedFirstTileIndex = - 1;

View File

@ -33,9 +33,9 @@ struct bClipTiles
uint iClipTiles[1]; uint iClipTiles[1];
}; };
struct bIndirectDrawParams struct bZBuffer
{ {
uint iIndirectDrawParams[1]; int iZBuffer[1];
}; };
struct bAlphaTiles struct bAlphaTiles
@ -43,11 +43,6 @@ struct bAlphaTiles
uint iAlphaTiles[1]; uint iAlphaTiles[1];
}; };
struct bZBuffer
{
int iZBuffer[1];
};
struct bFirstTileMap struct bFirstTileMap
{ {
int iFirstTileMap[1]; int iFirstTileMap[1];
@ -61,7 +56,7 @@ uint calculateTileIndex(thread const uint& bufferOffset, thread const uint4& til
return (bufferOffset + (tileCoord.y * (tileRect.z - tileRect.x))) + tileCoord.x; return (bufferOffset + (tileCoord.y * (tileRect.z - tileRect.x))) + tileCoord.x;
} }
kernel void main0(constant int& uColumnCount [[buffer(0)]], constant int& uFirstAlphaTileIndex [[buffer(8)]], constant int2& uFramebufferTileSize [[buffer(9)]], const device bBackdrops& _59 [[buffer(1)]], const device bDrawMetadata& _85 [[buffer(2)]], const device bClipMetadata& _126 [[buffer(3)]], device bDrawTiles& _175 [[buffer(4)]], device bClipTiles& _252 [[buffer(5)]], device bIndirectDrawParams& _302 [[buffer(6)]], device bAlphaTiles& _309 [[buffer(7)]], device bZBuffer& _380 [[buffer(10)]], device bFirstTileMap& _397 [[buffer(11)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) kernel void main0(constant int& uColumnCount [[buffer(0)]], constant int& uFirstAlphaTileIndex [[buffer(8)]], constant int2& uFramebufferTileSize [[buffer(9)]], const device bBackdrops& _59 [[buffer(1)]], const device bDrawMetadata& _85 [[buffer(2)]], const device bClipMetadata& _126 [[buffer(3)]], device bDrawTiles& _175 [[buffer(4)]], device bClipTiles& _252 [[buffer(5)]], device bZBuffer& _302 [[buffer(6)]], device bAlphaTiles& _310 [[buffer(7)]], device bFirstTileMap& _395 [[buffer(10)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
{ {
uint columnIndex = gl_GlobalInvocationID.x; uint columnIndex = gl_GlobalInvocationID.x;
if (int(columnIndex) >= uColumnCount) if (int(columnIndex) >= uColumnCount)
@ -154,10 +149,10 @@ kernel void main0(constant int& uColumnCount [[buffer(0)]], constant int& uFirst
} }
if (needNewAlphaTile) if (needNewAlphaTile)
{ {
uint _305 = atomic_fetch_add_explicit((device atomic_uint*)&_302.iIndirectDrawParams[4], 1u, memory_order_relaxed); int _305 = atomic_fetch_add_explicit((device atomic_int*)&_302.iZBuffer[4], 1, memory_order_relaxed);
uint drawBatchAlphaTileIndex = _305; uint drawBatchAlphaTileIndex = uint(_305);
_309.iAlphaTiles[(drawBatchAlphaTileIndex * 2u) + 0u] = drawTileIndex; _310.iAlphaTiles[(drawBatchAlphaTileIndex * 2u) + 0u] = drawTileIndex;
_309.iAlphaTiles[(drawBatchAlphaTileIndex * 2u) + 1u] = uint(clipAlphaTileIndex); _310.iAlphaTiles[(drawBatchAlphaTileIndex * 2u) + 1u] = uint(clipAlphaTileIndex);
drawAlphaTileIndex = int(drawBatchAlphaTileIndex) + uFirstAlphaTileIndex; drawAlphaTileIndex = int(drawBatchAlphaTileIndex) + uFirstAlphaTileIndex;
} }
_175.iDrawTiles[(drawTileIndex * 4u) + 2u] = (uint(drawAlphaTileIndex) & 16777215u) | (uint(drawBackdropDelta) << uint(24)); _175.iDrawTiles[(drawTileIndex * 4u) + 2u] = (uint(drawAlphaTileIndex) & 16777215u) | (uint(drawBackdropDelta) << uint(24));
@ -166,12 +161,12 @@ kernel void main0(constant int& uColumnCount [[buffer(0)]], constant int& uFirst
int tileMapIndex = (tileCoord_1.y * uFramebufferTileSize.x) + tileCoord_1.x; int tileMapIndex = (tileCoord_1.y * uFramebufferTileSize.x) + tileCoord_1.x;
if ((zWrite && (drawTileBackdrop != 0)) && (drawAlphaTileIndex < 0)) if ((zWrite && (drawTileBackdrop != 0)) && (drawAlphaTileIndex < 0))
{ {
int _385 = atomic_fetch_max_explicit((device atomic_int*)&_380.iZBuffer[tileMapIndex], int(drawTileIndex), memory_order_relaxed); int _383 = atomic_fetch_max_explicit((device atomic_int*)&_302.iZBuffer[tileMapIndex + 8], int(drawTileIndex), memory_order_relaxed);
} }
if ((drawTileBackdrop != 0) || (drawAlphaTileIndex >= 0)) if ((drawTileBackdrop != 0) || (drawAlphaTileIndex >= 0))
{ {
int _402 = atomic_exchange_explicit((device atomic_int*)&_397.iFirstTileMap[tileMapIndex], int(drawTileIndex), memory_order_relaxed); int _400 = atomic_exchange_explicit((device atomic_int*)&_395.iFirstTileMap[tileMapIndex], int(drawTileIndex), memory_order_relaxed);
int nextTileIndex = _402; int nextTileIndex = _400;
_175.iDrawTiles[(drawTileIndex * 4u) + 0u] = uint(nextTileIndex); _175.iDrawTiles[(drawTileIndex * 4u) + 0u] = uint(nextTileIndex);
} }
currentBackdrop += drawBackdropDelta; currentBackdrop += drawBackdropDelta;

View File

@ -48,7 +48,7 @@ kernel void main0(constant int& uTileCount [[buffer(2)]], device bFirstTileMap&
{ {
return; return;
} }
int zValue = _76.iZBuffer[globalTileIndex]; int zValue = _76.iZBuffer[8u + globalTileIndex];
uint param = globalTileIndex; uint param = globalTileIndex;
int unsortedFirstTileIndex = getFirst(param, v_26); int unsortedFirstTileIndex = getFirst(param, v_26);
int sortedFirstTileIndex = -1; int sortedFirstTileIndex = -1;

View File

@ -27,6 +27,9 @@ layout(local_size_x = 64) in;
#define TILE_FIELD_BACKDROP_ALPHA_TILE_ID 2 #define TILE_FIELD_BACKDROP_ALPHA_TILE_ID 2
#define TILE_FIELD_CONTROL 3 #define TILE_FIELD_CONTROL 3
#define FILL_INDIRECT_DRAW_PARAMS_ALPHA_TILE_COUNT_INDEX 4
#define FILL_INDIRECT_DRAW_PARAMS_SIZE 8
uniform ivec2 uFramebufferTileSize; uniform ivec2 uFramebufferTileSize;
uniform int uColumnCount; uniform int uColumnCount;
uniform int uFirstAlphaTileIndex; uniform int uFirstAlphaTileIndex;
@ -74,6 +77,12 @@ layout(std430, binding = 4) buffer bClipTiles {
}; };
layout(std430, binding = 5) buffer bZBuffer { layout(std430, binding = 5) buffer bZBuffer {
// [0]: vertexCount (6)
// [1]: instanceCount (of fills)
// [2]: vertexStart (0)
// [3]: baseInstance (0)
// [4]: alpha tile count
// [8..]: z-buffer
restrict int iZBuffer[]; restrict int iZBuffer[];
}; };
@ -81,16 +90,7 @@ layout(std430, binding = 6) buffer bFirstTileMap {
restrict int iFirstTileMap[]; restrict int iFirstTileMap[];
}; };
layout(std430, binding = 7) buffer bIndirectDrawParams { layout(std430, binding = 7) buffer bAlphaTiles {
// [0]: vertexCount (6)
// [1]: instanceCount (of fills)
// [2]: vertexStart (0)
// [3]: baseInstance (0)
// [4]: alpha tile count
restrict uint iIndirectDrawParams[];
};
layout(std430, binding = 8) buffer bAlphaTiles {
// [0]: alpha tile index // [0]: alpha tile index
// [1]: clip tile index // [1]: clip tile index
restrict uint iAlphaTiles[]; restrict uint iAlphaTiles[];
@ -189,7 +189,8 @@ void main() {
} }
if (needNewAlphaTile) { if (needNewAlphaTile) {
uint drawBatchAlphaTileIndex = atomicAdd(iIndirectDrawParams[4], 1); uint drawBatchAlphaTileIndex =
atomicAdd(iZBuffer[FILL_INDIRECT_DRAW_PARAMS_ALPHA_TILE_COUNT_INDEX], 1);
iAlphaTiles[drawBatchAlphaTileIndex * 2 + 0] = drawTileIndex; iAlphaTiles[drawBatchAlphaTileIndex * 2 + 0] = drawTileIndex;
iAlphaTiles[drawBatchAlphaTileIndex * 2 + 1] = clipAlphaTileIndex; iAlphaTiles[drawBatchAlphaTileIndex * 2 + 1] = clipAlphaTileIndex;
drawAlphaTileIndex = int(drawBatchAlphaTileIndex) + uFirstAlphaTileIndex; drawAlphaTileIndex = int(drawBatchAlphaTileIndex) + uFirstAlphaTileIndex;
@ -204,7 +205,7 @@ void main() {
ivec2 tileCoord = ivec2(tileX, tileY) + ivec2(drawTileRect.xy); ivec2 tileCoord = ivec2(tileX, tileY) + ivec2(drawTileRect.xy);
int tileMapIndex = tileCoord.y * uFramebufferTileSize.x + tileCoord.x; int tileMapIndex = tileCoord.y * uFramebufferTileSize.x + tileCoord.x;
if (zWrite && drawTileBackdrop != 0 && drawAlphaTileIndex < 0) if (zWrite && drawTileBackdrop != 0 && drawAlphaTileIndex < 0)
atomicMax(iZBuffer[tileMapIndex], int(drawTileIndex)); atomicMax(iZBuffer[tileMapIndex + FILL_INDIRECT_DRAW_PARAMS_SIZE], int(drawTileIndex));
// Stitch into the linked list if necessary. // Stitch into the linked list if necessary.
if (drawTileBackdrop != 0 || drawAlphaTileIndex >= 0) { if (drawTileBackdrop != 0 || drawAlphaTileIndex >= 0) {

View File

@ -23,6 +23,8 @@ precision highp sampler2D;
#define TILE_FIELD_BACKDROP_ALPHA_TILE_ID 2 #define TILE_FIELD_BACKDROP_ALPHA_TILE_ID 2
#define TILE_FIELD_CONTROL 3 #define TILE_FIELD_CONTROL 3
#define FILL_INDIRECT_DRAW_PARAMS_SIZE 8
uniform int uTileCount; uniform int uTileCount;
layout(std430, binding = 0) buffer bTiles { layout(std430, binding = 0) buffer bTiles {
@ -60,7 +62,7 @@ void main() {
if (globalTileIndex >= uint(uTileCount)) if (globalTileIndex >= uint(uTileCount))
return; return;
int zValue = iZBuffer[globalTileIndex]; int zValue = iZBuffer[FILL_INDIRECT_DRAW_PARAMS_SIZE + globalTileIndex];
int unsortedFirstTileIndex = getFirst(globalTileIndex); int unsortedFirstTileIndex = getFirst(globalTileIndex);
int sortedFirstTileIndex = -1; int sortedFirstTileIndex = -1;