diff --git a/renderer/src/gpu/d3d11/mod.rs b/renderer/src/gpu/d3d11/mod.rs new file mode 100644 index 00000000..eebeb578 --- /dev/null +++ b/renderer/src/gpu/d3d11/mod.rs @@ -0,0 +1,12 @@ +// pathfinder/renderer/src/gpu/d3d11/mod.rs +// +// Copyright © 2020 The Pathfinder Project Developers. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +pub mod renderer; +pub mod shaders; diff --git a/renderer/src/gpu/d3d11/renderer.rs b/renderer/src/gpu/d3d11/renderer.rs new file mode 100644 index 00000000..3668183a --- /dev/null +++ b/renderer/src/gpu/d3d11/renderer.rs @@ -0,0 +1,897 @@ +// pathfinder/renderer/src/gpu/d3d11/renderer.rs +// +// Copyright © 2020 The Pathfinder Project Developers. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +use crate::gpu::d3d11::shaders::{BOUND_WORKGROUP_SIZE, DICE_WORKGROUP_SIZE}; +use crate::gpu::d3d11::shaders::{PROPAGATE_WORKGROUP_SIZE, ProgramsD3D11, SORT_WORKGROUP_SIZE}; +use crate::gpu::perf::TimerFuture; +use crate::gpu::renderer::{FramebufferFlags, RendererCore}; +use crate::gpu_data::{AlphaTileD3D11, BackdropInfoD3D11, DiceMetadataD3D11, DrawTileBatchD3D11}; +use crate::gpu_data::{Fill, FirstTileD3D11, MicrolineD3D11, PathSource, PropagateMetadataD3D11}; +use crate::gpu_data::{SegmentIndicesD3D11, SegmentsD3D11, TileD3D11, TileBatchDataD3D11}; +use crate::gpu_data::{TileBatchTexture, TilePathInfoD3D11}; +use byte_slice_cast::AsSliceOf; +use pathfinder_geometry::transform2d::Transform2F; +use pathfinder_geometry::vector::Vector2F; +use pathfinder_gpu::allocator::{BufferID, BufferTag, GPUMemoryAllocator}; +use pathfinder_gpu::{BufferTarget, ComputeDimensions, ComputeState, Device, ImageAccess}; +use pathfinder_gpu::{RenderTarget, UniformData}; +use pathfinder_resources::ResourceLoader; +use pathfinder_simd::default::{F32x4, I32x2}; +use std::ops::Range; +use vec_map::VecMap; + +const FILL_INDIRECT_DRAW_PARAMS_INSTANCE_COUNT_INDEX: usize = 1; +const FILL_INDIRECT_DRAW_PARAMS_ALPHA_TILE_COUNT_INDEX: usize = 4; + +const BIN_INDIRECT_DRAW_PARAMS_MICROLINE_COUNT_INDEX: usize = 3; + +const LOAD_ACTION_CLEAR: i32 = 0; +const LOAD_ACTION_LOAD: i32 = 1; + +const INITIAL_ALLOCATED_MICROLINE_COUNT: u32 = 1024 * 16; +const INITIAL_ALLOCATED_FILL_COUNT: u32 = 1024 * 16; + +pub(crate) struct RendererD3D11 where D: Device { + programs: ProgramsD3D11, + allocated_microline_count: u32, + allocated_fill_count: u32, + scene_buffers: SceneBuffers, + tile_batch_info: VecMap, +} + +impl RendererD3D11 where D: Device { + pub(crate) fn new(core: &mut RendererCore, resources: &dyn ResourceLoader) + -> RendererD3D11 { + let programs = ProgramsD3D11::new(&core.device, resources); + RendererD3D11 { + programs, + allocated_fill_count: INITIAL_ALLOCATED_FILL_COUNT, + 
allocated_microline_count: INITIAL_ALLOCATED_MICROLINE_COUNT, + scene_buffers: SceneBuffers::new(), + tile_batch_info: VecMap::::new(), + } + } + + fn bound(&mut self, + core: &mut RendererCore, + tiles_d3d11_buffer_id: BufferID, + tile_count: u32, + tile_path_info: &[TilePathInfoD3D11]) { + let bound_program = &self.programs.bound_program; + + let path_info_buffer_id = + core.allocator.allocate_buffer::(&core.device, + tile_path_info.len() as u64, + BufferTag("TilePathInfoD3D11")); + let tile_path_info_buffer = core.allocator.get_buffer(path_info_buffer_id); + core.device.upload_to_buffer(tile_path_info_buffer, + 0, + tile_path_info, + BufferTarget::Storage); + + let tiles_buffer = core.allocator.get_buffer(tiles_d3d11_buffer_id); + + let timer_query = core.timer_query_cache.alloc(&core.device); + core.device.begin_timer_query(&timer_query); + + let compute_dimensions = ComputeDimensions { + x: (tile_count + BOUND_WORKGROUP_SIZE - 1) / BOUND_WORKGROUP_SIZE, + y: 1, + z: 1, + }; + core.device.dispatch_compute(compute_dimensions, &ComputeState { + program: &bound_program.program, + textures: &[], + uniforms: &[ + (&bound_program.path_count_uniform, UniformData::Int(tile_path_info.len() as i32)), + (&bound_program.tile_count_uniform, UniformData::Int(tile_count as i32)), + ], + images: &[], + storage_buffers: &[ + (&bound_program.tile_path_info_storage_buffer, tile_path_info_buffer), + (&bound_program.tiles_storage_buffer, tiles_buffer), + ], + }); + + core.device.end_timer_query(&timer_query); + core.current_timer.as_mut().unwrap().other_times.push(TimerFuture::new(timer_query)); + core.stats.drawcall_count += 1; + + core.allocator.free_buffer(path_info_buffer_id); + } + + fn upload_propagate_metadata(&mut self, + core: &mut RendererCore, + propagate_metadata: &[PropagateMetadataD3D11], + backdrops: &[BackdropInfoD3D11]) + -> PropagateMetadataBufferIDsD3D11 { + let propagate_metadata_storage_id = + core.allocator + .allocate_buffer::(&core.device, + 
propagate_metadata.len() as u64, + BufferTag("PropagateMetadataD3D11")); + let propagate_metadata_buffer = core.allocator.get_buffer(propagate_metadata_storage_id); + core.device.upload_to_buffer(propagate_metadata_buffer, + 0, + propagate_metadata, + BufferTarget::Storage); + + let backdrops_storage_id = + core.allocator.allocate_buffer::(&core.device, + backdrops.len() as u64, + BufferTag("BackdropInfoD3D11")); + + PropagateMetadataBufferIDsD3D11 { + propagate_metadata: propagate_metadata_storage_id, + backdrops: backdrops_storage_id, + } + } + + fn upload_initial_backdrops(&self, + core: &RendererCore, + backdrops_buffer_id: BufferID, + backdrops: &[BackdropInfoD3D11]) { + let backdrops_buffer = core.allocator.get_buffer(backdrops_buffer_id); + core.device.upload_to_buffer(backdrops_buffer, 0, backdrops, BufferTarget::Storage); + } + + fn bin_segments(&mut self, + core: &mut RendererCore, + microlines_storage: &MicrolinesBufferIDsD3D11, + propagate_metadata_buffer_ids: &PropagateMetadataBufferIDsD3D11, + tiles_d3d11_buffer_id: BufferID) + -> Option { + let bin_program = &self.programs.bin_program; + + let fill_vertex_buffer_id = + core.allocator.allocate_buffer::(&core.device, + self.allocated_fill_count as u64, + BufferTag("Fill")); + let fill_indirect_draw_params_buffer_id = + core.allocator.allocate_buffer::(&core.device, + 8, + BufferTag("FillIndirectDrawParamsD3D11")); + + let fill_vertex_buffer = core.allocator.get_buffer(fill_vertex_buffer_id); + let microlines_buffer = core.allocator.get_buffer(microlines_storage.buffer_id); + let tiles_buffer = core.allocator.get_buffer(tiles_d3d11_buffer_id); + let propagate_metadata_buffer = + core.allocator.get_buffer(propagate_metadata_buffer_ids.propagate_metadata); + let backdrops_buffer = core.allocator.get_buffer(propagate_metadata_buffer_ids.backdrops); + + let fill_indirect_draw_params_buffer = + core.allocator.get_buffer(fill_indirect_draw_params_buffer_id); + let indirect_draw_params = [6, 0, 0, 0, 0, 
microlines_storage.count, 0, 0]; + core.device.upload_to_buffer::(&fill_indirect_draw_params_buffer, + 0, + &indirect_draw_params, + BufferTarget::Storage); + + let timer_query = core.timer_query_cache.alloc(&core.device); + core.device.begin_timer_query(&timer_query); + + let compute_dimensions = ComputeDimensions { + x: (microlines_storage.count + 63) / 64, + y: 1, + z: 1, + }; + + core.device.dispatch_compute(compute_dimensions, &ComputeState { + program: &bin_program.program, + textures: &[], + uniforms: &[ + (&bin_program.microline_count_uniform, + UniformData::Int(microlines_storage.count as i32)), + (&bin_program.max_fill_count_uniform, + UniformData::Int(self.allocated_fill_count as i32)), + ], + images: &[], + storage_buffers: &[ + (&bin_program.microlines_storage_buffer, microlines_buffer), + (&bin_program.metadata_storage_buffer, propagate_metadata_buffer), + (&bin_program.indirect_draw_params_storage_buffer, + fill_indirect_draw_params_buffer), + (&bin_program.fills_storage_buffer, fill_vertex_buffer), + (&bin_program.tiles_storage_buffer, tiles_buffer), + (&bin_program.backdrops_storage_buffer, backdrops_buffer), + ], + }); + + core.device.end_timer_query(&timer_query); + core.current_timer.as_mut().unwrap().bin_times.push(TimerFuture::new(timer_query)); + core.stats.drawcall_count += 1; + + let indirect_draw_params_receiver = + core.device.read_buffer(fill_indirect_draw_params_buffer, + BufferTarget::Storage, + 0..32); + let indirect_draw_params = core.device.recv_buffer(&indirect_draw_params_receiver); + let indirect_draw_params: &[u32] = indirect_draw_params.as_slice_of().unwrap(); + + let needed_fill_count = + indirect_draw_params[FILL_INDIRECT_DRAW_PARAMS_INSTANCE_COUNT_INDEX]; + if needed_fill_count > self.allocated_fill_count { + self.allocated_fill_count = needed_fill_count.next_power_of_two(); + return None; + } + + core.stats.fill_count += needed_fill_count as usize; + + Some(FillBufferInfoD3D11 { fill_vertex_buffer_id, 
fill_indirect_draw_params_buffer_id }) + } + + pub(crate) fn upload_scene(&mut self, + core: &mut RendererCore, + draw_segments: &SegmentsD3D11, + clip_segments: &SegmentsD3D11) { + self.scene_buffers.upload(&mut core.allocator, &core.device, draw_segments, clip_segments); + } + + fn allocate_tiles(&mut self, core: &mut RendererCore, tile_count: u32) -> BufferID { + core.allocator.allocate_buffer::(&core.device, + tile_count as u64, + BufferTag("TileD3D11")) + } + + fn dice_segments(&mut self, + core: &mut RendererCore, + dice_metadata: &[DiceMetadataD3D11], + batch_segment_count: u32, + path_source: PathSource, + transform: Transform2F) + -> Option { + let dice_program = &self.programs.dice_program; + + let microlines_buffer_id = + core.allocator.allocate_buffer::(&core.device, + self.allocated_microline_count as u64, + BufferTag("MicrolineD3D11")); + let dice_metadata_buffer_id = + core.allocator.allocate_buffer::(&core.device, + dice_metadata.len() as u64, + BufferTag("DiceMetadataD3D11")); + let dice_indirect_draw_params_buffer_id = + core.allocator.allocate_buffer::(&core.device, + 8, + BufferTag("DiceIndirectDrawParamsD3D11")); + + let microlines_buffer = core.allocator.get_buffer(microlines_buffer_id); + let dice_metadata_storage_buffer = core.allocator.get_buffer(dice_metadata_buffer_id); + let dice_indirect_draw_params_buffer = + core.allocator.get_buffer(dice_indirect_draw_params_buffer_id); + + let scene_buffers = &self.scene_buffers; + let scene_source_buffers = match path_source { + PathSource::Draw => &scene_buffers.draw, + PathSource::Clip => &scene_buffers.clip, + }; + let SceneSourceBuffers { + points_buffer: points_buffer_id, + point_indices_buffer: point_indices_buffer_id, + point_indices_count, + .. 
+ } = *scene_source_buffers; + + let points_buffer = + core.allocator.get_buffer(points_buffer_id.expect("Where's the points buffer?")); + let point_indices_buffer = + core.allocator + .get_buffer(point_indices_buffer_id.expect("Where's the point indices buffer?")); + + core.device.upload_to_buffer(dice_indirect_draw_params_buffer, + 0, + &[0, 0, 0, 0, point_indices_count, 0, 0, 0], + BufferTarget::Storage); + core.device.upload_to_buffer(dice_metadata_storage_buffer, + 0, + dice_metadata, + BufferTarget::Storage); + + let timer_query = core.timer_query_cache.alloc(&core.device); + core.device.begin_timer_query(&timer_query); + + let workgroup_count = (batch_segment_count + DICE_WORKGROUP_SIZE - 1) / + DICE_WORKGROUP_SIZE; + let compute_dimensions = ComputeDimensions { x: workgroup_count, y: 1, z: 1 }; + + core.device.dispatch_compute(compute_dimensions, &ComputeState { + program: &dice_program.program, + textures: &[], + uniforms: &[ + (&dice_program.transform_uniform, UniformData::Mat2(transform.matrix.0)), + (&dice_program.translation_uniform, UniformData::Vec2(transform.vector.0)), + (&dice_program.path_count_uniform, + UniformData::Int(dice_metadata.len() as i32)), + (&dice_program.last_batch_segment_index_uniform, + UniformData::Int(batch_segment_count as i32)), + (&dice_program.max_microline_count_uniform, + UniformData::Int(self.allocated_microline_count as i32)), + ], + images: &[], + storage_buffers: &[ + (&dice_program.compute_indirect_params_storage_buffer, + dice_indirect_draw_params_buffer), + (&dice_program.points_storage_buffer, points_buffer), + (&dice_program.input_indices_storage_buffer, point_indices_buffer), + (&dice_program.microlines_storage_buffer, microlines_buffer), + (&dice_program.dice_metadata_storage_buffer, &dice_metadata_storage_buffer), + ], + }); + + core.device.end_timer_query(&timer_query); + core.current_timer.as_mut().unwrap().dice_times.push(TimerFuture::new(timer_query)); + core.stats.drawcall_count += 1; + + let 
indirect_compute_params_receiver = + core.device.read_buffer(&dice_indirect_draw_params_buffer, + BufferTarget::Storage, + 0..32); + let indirect_compute_params = core.device.recv_buffer(&indirect_compute_params_receiver); + let indirect_compute_params: &[u32] = indirect_compute_params.as_slice_of().unwrap(); + + core.allocator.free_buffer(dice_metadata_buffer_id); + core.allocator.free_buffer(dice_indirect_draw_params_buffer_id); + + let microline_count = + indirect_compute_params[BIN_INDIRECT_DRAW_PARAMS_MICROLINE_COUNT_INDEX]; + if microline_count > self.allocated_microline_count { + self.allocated_microline_count = microline_count.next_power_of_two(); + return None; + } + + Some(MicrolinesBufferIDsD3D11 { buffer_id: microlines_buffer_id, count: microline_count }) + } + + fn draw_fills(&mut self, + core: &mut RendererCore, + fill_storage_info: &FillBufferInfoD3D11, + tiles_d3d11_buffer_id: BufferID, + alpha_tiles_buffer_id: BufferID, + propagate_tiles_info: &PropagateTilesInfoD3D11) { + let &FillBufferInfoD3D11 { + fill_vertex_buffer_id, + fill_indirect_draw_params_buffer_id: _, + } = fill_storage_info; + let &PropagateTilesInfoD3D11 { ref alpha_tile_range } = propagate_tiles_info; + + let fill_program = &self.programs.fill_program; + let fill_vertex_buffer = core.allocator.get_buffer(fill_vertex_buffer_id); + + let mask_storage = core.mask_storage.as_ref().expect("Where's the mask storage?"); + let mask_framebuffer_id = mask_storage.framebuffer_id; + let mask_framebuffer = core.allocator.get_framebuffer(mask_framebuffer_id); + let image_texture = core.device.framebuffer_texture(mask_framebuffer); + + let tiles_d3d11_buffer = core.allocator.get_buffer(tiles_d3d11_buffer_id); + let alpha_tiles_buffer = core.allocator.get_buffer(alpha_tiles_buffer_id); + + let area_lut_texture = core.allocator.get_texture(core.area_lut_texture_id); + + let timer_query = core.timer_query_cache.alloc(&core.device); + core.device.begin_timer_query(&timer_query); + + // This setup is 
an annoying workaround for the 64K limit of compute invocation in OpenGL. + let alpha_tile_count = alpha_tile_range.end - alpha_tile_range.start; + let dimensions = ComputeDimensions { + x: alpha_tile_count.min(1 << 15) as u32, + y: ((alpha_tile_count + (1 << 15) - 1) >> 15) as u32, + z: 1, + }; + + core.device.dispatch_compute(dimensions, &ComputeState { + program: &fill_program.program, + textures: &[(&fill_program.area_lut_texture, area_lut_texture)], + images: &[(&fill_program.dest_image, image_texture, ImageAccess::ReadWrite)], + uniforms: &[ + (&fill_program.alpha_tile_range_uniform, + UniformData::IVec2(I32x2::new(alpha_tile_range.start as i32, + alpha_tile_range.end as i32))), + ], + storage_buffers: &[ + (&fill_program.fills_storage_buffer, fill_vertex_buffer), + (&fill_program.tiles_storage_buffer, tiles_d3d11_buffer), + (&fill_program.alpha_tiles_storage_buffer, &alpha_tiles_buffer), + ], + }); + + core.device.end_timer_query(&timer_query); + core.current_timer.as_mut().unwrap().fill_times.push(TimerFuture::new(timer_query)); + core.stats.drawcall_count += 1; + + core.framebuffer_flags.insert(FramebufferFlags::MASK_FRAMEBUFFER_IS_DIRTY); + } + + pub(crate) fn prepare_and_draw_tiles(&mut self, + core: &mut RendererCore, + batch: &DrawTileBatchD3D11) { + let tile_batch_id = batch.tile_batch_data.batch_id; + self.prepare_tiles(core, &batch.tile_batch_data); + let batch_info = self.tile_batch_info[tile_batch_id.0 as usize].clone(); + self.draw_tiles(core, + batch_info.tiles_d3d11_buffer_id, + batch_info.first_tile_map_buffer_id, + batch.color_texture); + } + + // Computes backdrops, performs clipping, and populates Z buffers on GPU. + pub(crate) fn prepare_tiles(&mut self, + core: &mut RendererCore, + batch: &TileBatchDataD3D11) { + core.stats.total_tile_count += batch.tile_count as usize; + + // Upload tiles to GPU or allocate them as appropriate. 
+ let tiles_d3d11_buffer_id = self.allocate_tiles(core, batch.tile_count); + + // Fetch and/or allocate clip storage as needed. + let clip_buffer_ids = match batch.clipped_path_info { + Some(ref clipped_path_info) => { + let clip_batch_id = clipped_path_info.clip_batch_id; + let clip_tile_batch_info = &self.tile_batch_info[clip_batch_id.0 as usize]; + let metadata = clip_tile_batch_info.propagate_metadata_buffer_id; + let tiles = clip_tile_batch_info.tiles_d3d11_buffer_id; + Some(ClipBufferIDs { metadata: Some(metadata), tiles }) + } + None => None, + }; + + // Allocate a Z-buffer. + let z_buffer_id = self.allocate_z_buffer(core); + + // Propagate backdrops, bin fills, render fills, and/or perform clipping on GPU if + // necessary. + // Allocate space for tile lists. + let first_tile_map_buffer_id = self.allocate_first_tile_map(core); + + let propagate_metadata_buffer_ids = + self.upload_propagate_metadata(core, + &batch.prepare_info.propagate_metadata, + &batch.prepare_info.backdrops); + + // Dice (flatten) segments into microlines. We might have to do this twice if our + // first attempt runs out of space in the storage buffer. + let mut microlines_storage = None; + for _ in 0..2 { + microlines_storage = self.dice_segments(core, + &batch.prepare_info.dice_metadata, + batch.segment_count, + batch.path_source, + batch.prepare_info.transform); + if microlines_storage.is_some() { + break; + } + } + let microlines_storage = + microlines_storage.expect("Ran out of space for microlines when dicing!"); + + // Initialize tiles, and bin segments. We might have to do this twice if our first + // attempt runs out of space in the fill buffer. 
+ let mut fill_buffer_info = None; + for _ in 0..2 { + self.bound(core, + tiles_d3d11_buffer_id, + batch.tile_count, + &batch.prepare_info.tile_path_info); + + self.upload_initial_backdrops(core, + propagate_metadata_buffer_ids.backdrops, + &batch.prepare_info.backdrops); + + fill_buffer_info = self.bin_segments(core, + &microlines_storage, + &propagate_metadata_buffer_ids, + tiles_d3d11_buffer_id); + if fill_buffer_info.is_some() { + break; + } + } + let fill_buffer_info = + fill_buffer_info.expect("Ran out of space for fills when binning!"); + + core.allocator.free_buffer(microlines_storage.buffer_id); + + // TODO(pcwalton): If we run out of space for alpha tile indices, propagate + // multiple times. + + let alpha_tiles_buffer_id = self.allocate_alpha_tile_info(core, batch.tile_count); + + let propagate_tiles_info = + self.propagate_tiles(core, + batch.prepare_info.backdrops.len() as u32, + tiles_d3d11_buffer_id, + fill_buffer_info.fill_indirect_draw_params_buffer_id, + z_buffer_id, + first_tile_map_buffer_id, + alpha_tiles_buffer_id, + &propagate_metadata_buffer_ids, + clip_buffer_ids.as_ref()); + + core.allocator.free_buffer(propagate_metadata_buffer_ids.backdrops); + + // FIXME(pcwalton): Don't unconditionally pass true for copying here. + core.reallocate_alpha_tile_pages_if_necessary(true); + self.draw_fills(core, + &fill_buffer_info, + tiles_d3d11_buffer_id, + alpha_tiles_buffer_id, + &propagate_tiles_info); + + core.allocator.free_buffer(fill_buffer_info.fill_vertex_buffer_id); + core.allocator.free_buffer(fill_buffer_info.fill_indirect_draw_params_buffer_id); + core.allocator.free_buffer(alpha_tiles_buffer_id); + + // FIXME(pcwalton): This seems like the wrong place to do this... + self.sort_tiles(core, tiles_d3d11_buffer_id, first_tile_map_buffer_id, z_buffer_id); + + // Record tile batch info. 
+ self.tile_batch_info.insert(batch.batch_id.0 as usize, TileBatchInfoD3D11 { + tile_count: batch.tile_count, + z_buffer_id, + tiles_d3d11_buffer_id, + propagate_metadata_buffer_id: propagate_metadata_buffer_ids.propagate_metadata, + first_tile_map_buffer_id, + }); + } + + fn propagate_tiles(&mut self, + core: &mut RendererCore, + column_count: u32, + tiles_d3d11_buffer_id: BufferID, + fill_indirect_draw_params_buffer_id: BufferID, + z_buffer_id: BufferID, + first_tile_map_buffer_id: BufferID, + alpha_tiles_buffer_id: BufferID, + propagate_metadata_buffer_ids: &PropagateMetadataBufferIDsD3D11, + clip_buffer_ids: Option<&ClipBufferIDs>) + -> PropagateTilesInfoD3D11 { + let propagate_program = &self.programs.propagate_program; + + let tiles_d3d11_buffer = core.allocator.get_buffer(tiles_d3d11_buffer_id); + let propagate_metadata_storage_buffer = + core.allocator.get_buffer(propagate_metadata_buffer_ids.propagate_metadata); + let backdrops_storage_buffer = + core.allocator.get_buffer(propagate_metadata_buffer_ids.backdrops); + + // TODO(pcwalton): Zero out the Z-buffer on GPU? + let z_buffer = core.allocator.get_buffer(z_buffer_id); + let z_buffer_size = core.tile_size(); + let tile_area = z_buffer_size.area() as usize; + core.device.upload_to_buffer(z_buffer, 0, &vec![0i32; tile_area], BufferTarget::Storage); + + // TODO(pcwalton): Initialize the first tiles buffer on GPU? 
+ let first_tile_map_storage_buffer = core.allocator.get_buffer(first_tile_map_buffer_id); + core.device.upload_to_buffer::(&first_tile_map_storage_buffer, + 0, + &vec![FirstTileD3D11::default(); tile_area], + BufferTarget::Storage); + + let alpha_tiles_storage_buffer = core.allocator.get_buffer(alpha_tiles_buffer_id); + let fill_indirect_draw_params_buffer = + core.allocator.get_buffer(fill_indirect_draw_params_buffer_id); + + let mut storage_buffers = vec![ + (&propagate_program.draw_metadata_storage_buffer, propagate_metadata_storage_buffer), + (&propagate_program.backdrops_storage_buffer, &backdrops_storage_buffer), + (&propagate_program.draw_tiles_storage_buffer, tiles_d3d11_buffer), + (&propagate_program.z_buffer_storage_buffer, z_buffer), + (&propagate_program.first_tile_map_storage_buffer, first_tile_map_storage_buffer), + (&propagate_program.indirect_draw_params_storage_buffer, + fill_indirect_draw_params_buffer), + (&propagate_program.alpha_tiles_storage_buffer, alpha_tiles_storage_buffer), + ]; + + if let Some(clip_buffer_ids) = clip_buffer_ids { + let clip_metadata_buffer_id = + clip_buffer_ids.metadata.expect("Where's the clip metadata storage?"); + let clip_metadata_buffer = core.allocator.get_buffer(clip_metadata_buffer_id); + let clip_tile_buffer = core.allocator.get_buffer(clip_buffer_ids.tiles); + storage_buffers.push((&propagate_program.clip_metadata_storage_buffer, + clip_metadata_buffer)); + storage_buffers.push((&propagate_program.clip_tiles_storage_buffer, clip_tile_buffer)); + } + + let timer_query = core.timer_query_cache.alloc(&core.device); + core.device.begin_timer_query(&timer_query); + + let dimensions = ComputeDimensions { + x: (column_count + PROPAGATE_WORKGROUP_SIZE - 1) / PROPAGATE_WORKGROUP_SIZE, + y: 1, + z: 1, + }; + core.device.dispatch_compute(dimensions, &ComputeState { + program: &propagate_program.program, + textures: &[], + images: &[], + uniforms: &[ + (&propagate_program.framebuffer_tile_size_uniform, + 
UniformData::IVec2(core.framebuffer_tile_size().0)), + (&propagate_program.column_count_uniform, UniformData::Int(column_count as i32)), + (&propagate_program.first_alpha_tile_index_uniform, + UniformData::Int(core.alpha_tile_count as i32)), + ], + storage_buffers: &storage_buffers, + }); + + core.device.end_timer_query(&timer_query); + core.current_timer.as_mut().unwrap().other_times.push(TimerFuture::new(timer_query)); + core.stats.drawcall_count += 1; + + let fill_indirect_draw_params_receiver = + core.device.read_buffer(&fill_indirect_draw_params_buffer, + BufferTarget::Storage, + 0..32); + let fill_indirect_draw_params = core.device + .recv_buffer(&fill_indirect_draw_params_receiver); + let fill_indirect_draw_params: &[u32] = fill_indirect_draw_params.as_slice_of().unwrap(); + + let batch_alpha_tile_count = + fill_indirect_draw_params[FILL_INDIRECT_DRAW_PARAMS_ALPHA_TILE_COUNT_INDEX]; + + let alpha_tile_start = core.alpha_tile_count; + core.alpha_tile_count += batch_alpha_tile_count; + core.stats.alpha_tile_count += batch_alpha_tile_count as usize; + let alpha_tile_end = core.alpha_tile_count; + + PropagateTilesInfoD3D11 { alpha_tile_range: alpha_tile_start..alpha_tile_end } + } + + fn sort_tiles(&mut self, + core: &mut RendererCore, + tiles_d3d11_buffer_id: BufferID, + first_tile_map_buffer_id: BufferID, + z_buffer_id: BufferID) { + let sort_program = &self.programs.sort_program; + + let tiles_d3d11_buffer = core.allocator.get_buffer(tiles_d3d11_buffer_id); + let first_tile_map_buffer = core.allocator.get_buffer(first_tile_map_buffer_id); + let z_buffer = core.allocator.get_buffer(z_buffer_id); + + let tile_count = core.framebuffer_tile_size().area(); + + let timer_query = core.timer_query_cache.alloc(&core.device); + core.device.begin_timer_query(&timer_query); + + let dimensions = ComputeDimensions { + x: (tile_count as u32 + SORT_WORKGROUP_SIZE - 1) / SORT_WORKGROUP_SIZE, + y: 1, + z: 1, + }; + core.device.dispatch_compute(dimensions, &ComputeState { + 
program: &sort_program.program, + textures: &[], + images: &[], + uniforms: &[(&sort_program.tile_count_uniform, UniformData::Int(tile_count))], + storage_buffers: &[ + (&sort_program.tiles_storage_buffer, tiles_d3d11_buffer), + (&sort_program.first_tile_map_storage_buffer, first_tile_map_buffer), + (&sort_program.z_buffer_storage_buffer, z_buffer), + ], + }); + + core.device.end_timer_query(&timer_query); + core.current_timer.as_mut().unwrap().other_times.push(TimerFuture::new(timer_query)); + core.stats.drawcall_count += 1; + } + + fn allocate_first_tile_map(&mut self, core: &mut RendererCore) -> BufferID { + core.allocator.allocate_buffer::(&core.device, + core.tile_size().area() as u64, + BufferTag("FirstTileD3D11")) + } + + fn allocate_alpha_tile_info(&mut self, core: &mut RendererCore, index_count: u32) + -> BufferID { + core.allocator.allocate_buffer::(&core.device, + index_count as u64, + BufferTag("AlphaTileD3D11")) + } + + fn allocate_z_buffer(&mut self, core: &mut RendererCore) -> BufferID { + core.allocator.allocate_buffer::(&core.device, + core.tile_size().area() as u64, + BufferTag("ZBufferD3D11")) + } + + pub(crate) fn draw_tiles(&mut self, + core: &mut RendererCore, + tiles_d3d11_buffer_id: BufferID, + first_tile_map_buffer_id: BufferID, + color_texture_0: Option) { + let timer_query = core.timer_query_cache.alloc(&core.device); + core.device.begin_timer_query(&timer_query); + + let tile_program = &self.programs.tile_program; + + let (mut textures, mut uniforms, mut images) = (vec![], vec![], vec![]); + + core.set_uniforms_for_drawing_tiles(&tile_program.common, + &mut textures, + &mut uniforms, + color_texture_0); + + uniforms.push((&tile_program.framebuffer_tile_size_uniform, + UniformData::IVec2(core.framebuffer_tile_size().0))); + + match core.draw_render_target() { + RenderTarget::Default => panic!("Can't draw to the default framebuffer with compute!"), + RenderTarget::Framebuffer(ref framebuffer) => { + let dest_texture = 
core.device.framebuffer_texture(framebuffer); + images.push((&tile_program.dest_image, dest_texture, ImageAccess::ReadWrite)); + } + } + + let clear_color = core.clear_color_for_draw_operation(); + match clear_color { + None => { + uniforms.push((&tile_program.load_action_uniform, + UniformData::Int(LOAD_ACTION_LOAD))); + uniforms.push((&tile_program.clear_color_uniform, + UniformData::Vec4(F32x4::default()))); + } + Some(clear_color) => { + uniforms.push((&tile_program.load_action_uniform, + UniformData::Int(LOAD_ACTION_CLEAR))); + uniforms.push((&tile_program.clear_color_uniform, + UniformData::Vec4(clear_color.0))); + } + } + + let tiles_d3d11_buffer = core.allocator.get_buffer(tiles_d3d11_buffer_id); + let first_tile_map_storage_buffer = core.allocator.get_buffer(first_tile_map_buffer_id); + + let framebuffer_tile_size = core.framebuffer_tile_size().0; + let compute_dimensions = ComputeDimensions { + x: framebuffer_tile_size.x() as u32, + y: framebuffer_tile_size.y() as u32, + z: 1, + }; + + core.device.dispatch_compute(compute_dimensions, &ComputeState { + program: &tile_program.common.program, + textures: &textures, + images: &images, + storage_buffers: &[ + (&tile_program.tiles_storage_buffer, tiles_d3d11_buffer), + (&tile_program.first_tile_map_storage_buffer, first_tile_map_storage_buffer), + ], + uniforms: &uniforms, + }); + + core.device.end_timer_query(&timer_query); + core.current_timer.as_mut().unwrap().composite_times.push(TimerFuture::new(timer_query)); + core.stats.drawcall_count += 1; + + core.preserve_draw_framebuffer(); + } + + pub(crate) fn end_frame(&mut self, core: &mut RendererCore) { + self.free_tile_batch_buffers(core); + } + + fn free_tile_batch_buffers(&mut self, core: &mut RendererCore) { + for (_, tile_batch_info) in self.tile_batch_info.drain() { + core.allocator.free_buffer(tile_batch_info.z_buffer_id); + core.allocator.free_buffer(tile_batch_info.tiles_d3d11_buffer_id); + 
core.allocator.free_buffer(tile_batch_info.propagate_metadata_buffer_id); + core.allocator.free_buffer(tile_batch_info.first_tile_map_buffer_id); + } + } +} + +// Buffer data + +#[derive(Clone)] +struct TileBatchInfoD3D11 { + tile_count: u32, + z_buffer_id: BufferID, + tiles_d3d11_buffer_id: BufferID, + propagate_metadata_buffer_id: BufferID, + first_tile_map_buffer_id: BufferID, +} + +#[derive(Clone)] +struct FillBufferInfoD3D11 { + fill_vertex_buffer_id: BufferID, + fill_indirect_draw_params_buffer_id: BufferID, +} + +#[derive(Debug)] +struct PropagateMetadataBufferIDsD3D11 { + propagate_metadata: BufferID, + backdrops: BufferID, +} + +struct MicrolinesBufferIDsD3D11 { + buffer_id: BufferID, + count: u32, +} + +#[derive(Clone, Debug)] +struct ClipBufferIDs { + metadata: Option, + tiles: BufferID, +} + +struct SceneBuffers { + draw: SceneSourceBuffers, + clip: SceneSourceBuffers, +} + +struct SceneSourceBuffers { + points_buffer: Option, + points_capacity: u32, + point_indices_buffer: Option, + point_indices_count: u32, + point_indices_capacity: u32, +} + +#[derive(Clone)] +struct PropagateTilesInfoD3D11 { + alpha_tile_range: Range, +} + +impl SceneBuffers { + fn new() -> SceneBuffers { + SceneBuffers { draw: SceneSourceBuffers::new(), clip: SceneSourceBuffers::new() } + } + + fn upload(&mut self, + allocator: &mut GPUMemoryAllocator, + device: &D, + draw_segments: &SegmentsD3D11, + clip_segments: &SegmentsD3D11) + where D: Device { + self.draw.upload(allocator, device, draw_segments); + self.clip.upload(allocator, device, clip_segments); + } +} + +impl SceneSourceBuffers { + fn new() -> SceneSourceBuffers { + SceneSourceBuffers { + points_buffer: None, + points_capacity: 0, + point_indices_buffer: None, + point_indices_count: 0, + point_indices_capacity: 0, + } + } + + fn upload(&mut self, + allocator: &mut GPUMemoryAllocator, + device: &D, + segments: &SegmentsD3D11) + where D: Device { + let needed_points_capacity = (segments.points.len() as 
u32).next_power_of_two(); + let needed_point_indices_capacity = (segments.indices.len() as u32).next_power_of_two(); + if self.points_capacity < needed_points_capacity { + self.points_buffer = + Some(allocator.allocate_buffer::(device, + needed_points_capacity as u64, + BufferTag("PointsD3D11"))); + self.points_capacity = needed_points_capacity; + } + if self.point_indices_capacity < needed_point_indices_capacity { + self.point_indices_buffer = Some(allocator.allocate_buffer::( + device, + needed_point_indices_capacity as u64, + BufferTag("PointIndicesD3D11"))); + self.point_indices_capacity = needed_point_indices_capacity; + } + device.upload_to_buffer(allocator.get_buffer(self.points_buffer.unwrap()), + 0, + &segments.points, + BufferTarget::Storage); + device.upload_to_buffer(allocator.get_buffer(self.point_indices_buffer.unwrap()), + 0, + &segments.indices, + BufferTarget::Storage); + self.point_indices_count = segments.indices.len() as u32; + } +} diff --git a/renderer/src/gpu/d3d11/shaders.rs b/renderer/src/gpu/d3d11/shaders.rs new file mode 100644 index 00000000..cd8249c7 --- /dev/null +++ b/renderer/src/gpu/d3d11/shaders.rs @@ -0,0 +1,321 @@ +// pathfinder/renderer/src/gpu/d3d11/shaders.rs +// +// Copyright © 2020 The Pathfinder Project Developers. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+// NOTE(review): generic parameter lists (e.g. `<D>` after each type name) appear to have been stripped from this copy — confirm against upstream.
+use crate::gpu::shaders::TileProgramCommon;
+use crate::tiles::{TILE_HEIGHT, TILE_WIDTH};
+use pathfinder_gpu::{ComputeDimensions, Device};
+use pathfinder_resources::ResourceLoader;
+// Local workgroup sizes (x dimension) that the programs below pass to `set_compute_program_local_size`.
+pub const BOUND_WORKGROUP_SIZE: u32 = 64;
+pub const DICE_WORKGROUP_SIZE: u32 = 64;
+pub const BIN_WORKGROUP_SIZE: u32 = 64;
+pub const PROPAGATE_WORKGROUP_SIZE: u32 = 64;
+pub const SORT_WORKGROUP_SIZE: u32 = 64;
+/// Bundle of the seven compute programs, all constructed together by `ProgramsD3D11::new`.
+pub struct ProgramsD3D11 where D: Device {
+    pub bound_program: BoundProgramD3D11,
+    pub dice_program: DiceProgramD3D11,
+    pub bin_program: BinProgramD3D11,
+    pub propagate_program: PropagateProgramD3D11,
+    pub sort_program: SortProgramD3D11,
+    pub fill_program: FillProgramD3D11,
+    pub tile_program: TileProgramD3D11,
+}
+
+impl ProgramsD3D11 where D: Device {
+    pub fn new(device: &D, resources: &dyn ResourceLoader) -> ProgramsD3D11 {
+        ProgramsD3D11 {
+            bound_program: BoundProgramD3D11::new(device, resources),
+            dice_program: DiceProgramD3D11::new(device, resources),
+            bin_program: BinProgramD3D11::new(device, resources),
+            propagate_program: PropagateProgramD3D11::new(device, resources),
+            sort_program: SortProgramD3D11::new(device, resources),
+            fill_program: FillProgramD3D11::new(device, resources),
+            tile_program: TileProgramD3D11::new(device, resources),
+        }
+    }
+}
+/// Handles for the `d3d11/propagate` compute program: three uniforms and nine storage buffers.
+pub struct PropagateProgramD3D11 where D: Device {
+    pub program: D::Program,
+    pub framebuffer_tile_size_uniform: D::Uniform,
+    pub column_count_uniform: D::Uniform,
+    pub first_alpha_tile_index_uniform: D::Uniform,
+    pub draw_metadata_storage_buffer: D::StorageBuffer,
+    pub clip_metadata_storage_buffer: D::StorageBuffer,
+    pub backdrops_storage_buffer: D::StorageBuffer,
+    pub draw_tiles_storage_buffer: D::StorageBuffer,
+    pub clip_tiles_storage_buffer: D::StorageBuffer,
+    pub z_buffer_storage_buffer: D::StorageBuffer,
+    pub first_tile_map_storage_buffer: D::StorageBuffer,
+    pub indirect_draw_params_storage_buffer: D::StorageBuffer,
+    pub alpha_tiles_storage_buffer: D::StorageBuffer,
+}
+
+impl PropagateProgramD3D11 where D: Device {
+    pub fn new(device: &D, resources: &dyn ResourceLoader) -> PropagateProgramD3D11 {
+        let mut program = device.create_compute_program(resources, "d3d11/propagate");
+        let local_size = ComputeDimensions { x: PROPAGATE_WORKGROUP_SIZE, y: 1, z: 1 };
+        device.set_compute_program_local_size(&mut program, local_size);
+        // Uniforms first, then storage buffers; indices 0-8 match the `binding` values in propagate.cs.glsl.
+        let framebuffer_tile_size_uniform = device.get_uniform(&program, "FramebufferTileSize");
+        let column_count_uniform = device.get_uniform(&program, "ColumnCount");
+        let first_alpha_tile_index_uniform = device.get_uniform(&program, "FirstAlphaTileIndex");
+        let draw_metadata_storage_buffer = device.get_storage_buffer(&program, "DrawMetadata", 0);
+        let clip_metadata_storage_buffer = device.get_storage_buffer(&program, "ClipMetadata", 1);
+        let backdrops_storage_buffer = device.get_storage_buffer(&program, "Backdrops", 2);
+        let draw_tiles_storage_buffer = device.get_storage_buffer(&program, "DrawTiles", 3);
+        let clip_tiles_storage_buffer = device.get_storage_buffer(&program, "ClipTiles", 4);
+        let z_buffer_storage_buffer = device.get_storage_buffer(&program, "ZBuffer", 5);
+        let first_tile_map_storage_buffer = device.get_storage_buffer(&program, "FirstTileMap", 6);
+        let indirect_draw_params_storage_buffer =
+            device.get_storage_buffer(&program, "IndirectDrawParams", 7);
+        let alpha_tiles_storage_buffer = device.get_storage_buffer(&program, "AlphaTiles", 8);
+
+        PropagateProgramD3D11 {
+            program,
+            framebuffer_tile_size_uniform,
+            column_count_uniform,
+            first_alpha_tile_index_uniform,
+            draw_metadata_storage_buffer,
+            clip_metadata_storage_buffer,
+            backdrops_storage_buffer,
+            draw_tiles_storage_buffer,
+            clip_tiles_storage_buffer,
+            z_buffer_storage_buffer,
+            first_tile_map_storage_buffer,
+            indirect_draw_params_storage_buffer,
+            alpha_tiles_storage_buffer,
+        }
+    }
+}
+/// Handles for the `d3d11/fill` compute program: destination image, area LUT texture, one uniform, three buffers.
+pub struct FillProgramD3D11 where D: Device {
+    pub program: D::Program,
+    pub dest_image: D::ImageParameter,
+    pub area_lut_texture: D::TextureParameter,
+    pub alpha_tile_range_uniform: D::Uniform,
+    pub fills_storage_buffer: D::StorageBuffer,
+    pub tiles_storage_buffer: D::StorageBuffer,
+    pub alpha_tiles_storage_buffer: D::StorageBuffer,
+}
+
+impl FillProgramD3D11 where D: Device {
+    pub fn new(device: &D, resources: &dyn ResourceLoader) -> FillProgramD3D11 {
+        let mut program = device.create_compute_program(resources, "d3d11/fill");
+        let local_size = ComputeDimensions { x: TILE_WIDTH, y: TILE_HEIGHT / 4, z: 1 };
+        device.set_compute_program_local_size(&mut program, local_size);
+        // Parameter lookups; buffer indices 0-2 match the `binding` values in fill.cs.glsl.
+        let dest_image = device.get_image_parameter(&program, "Dest");
+        let area_lut_texture = device.get_texture_parameter(&program, "AreaLUT");
+        let alpha_tile_range_uniform = device.get_uniform(&program, "AlphaTileRange");
+        let fills_storage_buffer = device.get_storage_buffer(&program, "Fills", 0);
+        let tiles_storage_buffer = device.get_storage_buffer(&program, "Tiles", 1);
+        let alpha_tiles_storage_buffer = device.get_storage_buffer(&program, "AlphaTiles", 2);
+
+        FillProgramD3D11 {
+            program,
+            dest_image,
+            area_lut_texture,
+            alpha_tile_range_uniform,
+            fills_storage_buffer,
+            tiles_storage_buffer,
+            alpha_tiles_storage_buffer,
+        }
+    }
+}
+/// Handles for the `d3d11/tile` compute program; the program itself is owned by the `TileProgramCommon` in `common`.
+pub struct TileProgramD3D11 where D: Device {
+    pub common: TileProgramCommon,
+    pub load_action_uniform: D::Uniform,
+    pub clear_color_uniform: D::Uniform,
+    pub framebuffer_tile_size_uniform: D::Uniform,
+    pub dest_image: D::ImageParameter,
+    pub tiles_storage_buffer: D::StorageBuffer,
+    pub first_tile_map_storage_buffer: D::StorageBuffer,
+}
+
+impl TileProgramD3D11 where D: Device {
+    fn new(device: &D, resources: &dyn ResourceLoader) -> TileProgramD3D11 {
+        let mut program = device.create_compute_program(resources, "d3d11/tile");
+        device.set_compute_program_local_size(&mut program,
+                                              ComputeDimensions { x: 16, y: 4, z: 1 });
+        // All lookups borrow `program`, so they must happen before it is moved into `TileProgramCommon::new`.
+        let load_action_uniform = device.get_uniform(&program, "LoadAction");
+        let clear_color_uniform = device.get_uniform(&program, "ClearColor");
+        let framebuffer_tile_size_uniform = device.get_uniform(&program, "FramebufferTileSize");
+        let dest_image = device.get_image_parameter(&program, "DestImage");
+        let tiles_storage_buffer = device.get_storage_buffer(&program, "Tiles", 0);
+        let first_tile_map_storage_buffer = device.get_storage_buffer(&program, "FirstTileMap", 1);
+
+        let common = TileProgramCommon::new(device, program);
+        TileProgramD3D11 {
+            common,
+            load_action_uniform,
+            clear_color_uniform,
+            framebuffer_tile_size_uniform,
+            dest_image,
+            tiles_storage_buffer,
+            first_tile_map_storage_buffer,
+        }
+    }
+}
+/// Handles for the `d3d11/bin` compute program: two uniforms and six storage buffers.
+pub struct BinProgramD3D11 where D: Device {
+    pub program: D::Program,
+    pub microline_count_uniform: D::Uniform,
+    pub max_fill_count_uniform: D::Uniform,
+    pub microlines_storage_buffer: D::StorageBuffer,
+    pub metadata_storage_buffer: D::StorageBuffer,
+    pub indirect_draw_params_storage_buffer: D::StorageBuffer,
+    pub fills_storage_buffer: D::StorageBuffer,
+    pub tiles_storage_buffer: D::StorageBuffer,
+    pub backdrops_storage_buffer: D::StorageBuffer,
+}
+
+impl BinProgramD3D11 where D: Device {
+    pub fn new(device: &D, resources: &dyn ResourceLoader) -> BinProgramD3D11 {
+        let mut program = device.create_compute_program(resources, "d3d11/bin");
+        let dimensions = ComputeDimensions { x: BIN_WORKGROUP_SIZE, y: 1, z: 1 };
+        device.set_compute_program_local_size(&mut program, dimensions);
+
+        let microline_count_uniform = device.get_uniform(&program, "MicrolineCount");
+        let max_fill_count_uniform = device.get_uniform(&program, "MaxFillCount");
+        // Buffer indices 0-5 match the `binding` values declared in bin.cs.glsl.
+        let microlines_storage_buffer = device.get_storage_buffer(&program, "Microlines", 0);
+        let metadata_storage_buffer = device.get_storage_buffer(&program, "Metadata", 1);
+        let indirect_draw_params_storage_buffer =
+            device.get_storage_buffer(&program, "IndirectDrawParams", 2);
+        let fills_storage_buffer = device.get_storage_buffer(&program, "Fills", 3);
+        let tiles_storage_buffer = device.get_storage_buffer(&program, "Tiles", 4);
+        let backdrops_storage_buffer = device.get_storage_buffer(&program, "Backdrops", 5);
+
+        BinProgramD3D11 {
+            program,
+            microline_count_uniform,
+            max_fill_count_uniform,
+            metadata_storage_buffer,
+            indirect_draw_params_storage_buffer,
+            fills_storage_buffer,
+            tiles_storage_buffer,
+            microlines_storage_buffer,
+            backdrops_storage_buffer,
+        }
+    }
+}
+/// Handles for the `d3d11/dice` compute program: five uniforms and five storage buffers.
+pub struct DiceProgramD3D11 where D: Device {
+    pub program: D::Program,
+    pub transform_uniform: D::Uniform,
+    pub translation_uniform: D::Uniform,
+    pub path_count_uniform: D::Uniform,
+    pub last_batch_segment_index_uniform: D::Uniform,
+    pub max_microline_count_uniform: D::Uniform,
+    pub compute_indirect_params_storage_buffer: D::StorageBuffer,
+    pub dice_metadata_storage_buffer: D::StorageBuffer,
+    pub points_storage_buffer: D::StorageBuffer,
+    pub input_indices_storage_buffer: D::StorageBuffer,
+    pub microlines_storage_buffer: D::StorageBuffer,
+}
+
+impl DiceProgramD3D11 where D: Device {
+    pub fn new(device: &D, resources: &dyn ResourceLoader) -> DiceProgramD3D11 {
+        let mut program = device.create_compute_program(resources, "d3d11/dice");
+        let dimensions = ComputeDimensions { x: DICE_WORKGROUP_SIZE, y: 1, z: 1 };
+        device.set_compute_program_local_size(&mut program, dimensions);
+
+        let transform_uniform = device.get_uniform(&program, "Transform");
+        let translation_uniform = device.get_uniform(&program, "Translation");
+        let path_count_uniform = device.get_uniform(&program, "PathCount");
+        let last_batch_segment_index_uniform = device.get_uniform(&program,
+                                                                  "LastBatchSegmentIndex");
+        let max_microline_count_uniform = device.get_uniform(&program, "MaxMicrolineCount");
+        // Buffer indices 0-4 match the `binding` values declared in dice.cs.glsl.
+        let compute_indirect_params_storage_buffer =
+            device.get_storage_buffer(&program, "ComputeIndirectParams", 0);
+        let dice_metadata_storage_buffer = device.get_storage_buffer(&program, "DiceMetadata", 1);
+        let points_storage_buffer = device.get_storage_buffer(&program, "Points", 2);
+        let input_indices_storage_buffer = device.get_storage_buffer(&program, "InputIndices", 3);
+        let microlines_storage_buffer = device.get_storage_buffer(&program, "Microlines", 4);
+
+        DiceProgramD3D11 {
+            program,
+            transform_uniform,
+            translation_uniform,
+            path_count_uniform,
+            last_batch_segment_index_uniform,
+            max_microline_count_uniform,
+            compute_indirect_params_storage_buffer,
+            dice_metadata_storage_buffer,
+            points_storage_buffer,
+            input_indices_storage_buffer,
+            microlines_storage_buffer,
+        }
+    }
+}
+/// Handles for the `d3d11/bound` compute program: two uniforms and two storage buffers.
+pub struct BoundProgramD3D11 where D: Device {
+    pub program: D::Program,
+    pub path_count_uniform: D::Uniform,
+    pub tile_count_uniform: D::Uniform,
+    pub tile_path_info_storage_buffer: D::StorageBuffer,
+    pub tiles_storage_buffer: D::StorageBuffer,
+}
+
+impl BoundProgramD3D11 where D: Device {
+    pub fn new(device: &D, resources: &dyn ResourceLoader) -> BoundProgramD3D11 {
+        let mut program = device.create_compute_program(resources, "d3d11/bound");
+        let dimensions = ComputeDimensions { x: BOUND_WORKGROUP_SIZE, y: 1, z: 1 };
+        device.set_compute_program_local_size(&mut program, dimensions);
+
+        let path_count_uniform = device.get_uniform(&program, "PathCount");
+        let tile_count_uniform = device.get_uniform(&program, "TileCount");
+        // Buffer indices 0-1 match the `binding` values declared in bound.cs.glsl.
+        let tile_path_info_storage_buffer = device.get_storage_buffer(&program, "TilePathInfo", 0);
+        let tiles_storage_buffer = device.get_storage_buffer(&program, "Tiles", 1);
+
+        BoundProgramD3D11 {
+            program,
+            path_count_uniform,
+            tile_count_uniform,
+            tile_path_info_storage_buffer,
+            tiles_storage_buffer,
+        }
+    }
+}
+/// Handles for the `d3d11/sort` compute program: one uniform and three storage buffers.
+pub struct SortProgramD3D11 where D: Device {
+    pub program: D::Program,
+    pub tile_count_uniform: D::Uniform,
+    pub tiles_storage_buffer: D::StorageBuffer,
+    pub first_tile_map_storage_buffer: D::StorageBuffer,
+    pub z_buffer_storage_buffer: D::StorageBuffer,
+}
+
+impl SortProgramD3D11 where D: Device {
+    pub fn new(device: &D, resources: &dyn ResourceLoader) -> SortProgramD3D11 {
+        let mut program = device.create_compute_program(resources, "d3d11/sort");
+        let dimensions = ComputeDimensions { x: SORT_WORKGROUP_SIZE, y: 1, z: 1 };
+        device.set_compute_program_local_size(&mut program, dimensions);
+
+        let tile_count_uniform = device.get_uniform(&program, "TileCount");
+        let tiles_storage_buffer = device.get_storage_buffer(&program, "Tiles", 0);
+        let first_tile_map_storage_buffer = device.get_storage_buffer(&program, "FirstTileMap", 1);
+        let z_buffer_storage_buffer = device.get_storage_buffer(&program, "ZBuffer", 2);
+
+        SortProgramD3D11 {
+            program,
+            tile_count_uniform,
+            tiles_storage_buffer,
+            first_tile_map_storage_buffer,
+            z_buffer_storage_buffer,
+        }
+    }
+}
\ No newline at end of file
diff --git a/resources/shaders/gl4/d3d11/bin.cs.glsl b/resources/shaders/gl4/d3d11/bin.cs.glsl
new file mode 100644
index 00000000..c185cc15
--- /dev/null
+++ b/resources/shaders/gl4/d3d11/bin.cs.glsl
@@ -0,0 +1,259 @@
+#version {{version}}
+// Automatically generated from files in pathfinder/shaders/. Do not edit!
+
+// Bins microlines into tiles. Each invocation walks one microline across the grid of 16-unit
+// tiles, adding a fill to each tile it crosses and adjusting backdrops at x-boundary crossings.
+
+
+
+
+
+
+
+
+
+
+
+#extension GL_GOOGLE_include_directive : enable
+
+
+
+
+
+
+
+
+
+
+
+
+precision highp float;
+
+
+
+
+
+layout(local_size_x = 64)in;
+// Number of valid entries in iMicrolines; invocations at or past it return immediately.
+uniform int uMicrolineCount;
+// Capacity of iFills; fills whose index is at or past it are counted but not written.
+uniform int uMaxFillCount;
+
+layout(std430, binding = 0)buffer bMicrolines {
+    restrict readonly uvec4 iMicrolines[];
+};
+
+layout(std430, binding = 1)buffer bMetadata {
+
+
+
+    // Three ivec4 entries per path, as read in main():
+    //   [0]: the path's tile rect (xy = min corner, zw = max corner, exclusive)
+    //   [1].x: offset of the path's first tile in iTiles; [2].x: offset of its first column in iBackdrops
+    restrict readonly ivec4 iMetadata[];
+};
+
+
+
+
+
+// Element [1] is the fill counter: addFill() atomically increments it once per emitted fill.
+layout(std430, binding = 2)buffer bIndirectDrawParams {
+    restrict uint iIndirectDrawParams[];
+};
+
+layout(std430, binding = 3)buffer bFills {
+    restrict writeonly uint iFills[];
+};
+
+layout(std430, binding = 4)buffer bTiles {
+
+    // Four uints per tile. This shader touches two of them:
+    //   [+1]: index of the most recently added fill (head of the tile's fill list)
+    //   [+2]: backdrop delta accumulated in the top 8 bits
+    restrict uint iTiles[];
+};
+
+layout(std430, binding = 5)buffer bBackdrops {
+
+    // Three uints per column; element [0] of each triple accumulates backdrop deltas for
+    // crossings at tiles with y < pathTileRect.y (see adjustBackdrop()).
+    restrict uint iBackdrops[];
+};
+// Row-major index of tileCoords within the path's tile rect, with no bounds check.
+uint computeTileIndexNoCheck(ivec2 tileCoords, ivec4 pathTileRect, uint pathTileOffset){
+    ivec2 offsetCoords = tileCoords - pathTileRect . xy;
+    return pathTileOffset + offsetCoords . x + offsetCoords . y *(pathTileRect . z - pathTileRect . x);
+}
+// Out-of-rect flags: (x < rect.x, y < rect.y, x >= rect.z, y >= rect.w).
+bvec4 computeTileOutcodes(ivec2 tileCoords, ivec4 pathTileRect){
+    return bvec4(lessThan(tileCoords, pathTileRect . xy),
+                 greaterThanEqual(tileCoords, pathTileRect . zw));
+}
+// Always writes the unchecked tile index to outTileIndex; returns true only if tileCoords is inside the rect.
+bool computeTileIndex(ivec2 tileCoords,
+                      ivec4 pathTileRect,
+                      uint pathTileOffset,
+                      out uint outTileIndex){
+    outTileIndex = computeTileIndexNoCheck(tileCoords, pathTileRect, pathTileOffset);
+    return ! any(computeTileOutcodes(tileCoords, pathTileRect));
+}
+// Adds lineSegment to the fill list of the tile at tileCoords; does nothing outside the path's tile rect.
+void addFill(vec4 lineSegment, ivec2 tileCoords, ivec4 pathTileRect, uint pathTileOffset){
+    // Skip tiles outside the path's tile rect.
+    uint tileIndex;
+    if(! computeTileIndex(tileCoords, pathTileRect, pathTileOffset, tileIndex)){
+        return;
+    }
+
+    // Convert to tile-local coordinates in 1/256 units; segments whose x endpoints coincide are dropped.
+    uvec4 scaledLocalLine = uvec4((lineSegment - vec4(tileCoords . xyxy * ivec4(16)))* vec4(256.0));
+    if(scaledLocalLine . x == scaledLocalLine . z)
+        return;
+
+    // Reserve a fill slot. The counter keeps advancing even past uMaxFillCount.
+    uint fillIndex = atomicAdd(iIndirectDrawParams[1], 1);
+
+    // Push onto the tile's list: the tile records the new fill, and the previous head becomes its link.
+    uint fillLink = atomicExchange(iTiles[tileIndex * 4 + 1],
+                                   int(fillIndex));
+
+    // Write the record (packed from point, packed to point, link) only if it fits in the buffer.
+    if(fillIndex < uMaxFillCount){
+        iFills[fillIndex * 3 + 0]= scaledLocalLine . x |(scaledLocalLine . y << 16);
+        iFills[fillIndex * 3 + 1]= scaledLocalLine . z |(scaledLocalLine . w << 16);
+        iFills[fillIndex * 3 + 2]= fillLink;
+    }
+}
+// Adds backdropDelta to the tile's backdrop byte, or to the column's iBackdrops entry when the tile has y < rect.y but x in range.
+void adjustBackdrop(int backdropDelta,
+                    ivec2 tileCoords,
+                    ivec4 pathTileRect,
+                    uint pathTileOffset,
+                    uint pathBackdropOffset){
+    bvec4 outcodes = computeTileOutcodes(tileCoords, pathTileRect);
+    if(any(outcodes)){
+        if(! outcodes . x && outcodes . y && ! outcodes . z){
+            uint backdropIndex = pathBackdropOffset + uint(tileCoords . x - pathTileRect . x);
+            atomicAdd(iBackdrops[backdropIndex * 3], backdropDelta);
+        }
+    } else {
+        uint tileIndex = computeTileIndexNoCheck(tileCoords, pathTileRect, pathTileOffset);
+        atomicAdd(iTiles[tileIndex * 4 + 2],
+                  uint(backdropDelta)<< 24);
+    }
+}
+// Unpacks a microline: .x/.y hold signed 16-bit integer coordinate pairs, .z four 8-bit fractions (/256), .w the path index.
+vec4 unpackMicroline(uvec4 packedMicroline, out uint outPathIndex){
+    outPathIndex = packedMicroline . w;
+    ivec4 signedMicroline = ivec4(packedMicroline);
+    return vec4((signedMicroline . x << 16)>> 16, signedMicroline . x >> 16,
+                (signedMicroline . y << 16)>> 16, signedMicroline . y >> 16)+
+           vec4(signedMicroline . z & 0xff,(signedMicroline . z >> 8)& 0xff,
+                (signedMicroline . z >> 16)& 0xff,(signedMicroline . z >> 24)& 0xff)/ 256.0;
+}
+// One invocation per microline.
+void main(){
+    uint segmentIndex = gl_GlobalInvocationID . x;
+    if(segmentIndex >= uMicrolineCount)
+        return;
+
+    uint pathIndex;
+    vec4 lineSegment = unpackMicroline(iMicrolines[segmentIndex], pathIndex);
+
+    ivec4 pathTileRect = iMetadata[pathIndex * 3 + 0];
+    uint pathTileOffset = uint(iMetadata[pathIndex * 3 + 1]. x);
+    uint pathBackdropOffset = uint(iMetadata[pathIndex * 3 + 2]. x);
+
+
+    // Tiles are 16 units on a side.
+    ivec2 tileSize = ivec2(16);
+
+    ivec4 tileLineSegment = ivec4(floor(lineSegment / vec4(tileSize . xyxy)));
+    ivec2 fromTileCoords = tileLineSegment . xy, toTileCoords = tileLineSegment . zw;
+
+    vec2 vector = lineSegment . zw - lineSegment . xy;
+    vec2 vectorIsNegative = vec2(vector . x < 0.0 ? - 1.0 : 0.0, vector . y < 0.0 ? - 1.0 : 0.0);
+    ivec2 tileStep = ivec2(vector . x < 0.0 ? - 1 : 1, vector . y < 0.0 ? - 1 : 1);
+    // Position of the first tile boundary the segment reaches along each axis.
+    vec2 firstTileCrossing = vec2((fromTileCoords + ivec2(vector . x >= 0.0 ? 1 : 0,
+                                                           vector . y >= 0.0 ? 1 : 0))* tileSize);
+    // tMax: segment parameter t at the next boundary per axis; tDelta: t between successive boundaries.
+    vec2 tMax =(firstTileCrossing - lineSegment . xy)/ vector;
+    vec2 tDelta = abs(tileSize / vector);
+
+    vec2 currentPosition = lineSegment . xy;
+    ivec2 tileCoords = fromTileCoords;
+    int lastStepDirection = 0;
+    uint iteration = 0;
+    // Walk tile by tile. Step direction: 1 = along x, 2 = along y, 0 = done. The 1024 cap bounds the loop.
+    while(iteration < 1024u){
+        int nextStepDirection;
+        if(tMax . x < tMax . y)
+            nextStepDirection = 1;
+        else if(tMax . x > tMax . y)
+            nextStepDirection = 2;
+        else if(tileStep . x > 0.0)
+            nextStepDirection = 1;
+        else
+            nextStepDirection = 2;
+        // Never go past the end of the segment (t = 1).
+        float nextT = min(nextStepDirection == 1 ? tMax . x : tMax . y, 1.0);
+
+        // In the destination tile: emit the final piece below, then stop.
+        if(tileCoords == toTileCoords)
+            nextStepDirection = 0;
+
+        vec2 nextPosition = mix(lineSegment . xy, lineSegment . zw, nextT);
+        vec4 clippedLineSegment = vec4(currentPosition, nextPosition);
+        addFill(clippedLineSegment, tileCoords, pathTileRect, pathTileOffset);
+
+        // When leaving or entering through the tile's low-y edge, add a segment between the crossing point and the tile's origin corner.
+        vec4 auxiliarySegment;
+        bool haveAuxiliarySegment = false;
+        if(tileStep . y < 0 && nextStepDirection == 2){
+            auxiliarySegment = vec4(clippedLineSegment . zw, vec2(tileCoords * tileSize));
+            haveAuxiliarySegment = true;
+        } else if(tileStep . y > 0 && lastStepDirection == 2){
+            auxiliarySegment = vec4(vec2(tileCoords * tileSize), clippedLineSegment . xy);
+            haveAuxiliarySegment = true;
+        }
+        if(haveAuxiliarySegment)
+            addFill(auxiliarySegment, tileCoords, pathTileRect, pathTileOffset);
+
+
+
+        // Crossing an x boundary adjusts the current tile's backdrop:
+        // +1 if this tile was just entered moving toward -x, -1 if it is about to be left moving toward +x.
+        if(tileStep . x < 0 && lastStepDirection == 1){
+            adjustBackdrop(1,
+                           tileCoords,
+                           pathTileRect,
+                           pathTileOffset,
+                           pathBackdropOffset);
+        } else if(tileStep . x > 0 && nextStepDirection == 1){
+            adjustBackdrop(- 1,
+                           tileCoords,
+                           pathTileRect,
+                           pathTileOffset,
+                           pathBackdropOffset);
+        }
+
+        // Advance to the neighboring tile, or finish.
+        if(nextStepDirection == 1){
+            tMax . x += tDelta . x;
+            tileCoords . x += tileStep . x;
+        } else if(nextStepDirection == 2){
+            tMax . y += tDelta . y;
+            tileCoords . y += tileStep . y;
+        } else if(nextStepDirection == 0){
+            break;
+        }
+
+        currentPosition = nextPosition;
+        lastStepDirection = nextStepDirection;
+
+        iteration ++;
+    }
+}
+
diff --git a/resources/shaders/gl4/d3d11/bound.cs.glsl b/resources/shaders/gl4/d3d11/bound.cs.glsl
new file mode 100644
index 00000000..57f07444
--- /dev/null
+++ b/resources/shaders/gl4/d3d11/bound.cs.glsl
@@ -0,0 +1,87 @@
+#version {{version}}
+// Automatically generated from files in pathfinder/shaders/. Do not edit!
+
+// Initializes tile records. Each invocation handles one tile: it finds the path that owns the
+// tile by binary search over iTilePathInfo and writes the tile's four words.
+
+
+
+
+
+
+
+
+
+
+
+#extension GL_GOOGLE_include_directive : enable
+
+precision highp float;
+
+
+
+
+
+
+
+
+
+
+layout(local_size_x = 64)in;
+// uPathCount bounds the search over iTilePathInfo; invocations with tileIndex >= uTileCount return early.
+uniform int uPathCount;
+uniform int uTileCount;
+
+layout(std430, binding = 0)buffer bTilePathInfo {
+    // One uvec4 per path, as used in main():
+    //   x, y: tile rect, each word packing two signed 16-bit values
+    //   z: index of the path's first tile (the binary-search key)
+    //   w: value copied into word 3 of every tile of the path
+    restrict readonly uvec4 iTilePathInfo[];
+};
+
+layout(std430, binding = 1)buffer bTiles {
+
+
+    // Four uints per tile. main() sets them to: [0] = ~0u, [1] = ~0u, [2] = 0x00ffffff,
+    // [3] = the owning path's iTilePathInfo .w word.
+    restrict uint iTiles[];
+};
+// One invocation per tile.
+void main(){
+    uint tileIndex = gl_GlobalInvocationID . x;
+    if(tileIndex >= uint(uTileCount))
+        return;
+    // Binary search for the last path whose first tile index is <= tileIndex (at most 1024 iterations).
+    uint lowPathIndex = 0, highPathIndex = uint(uPathCount);
+    int iteration = 0;
+    while(iteration < 1024 && lowPathIndex + 1 < highPathIndex){
+        uint midPathIndex = lowPathIndex +(highPathIndex - lowPathIndex)/ 2;
+        uint midTileIndex = iTilePathInfo[midPathIndex]. z;
+        if(tileIndex < midTileIndex){
+            highPathIndex = midPathIndex;
+        } else {
+            lowPathIndex = midPathIndex;
+            if(tileIndex == midTileIndex)
+                break;
+        }
+        iteration ++;
+    }
+    // lowPathIndex now identifies the owning path.
+    uint pathIndex = lowPathIndex;
+    uvec4 pathInfo = iTilePathInfo[pathIndex];
+    // Sign-extend the four packed 16-bit rect coordinates.
+    ivec2 packedTileRect = ivec2(pathInfo . xy);
+    ivec4 tileRect = ivec4((packedTileRect . x << 16)>> 16, packedTileRect . x >> 16,
+                           (packedTileRect . y << 16)>> 16, packedTileRect . y >> 16);
+    // NOTE(review): tileCoords (and tileOffset/tileWidth feeding it) is computed but never read below.
+    uint tileOffset = tileIndex - pathInfo . z;
+    uint tileWidth = uint(tileRect . z - tileRect . x);
+    ivec2 tileCoords = tileRect . xy + ivec2(tileOffset % tileWidth, tileOffset / tileWidth);
+    // Reset the tile record.
+    iTiles[tileIndex * 4 + 0]= ~ 0u;
+    iTiles[tileIndex * 4 + 1]= ~ 0u;
+    iTiles[tileIndex * 4 + 2]= 0x00ffffffu;
+    iTiles[tileIndex * 4 + 3]= pathInfo . w;
+}
+
diff --git a/resources/shaders/gl4/d3d11/dice.cs.glsl b/resources/shaders/gl4/d3d11/dice.cs.glsl
new file mode 100644
index 00000000..42549df7
--- /dev/null
+++ b/resources/shaders/gl4/d3d11/dice.cs.glsl
@@ -0,0 +1,220 @@
+#version {{version}}
+// Automatically generated from files in pathfinder/shaders/. Do not edit!
+
+// Dices path segments into microlines. Each invocation handles one input segment (line or
+// curve), splits it into evenly spaced pieces in t, and writes one packed microline per piece.
+
+
+
+
+
+
+
+
+
+
+
+#extension GL_GOOGLE_include_directive : enable
+
+
+
+
+
+
+
+
+
+
+
+
+
+precision highp float;
+
+
+
+
+
+layout(local_size_x = 64)in;
+// uTransform/uTranslation are applied to every input point (see getPoint()).
+uniform mat2 uTransform;
+uniform vec2 uTranslation;
+uniform int uPathCount;
+uniform int uLastBatchSegmentIndex;
+uniform int uMaxMicrolineCount;
+
+layout(std430, binding = 0)buffer bComputeIndirectParams {
+
+
+    // Element [3] is the running microline count: main() reserves output slots from it with
+    // atomicAdd. The other elements are not touched by this shader.
+    restrict uint iComputeIndirectParams[];
+};
+
+
+layout(std430, binding = 1)buffer bDiceMetadata {
+
+
+    // One uvec4 per path, as read in main(): y = the path's first global segment index,
+    // z = the path's first segment index within this batch (the binary-search key).
+    restrict readonly uvec4 iDiceMetadata[];
+};
+
+layout(std430, binding = 2)buffer bPoints {
+    restrict readonly vec2 iPoints[];
+};
+
+layout(std430, binding = 3)buffer bInputIndices {
+    restrict readonly uvec2 iInputIndices[];
+};
+
+layout(std430, binding = 4)buffer bMicrolines {
+
+    // Packed by emitMicroline(): x = from point (two 16-bit integer coordinates),
+    // y = to point (same packing), z = four 8-bit fractional parts (from.x, from.y, to.x, to.y),
+    // w = path index.
+    restrict uvec4 iMicrolines[];
+};
+// Packs and stores one microline; silently drops it if outputMicrolineIndex >= uMaxMicrolineCount.
+void emitMicroline(vec4 microlineSegment, uint pathIndex, uint outputMicrolineIndex){
+    if(outputMicrolineIndex >= uMaxMicrolineCount)
+        return;
+    // Clamp to [-32768, 32767], convert to 1/256 subpixels, then split into integer and fraction.
+    ivec4 microlineSubpixels = ivec4(round(clamp(microlineSegment, - 32768.0, 32767.0)* 256.0));
+    ivec4 microlinePixels = ivec4(floor(vec4(microlineSubpixels)/ 256.0));
+    ivec4 microlineFractPixels = microlineSubpixels - microlinePixels * 256;
+
+    iMicrolines[outputMicrolineIndex]=
+        uvec4((uint(microlinePixels . x)& 0xffff)|(uint(microlinePixels . y)<< 16),
+              (uint(microlinePixels . z)& 0xffff)|(uint(microlinePixels . w)<< 16),
+              uint(microlineFractPixels . x)|(uint(microlineFractPixels . y)<< 8)|
+              (uint(microlineFractPixels . z)<< 16)|(uint(microlineFractPixels . w)<< 24),
+              pathIndex);
+}
+
+// Flatness test on a cubic's control points. NOTE(review): not called anywhere in this shader.
+bool curveIsFlat(vec4 baseline, vec4 ctrl){
+    vec4 uv = vec4(3.0)* ctrl - vec4(2.0)* baseline - baseline . zwxy;
+    uv *= uv;
+    uv = max(uv, uv . zwxy);
+    return uv . x + uv . y <= 16.0 * 0.25 * 0.25;
+}
+// Splits a cubic at t into two cubics by repeated interpolation. NOTE(review): not called anywhere in this shader.
+void subdivideCurve(vec4 baseline,
+                    vec4 ctrl,
+                    float t,
+                    out vec4 prevBaseline,
+                    out vec4 prevCtrl,
+                    out vec4 nextBaseline,
+                    out vec4 nextCtrl){
+    vec2 p0 = baseline . xy, p1 = ctrl . xy, p2 = ctrl . zw, p3 = baseline . zw;
+    vec2 p0p1 = mix(p0, p1, t), p1p2 = mix(p1, p2, t), p2p3 = mix(p2, p3, t);
+    vec2 p0p1p2 = mix(p0p1, p1p2, t), p1p2p3 = mix(p1p2, p2p3, t);
+    vec2 p0p1p2p3 = mix(p0p1p2, p1p2p3, t);
+    prevBaseline = vec4(p0, p0p1p2p3);
+    prevCtrl = vec4(p0p1, p0p1p2);
+    nextBaseline = vec4(p0p1p2p3, p3);
+    nextCtrl = vec4(p1p2p3, p2p3);
+}
+// Evaluates the cubic (baseline.xy, ctrl.xy, ctrl.zw, baseline.zw) at t by repeated interpolation.
+vec2 sampleCurve(vec4 baseline, vec4 ctrl, float t){
+    vec2 p0 = baseline . xy, p1 = ctrl . xy, p2 = ctrl . zw, p3 = baseline . zw;
+    vec2 p0p1 = mix(p0, p1, t), p1p2 = mix(p1, p2, t), p2p3 = mix(p2, p3, t);
+    vec2 p0p1p2 = mix(p0p1, p1p2, t), p1p2p3 = mix(p1p2, p2p3, t);
+    return mix(p0p1p2, p1p2p3, t);
+}
+// Evaluates the line from line.xy to line.zw at t.
+vec2 sampleLine(vec4 line, float t){
+    return mix(line . xy, line . zw, t);
+}
+// Fetches an input point and applies the uniform transform and translation.
+vec2 getPoint(uint pointIndex){
+    return uTransform * iPoints[pointIndex]+ uTranslation;
+}
+// One invocation per input segment in the batch.
+void main(){
+    uint batchSegmentIndex = gl_GlobalInvocationID . x;
+    if(batchSegmentIndex >= uLastBatchSegmentIndex)
+        return;
+
+    // Binary search for the path containing this segment, keyed on iDiceMetadata[].z (at most 1024 iterations).
+    uint lowPathIndex = 0, highPathIndex = uint(uPathCount);
+    int iteration = 0;
+    while(iteration < 1024 && lowPathIndex + 1 < highPathIndex){
+        uint midPathIndex = lowPathIndex +(highPathIndex - lowPathIndex)/ 2;
+        uint midBatchSegmentIndex = iDiceMetadata[midPathIndex]. z;
+        if(batchSegmentIndex < midBatchSegmentIndex){
+            highPathIndex = midPathIndex;
+        } else {
+            lowPathIndex = midPathIndex;
+            if(batchSegmentIndex == midBatchSegmentIndex)
+                break;
+        }
+        iteration ++;
+    }
+    // Translate the batch-relative segment index into a global one.
+    uint batchPathIndex = lowPathIndex;
+    uvec4 diceMetadata = iDiceMetadata[batchPathIndex];
+    uint firstGlobalSegmentIndexInPath = diceMetadata . y;
+    uint firstBatchSegmentIndexInPath = diceMetadata . z;
+    uint globalSegmentIndex = batchSegmentIndex - firstBatchSegmentIndexInPath +
+        firstGlobalSegmentIndexInPath;
+
+    uvec2 inputIndices = iInputIndices[globalSegmentIndex];
+    uint fromPointIndex = inputIndices . x, flagsPathIndex = inputIndices . y;
+    // Flag bits choose the endpoint: 0x40000000 -> 3 points ahead, 0x80000000 -> 2 ahead, neither -> 1 ahead.
+    uint toPointIndex = fromPointIndex;
+    if((flagsPathIndex & 0x40000000u)!= 0u)
+        toPointIndex += 3;
+    else if((flagsPathIndex & 0x80000000u)!= 0u)
+        toPointIndex += 2;
+    else
+        toPointIndex += 1;
+
+    vec4 baseline = vec4(getPoint(fromPointIndex), getPoint(toPointIndex));
+
+
+    // Curves: with flag 0x80000000 the single control point is converted into two cubic control
+    // points, (endpoint + 2 * ctrl0) / 3; otherwise both control points are read directly.
+    // The piece count comes from a second-difference bound for curves and from length / 16 for lines.
+    vec4 ctrl = vec4(0.0);
+    float segmentCountF;
+    bool isCurve =(flagsPathIndex &(0x40000000u |
+                                     0x80000000u))!= 0;
+    if(isCurve){
+        vec2 ctrl0 = getPoint(fromPointIndex + 1);
+        if((flagsPathIndex & 0x80000000u)!= 0){
+            vec2 ctrl0_2 = ctrl0 * vec2(2.0);
+            ctrl =(baseline +(ctrl0 * vec2(2.0)). xyxy)* vec4(1.0 / 3.0);
+        } else {
+            ctrl = vec4(ctrl0, getPoint(fromPointIndex + 2));
+        }
+        vec2 bound = vec2(6.0)* max(abs(ctrl . zw - 2.0 * ctrl . xy + baseline . xy),
+                                    abs(baseline . zw - 2.0 * ctrl . zw + ctrl . xy));
+        segmentCountF = sqrt(length(bound)/(8.0 * 0.25));
+    } else {
+        segmentCountF = length(baseline . zw - baseline . xy)/ 16.0;
+    }
+
+    // Always emit at least one microline, and reserve a contiguous run of output slots.
+    int segmentCount = max(int(ceil(segmentCountF)), 1);
+    uint firstOutputMicrolineIndex =
+        atomicAdd(iComputeIndirectParams[3],
+                  segmentCount);
+    // NOTE(review): prevT below and ctrl0_2 above are assigned but never read.
+    float prevT = 0.0;
+    vec2 prevPoint = baseline . xy;
+    for(int segmentIndex = 0;segmentIndex < segmentCount;segmentIndex ++){
+        float nextT = float(segmentIndex + 1)/ float(segmentCount);
+        vec2 nextPoint;
+        if(isCurve)
+            nextPoint = sampleCurve(baseline, ctrl, nextT);
+        else
+            nextPoint = sampleLine(baseline, nextT);
+        emitMicroline(vec4(prevPoint, nextPoint),
+                      batchPathIndex,
+                      firstOutputMicrolineIndex + segmentIndex);
+        prevT = nextT;
+        prevPoint = nextPoint;
+    }
+}
+
diff --git a/resources/shaders/gl4/d3d11/fill.cs.glsl b/resources/shaders/gl4/d3d11/fill.cs.glsl
new file mode 100644
index 00000000..ccc459d9
--- /dev/null
+++ b/resources/shaders/gl4/d3d11/fill.cs.glsl
@@ -0,0 +1,145 @@
+#version {{version}}
+// Automatically generated from files in pathfinder/shaders/. Do not edit!
+
+// Computes coverage for alpha tiles. Each workgroup handles one alpha tile: it walks the tile's
+// linked list of fills, accumulates coverage from the area LUT, and stores the result in uDest.
+
+
+
+
+
+
+
+
+
+#extension GL_GOOGLE_include_directive : enable
+
+precision highp float;
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+// Coverage contribution of the segment from -> to, given relative to the sample position.
+// Returns the area LUT sample scaled by the signed x extent of the clamped segment.
+vec4 computeCoverage(vec2 from, vec2 to, sampler2D areaLUT){
+    // Order the endpoints by x.
+    vec2 left = from . x < to . x ? from : to, right = from . x < to . x ? to : from;
+
+    // Clamp both x endpoints to [-0.5, 0.5] and find t at the middle of that window.
+    vec2 window = clamp(vec2(from . x, to . x), - 0.5, 0.5);
+    float offset = mix(window . x, window . y, 0.5)- left . x;
+    float t = offset /(right . x - left . x);
+
+    // y at the window's middle, and the segment's slope.
+    float y = mix(left . y, right . y, t);
+    float d =(right . y - left . y)/(right . x - left . x);
+
+    // Signed width of the window; its sign carries the segment's x direction.
+    float dX = window . x - window . y;
+    return texture(areaLUT, vec2(y + 8.0, abs(d * dX))/ 16.0)* dX;
+}
+
+// 16 x 4 invocations per workgroup.
+layout(local_size_x = 16, local_size_y = 4)in;
+
+
+
+
+
+// uAlphaTileRange: x = index of the first alpha tile in this dispatch, y = end index (exclusive).
+layout(rgba8)uniform image2D uDest;
+uniform sampler2D uAreaLUT;
+uniform ivec2 uAlphaTileRange;
+
+layout(std430, binding = 0)buffer bFills {
+    restrict readonly uint iFills[];
+};
+
+layout(std430, binding = 1)buffer bTiles {
+
+
+    // Four uints per tile. main() reads: [+1] = index of the first fill in the tile's list,
+    // [+2] = low 24 bits interpreted as a signed value (the tile is skipped when negative),
+    // [+3] = backdrop in the top 8 bits (signed).
+    restrict uint iTiles[];
+};
+
+layout(std430, binding = 2)buffer bAlphaTiles {
+    // Two uints per alpha tile: [0] = index into iTiles,
+    // [1] = alpha tile index of the clip mask, negative when there is none.
+    restrict readonly uint iAlphaTiles[];
+};
+
+
+
+
+
+
+
+
+// Sums computeCoverage() over the fill list starting at fillIndex. Each fill is three uints:
+// packed from point, packed to point (16-bit x/y in 1/256 units), and the next fill index.
+// The walk stops at a negative link or after 1024 fills. NOTE(review): the body runs once before
+// fillIndex is tested, so a negative starting index is read as-is — confirm callers never pass one.
+vec4 accumulateCoverageForFillList(int fillIndex, ivec2 tileSubCoord){
+    vec2 tileFragCoord = vec2(tileSubCoord)+ vec2(0.5);
+    vec4 coverages = vec4(0.0);
+    int iteration = 0;
+    do {
+        uint fillFrom = iFills[fillIndex * 3 + 0], fillTo = iFills[fillIndex * 3 + 1];
+        vec4 lineSegment = vec4(fillFrom & 0xffff, fillFrom >> 16,
+                                fillTo & 0xffff, fillTo >> 16)/ 256.0;
+        lineSegment -= tileFragCoord . xyxy;
+        coverages += computeCoverage(lineSegment . xy, lineSegment . zw, uAreaLUT);
+        fillIndex = int(iFills[fillIndex * 3 + 2]);
+        iteration ++;
+    } while(fillIndex >= 0 && iteration < 1024);
+    return coverages;
+}
+// Texel in uDest for this invocation within the given alpha tile (low byte = column, higher bits = row).
+// NOTE(review): `+` binds tighter than `&` in GLSL, so y is (idx >> 8) & (0xff + (byte2 << 8)); confirm that is intended.
+ivec2 computeTileCoord(uint alphaTileIndex){
+    uint x = alphaTileIndex & 0xff;
+    uint y =(alphaTileIndex >> 8u)& 0xff +(((alphaTileIndex >> 16u)& 0xff)<< 8u);
+    return ivec2(16, 4)* ivec2(x, y)+ ivec2(gl_LocalInvocationID . xy);
+}
+// One workgroup per alpha tile.
+void main(){
+    ivec2 tileSubCoord = ivec2(gl_LocalInvocationID . xy)* ivec2(1, 4);
+
+    // The alpha tile index within the batch is spread over the x and y workgroup IDs (y shifted left by 15).
+    uint batchAlphaTileIndex =(gl_WorkGroupID . x |(gl_WorkGroupID . y << 15));
+    uint alphaTileIndex = batchAlphaTileIndex + uint(uAlphaTileRange . x);
+    if(alphaTileIndex >= uint(uAlphaTileRange . y))
+        return;
+
+    uint tileIndex = iAlphaTiles[batchAlphaTileIndex * 2 + 0];
+    if((int(iTiles[tileIndex * 4 + 2]<< 8)>> 8)< 0)
+        return;
+    // Head of the fill list, and the tile's signed 8-bit backdrop.
+    int fillIndex = int(iTiles[tileIndex * 4 + 1]);
+    int backdrop = int(iTiles[tileIndex * 4 + 3])>> 24;
+
+    // Start from the backdrop, add the fills, then take |coverage| clamped to [0, 1].
+    vec4 coverages = vec4(backdrop);
+    coverages += accumulateCoverageForFillList(fillIndex, tileSubCoord);
+    coverages = clamp(abs(coverages), 0.0, 1.0);
+
+    // If a clip mask tile is attached, take the minimum with the coverage already stored for it.
+    int clipTileIndex = int(iAlphaTiles[batchAlphaTileIndex * 2 + 1]);
+    if(clipTileIndex >= 0)
+        coverages = min(coverages, imageLoad(uDest, computeTileCoord(clipTileIndex)));
+
+    imageStore(uDest, computeTileCoord(alphaTileIndex), coverages);
+}
+
diff --git a/resources/shaders/gl4/d3d11/propagate.cs.glsl b/resources/shaders/gl4/d3d11/propagate.cs.glsl
new file mode 100644
index 00000000..a4d9d182
--- /dev/null
+++ b/resources/shaders/gl4/d3d11/propagate.cs.glsl
@@ -0,0 +1,227 @@
+#version {{version}}
+// Automatically generated from files in pathfinder/shaders/. Do not edit!
+ + + + + + + + + + + + + + +#extension GL_GOOGLE_include_directive : enable + +precision highp float; + + + + + +layout(local_size_x = 64)in; + + + + + + +uniform ivec2 uFramebufferTileSize; +uniform int uColumnCount; +uniform int uFirstAlphaTileIndex; + +layout(std430, binding = 0)buffer bDrawMetadata { + + + + + + + restrict readonly uvec4 iDrawMetadata[]; +}; + +layout(std430, binding = 1)buffer bClipMetadata { + + + + + + restrict readonly uvec4 iClipMetadata[]; +}; + +layout(std430, binding = 2)buffer bBackdrops { + + + + restrict readonly int iBackdrops[]; +}; + +layout(std430, binding = 3)buffer bDrawTiles { + + + + + restrict uint iDrawTiles[]; +}; + +layout(std430, binding = 4)buffer bClipTiles { + + + + + restrict uint iClipTiles[]; +}; + +layout(std430, binding = 5)buffer bZBuffer { + restrict int iZBuffer[]; +}; + +layout(std430, binding = 6)buffer bFirstTileMap { + restrict int iFirstTileMap[]; +}; + +layout(std430, binding = 7)buffer bIndirectDrawParams { + + + + + + restrict uint iIndirectDrawParams[]; +}; + +layout(std430, binding = 8)buffer bAlphaTiles { + + + restrict uint iAlphaTiles[]; +}; + +uint calculateTileIndex(uint bufferOffset, uvec4 tileRect, uvec2 tileCoord){ + return bufferOffset + tileCoord . y *(tileRect . z - tileRect . x)+ tileCoord . x; +} + +void main(){ + uint columnIndex = gl_GlobalInvocationID . x; + if(int(columnIndex)>= uColumnCount) + return; + + int currentBackdrop = iBackdrops[columnIndex * 3 + 0]; + int tileX = iBackdrops[columnIndex * 3 + 1]; + uint drawPathIndex = uint(iBackdrops[columnIndex * 3 + 2]); + + uvec4 drawTileRect = iDrawMetadata[drawPathIndex * 3 + 0]; + uvec4 drawOffsets = iDrawMetadata[drawPathIndex * 3 + 1]; + uvec2 drawTileSize = drawTileRect . zw - drawTileRect . xy; + uint drawTileBufferOffset = drawOffsets . x; + bool zWrite = drawOffsets . z != 0; + + int clipPathIndex = int(drawOffsets . 
w); + uvec4 clipTileRect = uvec4(0u), clipOffsets = uvec4(0u); + if(clipPathIndex >= 0){ + clipTileRect = iClipMetadata[clipPathIndex * 2 + 0]; + clipOffsets = iClipMetadata[clipPathIndex * 2 + 1]; + } + uint clipTileBufferOffset = clipOffsets . x, clipBackdropOffset = clipOffsets . y; + + for(uint tileY = 0;tileY < drawTileSize . y;tileY ++){ + uvec2 drawTileCoord = uvec2(tileX, tileY); + uint drawTileIndex = calculateTileIndex(drawTileBufferOffset, drawTileRect, drawTileCoord); + + int drawAlphaTileIndex = - 1; + int clipAlphaTileIndex = - 1; + int drawFirstFillIndex = int(iDrawTiles[drawTileIndex * 4 + 1]); + int drawBackdropDelta = + int(iDrawTiles[drawTileIndex * 4 + 2])>> 24; + uint drawTileWord = iDrawTiles[drawTileIndex * 4 + 3]& 0x00ffffff; + + int drawTileBackdrop = currentBackdrop; + bool haveDrawAlphaMask = drawFirstFillIndex >= 0; + bool needNewAlphaTile = haveDrawAlphaMask; + + + if(clipPathIndex >= 0){ + uvec2 tileCoord = drawTileCoord + drawTileRect . xy; + if(all(bvec4(greaterThanEqual(tileCoord, clipTileRect . xy), + lessThan(tileCoord, clipTileRect . zw)))){ + uvec2 clipTileCoord = tileCoord - clipTileRect . 
xy; + uint clipTileIndex = calculateTileIndex(clipTileBufferOffset, + clipTileRect, + clipTileCoord); + + + + + + + int thisClipAlphaTileIndex = + int(iClipTiles[clipTileIndex * 4 + + 2]<< 8)>> 8; + + uint clipTileWord = iClipTiles[clipTileIndex * 4 + 3]; + int clipTileBackdrop = int(clipTileWord)>> 24; + + if(thisClipAlphaTileIndex >= 0){ + if(haveDrawAlphaMask){ + clipAlphaTileIndex = thisClipAlphaTileIndex; + needNewAlphaTile = true; + } else { + if(drawTileBackdrop != 0){ + + + drawAlphaTileIndex = thisClipAlphaTileIndex; + clipAlphaTileIndex = - 1; + needNewAlphaTile = false; + } else { + + drawAlphaTileIndex = - 1; + clipAlphaTileIndex = - 1; + needNewAlphaTile = false; + } + } + } else { + + if(clipTileBackdrop == 0){ + + drawTileBackdrop = 0; + needNewAlphaTile = false; + } else { + needNewAlphaTile = true; + } + } + } else { + + drawTileBackdrop = 0; + needNewAlphaTile = false; + } + } + + if(needNewAlphaTile){ + uint drawBatchAlphaTileIndex = atomicAdd(iIndirectDrawParams[4], 1); + iAlphaTiles[drawBatchAlphaTileIndex * 2 + 0]= drawTileIndex; + iAlphaTiles[drawBatchAlphaTileIndex * 2 + 1]= clipAlphaTileIndex; + drawAlphaTileIndex = int(drawBatchAlphaTileIndex)+ uFirstAlphaTileIndex; + } + + iDrawTiles[drawTileIndex * 4 + 2]= + (uint(drawAlphaTileIndex)& 0x00ffffffu)|(uint(drawBackdropDelta)<< 24); + iDrawTiles[drawTileIndex * 4 + 3]= + drawTileWord |(uint(drawTileBackdrop)<< 24); + + + ivec2 tileCoord = ivec2(tileX, tileY)+ ivec2(drawTileRect . xy); + int tileMapIndex = tileCoord . y * uFramebufferTileSize . x + tileCoord . 
x; + if(zWrite && drawTileBackdrop != 0 && drawAlphaTileIndex < 0) + atomicMax(iZBuffer[tileMapIndex], int(drawTileIndex)); + + + if(drawTileBackdrop != 0 || drawAlphaTileIndex >= 0){ + int nextTileIndex = atomicExchange(iFirstTileMap[tileMapIndex], int(drawTileIndex)); + iDrawTiles[drawTileIndex * 4 + 0]= nextTileIndex; + } + + currentBackdrop += drawBackdropDelta; + } +} + diff --git a/resources/shaders/gl4/d3d11/sort.cs.glsl b/resources/shaders/gl4/d3d11/sort.cs.glsl new file mode 100644 index 00000000..42de2d39 --- /dev/null +++ b/resources/shaders/gl4/d3d11/sort.cs.glsl @@ -0,0 +1,96 @@ +#version {{version}} +// Automatically generated from files in pathfinder/shaders/. Do not edit! + + + + + + + + + + + + +#extension GL_GOOGLE_include_directive : enable + +precision highp float; + + + + + + + + + + +uniform int uTileCount; + +layout(std430, binding = 0)buffer bTiles { + + + + + restrict uint iTiles[]; +}; + +layout(std430, binding = 1)buffer bFirstTileMap { + restrict int iFirstTileMap[]; +}; + +layout(std430, binding = 2)buffer bZBuffer { + restrict readonly int iZBuffer[]; +}; + +layout(local_size_x = 64)in; + +int getFirst(uint globalTileIndex){ + return iFirstTileMap[globalTileIndex]; +} + +int getNextTile(int tileIndex){ + return int(iTiles[tileIndex * 4 + 0]); +} + +void setNextTile(int tileIndex, int newNextTileIndex){ + iTiles[tileIndex * 4 + 0]= uint(newNextTileIndex); +} + +void main(){ + uint globalTileIndex = gl_GlobalInvocationID . 
x; + if(globalTileIndex >= uint(uTileCount)) + return; + + int zValue = iZBuffer[globalTileIndex]; + + int unsortedFirstTileIndex = getFirst(globalTileIndex); + int sortedFirstTileIndex = - 1; + + while(unsortedFirstTileIndex >= 0){ + int currentTileIndex = unsortedFirstTileIndex; + unsortedFirstTileIndex = getNextTile(currentTileIndex); + + if(currentTileIndex >= zValue){ + int prevTrialTileIndex = - 1; + int trialTileIndex = sortedFirstTileIndex; + while(true){ + if(trialTileIndex < 0 || currentTileIndex < trialTileIndex){ + if(prevTrialTileIndex < 0){ + setNextTile(currentTileIndex, sortedFirstTileIndex); + sortedFirstTileIndex = currentTileIndex; + } else { + setNextTile(currentTileIndex, trialTileIndex); + setNextTile(prevTrialTileIndex, currentTileIndex); + } + break; + } + prevTrialTileIndex = trialTileIndex; + trialTileIndex = getNextTile(trialTileIndex); + } + } + } + + iFirstTileMap[globalTileIndex]= sortedFirstTileIndex; +} + diff --git a/resources/shaders/gl4/d3d11/tile.cs.glsl b/resources/shaders/gl4/d3d11/tile.cs.glsl new file mode 100644 index 00000000..c7470675 --- /dev/null +++ b/resources/shaders/gl4/d3d11/tile.cs.glsl @@ -0,0 +1,791 @@ +#version {{version}} +// Automatically generated from files in pathfinder/shaders/. Do not edit! + + + + + + + + + + + + +#extension GL_GOOGLE_include_directive : enable + +precision highp float; + + + + + +layout(local_size_x = 16, local_size_y = 4)in; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +vec4 sampleColor(sampler2D colorTexture, vec2 colorTexCoord){ + return texture(colorTexture, colorTexCoord); +} + + + +vec4 combineColor0(vec4 destColor, vec4 srcColor, int op){ + switch(op){ + case 0x1 : + return vec4(srcColor . rgb, srcColor . a * destColor . a); + case 0x2 : + return vec4(destColor . rgb, srcColor . a * destColor . 
a); + } + return destColor; +} + + + +float filterTextSample1Tap(float offset, sampler2D colorTexture, vec2 colorTexCoord){ + return texture(colorTexture, colorTexCoord + vec2(offset, 0.0)). r; +} + + +void filterTextSample9Tap(out vec4 outAlphaLeft, + out float outAlphaCenter, + out vec4 outAlphaRight, + sampler2D colorTexture, + vec2 colorTexCoord, + vec4 kernel, + float onePixel){ + bool wide = kernel . x > 0.0; + outAlphaLeft = + vec4(wide ? filterTextSample1Tap(- 4.0 * onePixel, colorTexture, colorTexCoord): 0.0, + filterTextSample1Tap(- 3.0 * onePixel, colorTexture, colorTexCoord), + filterTextSample1Tap(- 2.0 * onePixel, colorTexture, colorTexCoord), + filterTextSample1Tap(- 1.0 * onePixel, colorTexture, colorTexCoord)); + outAlphaCenter = filterTextSample1Tap(0.0, colorTexture, colorTexCoord); + outAlphaRight = + vec4(filterTextSample1Tap(1.0 * onePixel, colorTexture, colorTexCoord), + filterTextSample1Tap(2.0 * onePixel, colorTexture, colorTexCoord), + filterTextSample1Tap(3.0 * onePixel, colorTexture, colorTexCoord), + wide ? filterTextSample1Tap(4.0 * onePixel, colorTexture, colorTexCoord): 0.0); +} + +float filterTextConvolve7Tap(vec4 alpha0, vec3 alpha1, vec4 kernel){ + return dot(alpha0, kernel)+ dot(alpha1, kernel . zyx); +} + +float filterTextGammaCorrectChannel(float bgColor, float fgColor, sampler2D gammaLUT){ + return texture(gammaLUT, vec2(fgColor, 1.0 - bgColor)). r; +} + + +vec3 filterTextGammaCorrect(vec3 bgColor, vec3 fgColor, sampler2D gammaLUT){ + return vec3(filterTextGammaCorrectChannel(bgColor . r, fgColor . r, gammaLUT), + filterTextGammaCorrectChannel(bgColor . g, fgColor . g, gammaLUT), + filterTextGammaCorrectChannel(bgColor . b, fgColor . b, gammaLUT)); +} + + + + + + +vec4 filterText(vec2 colorTexCoord, + sampler2D colorTexture, + sampler2D gammaLUT, + vec2 colorTextureSize, + vec4 filterParams0, + vec4 filterParams1, + vec4 filterParams2){ + + vec4 kernel = filterParams0; + vec3 bgColor = filterParams1 . 
rgb; + vec3 fgColor = filterParams2 . rgb; + bool gammaCorrectionEnabled = filterParams2 . a != 0.0; + + + vec3 alpha; + if(kernel . w == 0.0){ + alpha = texture(colorTexture, colorTexCoord). rrr; + } else { + vec4 alphaLeft, alphaRight; + float alphaCenter; + filterTextSample9Tap(alphaLeft, + alphaCenter, + alphaRight, + colorTexture, + colorTexCoord, + kernel, + 1.0 / colorTextureSize . x); + + float r = filterTextConvolve7Tap(alphaLeft, vec3(alphaCenter, alphaRight . xy), kernel); + float g = filterTextConvolve7Tap(vec4(alphaLeft . yzw, alphaCenter), alphaRight . xyz, kernel); + float b = filterTextConvolve7Tap(vec4(alphaLeft . zw, alphaCenter, alphaRight . x), + alphaRight . yzw, + kernel); + + alpha = vec3(r, g, b); + } + + + if(gammaCorrectionEnabled) + alpha = filterTextGammaCorrect(bgColor, alpha, gammaLUT); + + + return vec4(mix(bgColor, fgColor, alpha), 1.0); +} + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +vec4 filterRadialGradient(vec2 colorTexCoord, + sampler2D colorTexture, + vec2 colorTextureSize, + vec2 fragCoord, + vec2 framebufferSize, + vec4 filterParams0, + vec4 filterParams1){ + vec2 lineFrom = filterParams0 . xy, lineVector = filterParams0 . zw; + vec2 radii = filterParams1 . xy, uvOrigin = filterParams1 . zw; + + vec2 dP = colorTexCoord - lineFrom, dC = lineVector; + float dR = radii . y - radii . x; + + float a = dot(dC, dC)- dR * dR; + float b = dot(dP, dC)+ radii . x * dR; + float c = dot(dP, dP)- radii . x * radii . x; + float discrim = b * b - a * c; + + vec4 color = vec4(0.0); + if(abs(discrim)>= 0.00001){ + vec2 ts = vec2(sqrt(discrim)* vec2(1.0, - 1.0)+ vec2(b))/ vec2(a); + if(ts . x > ts . y) + ts = ts . yx; + float t = ts . x >= 0.0 ? ts . x : ts . 
y; + color = texture(colorTexture, uvOrigin + vec2(clamp(t, 0.0, 1.0), 0.0)); + } + + return color; +} + + + + + + +vec4 filterBlur(vec2 colorTexCoord, + sampler2D colorTexture, + vec2 colorTextureSize, + vec4 filterParams0, + vec4 filterParams1){ + + vec2 srcOffsetScale = filterParams0 . xy / colorTextureSize; + int support = int(filterParams0 . z); + vec3 gaussCoeff = filterParams1 . xyz; + + + float gaussSum = gaussCoeff . x; + vec4 color = texture(colorTexture, colorTexCoord)* gaussCoeff . x; + gaussCoeff . xy *= gaussCoeff . yz; + + + + + + + + + + for(int i = 1;i <= support;i += 2){ + float gaussPartialSum = gaussCoeff . x; + gaussCoeff . xy *= gaussCoeff . yz; + gaussPartialSum += gaussCoeff . x; + + vec2 srcOffset = srcOffsetScale *(float(i)+ gaussCoeff . x / gaussPartialSum); + color +=(texture(colorTexture, colorTexCoord - srcOffset)+ + texture(colorTexture, colorTexCoord + srcOffset))* gaussPartialSum; + + gaussSum += 2.0 * gaussPartialSum; + gaussCoeff . xy *= gaussCoeff . yz; + } + + + return color / gaussSum; +} + +vec4 filterNone(vec2 colorTexCoord, sampler2D colorTexture){ + return sampleColor(colorTexture, colorTexCoord); +} + +vec4 filterColor(vec2 colorTexCoord, + sampler2D colorTexture, + sampler2D gammaLUT, + vec2 colorTextureSize, + vec2 fragCoord, + vec2 framebufferSize, + vec4 filterParams0, + vec4 filterParams1, + vec4 filterParams2, + int colorFilter){ + switch(colorFilter){ + case 0x1 : + return filterRadialGradient(colorTexCoord, + colorTexture, + colorTextureSize, + fragCoord, + framebufferSize, + filterParams0, + filterParams1); + case 0x3 : + return filterBlur(colorTexCoord, + colorTexture, + colorTextureSize, + filterParams0, + filterParams1); + case 0x2 : + return filterText(colorTexCoord, + colorTexture, + gammaLUT, + colorTextureSize, + filterParams0, + filterParams1, + filterParams2); + } + return filterNone(colorTexCoord, colorTexture); +} + + + +vec3 compositeSelect(bvec3 cond, vec3 ifTrue, vec3 ifFalse){ + return vec3(cond . 
x ? ifTrue . x : ifFalse . x, + cond . y ? ifTrue . y : ifFalse . y, + cond . z ? ifTrue . z : ifFalse . z); +} + +float compositeDivide(float num, float denom){ + return denom != 0.0 ? num / denom : 0.0; +} + +vec3 compositeColorDodge(vec3 destColor, vec3 srcColor){ + bvec3 destZero = equal(destColor, vec3(0.0)), srcOne = equal(srcColor, vec3(1.0)); + return compositeSelect(destZero, + vec3(0.0), + compositeSelect(srcOne, vec3(1.0), destColor /(vec3(1.0)- srcColor))); +} + + +vec3 compositeHSLToRGB(vec3 hsl){ + float a = hsl . y * min(hsl . z, 1.0 - hsl . z); + vec3 ks = mod(vec3(0.0, 8.0, 4.0)+ vec3(hsl . x * 1.9098593171027443), 12.0); + return hsl . zzz - clamp(min(ks - vec3(3.0), vec3(9.0)- ks), - 1.0, 1.0)* a; +} + + +vec3 compositeRGBToHSL(vec3 rgb){ + float v = max(max(rgb . r, rgb . g), rgb . b), xMin = min(min(rgb . r, rgb . g), rgb . b); + float c = v - xMin, l = mix(xMin, v, 0.5); + vec3 terms = rgb . r == v ? vec3(0.0, rgb . gb): + rgb . g == v ? vec3(2.0, rgb . br): + vec3(4.0, rgb . rg); + float h = 1.0471975511965976 * compositeDivide(terms . x * c + terms . y - terms . 
z, c); + float s = compositeDivide(c, v); + return vec3(h, s, l); +} + +vec3 compositeScreen(vec3 destColor, vec3 srcColor){ + return destColor + srcColor - destColor * srcColor; +} + +vec3 compositeHardLight(vec3 destColor, vec3 srcColor){ + return compositeSelect(lessThanEqual(srcColor, vec3(0.5)), + destColor * vec3(2.0)* srcColor, + compositeScreen(destColor, vec3(2.0)* srcColor - vec3(1.0))); +} + +vec3 compositeSoftLight(vec3 destColor, vec3 srcColor){ + vec3 darkenedDestColor = + compositeSelect(lessThanEqual(destColor, vec3(0.25)), + ((vec3(16.0)* destColor - 12.0)* destColor + 4.0)* destColor, + sqrt(destColor)); + vec3 factor = compositeSelect(lessThanEqual(srcColor, vec3(0.5)), + destColor *(vec3(1.0)- destColor), + darkenedDestColor - destColor); + return destColor +(srcColor * 2.0 - 1.0)* factor; +} + +vec3 compositeHSL(vec3 destColor, vec3 srcColor, int op){ + switch(op){ + case 0xc : + return vec3(srcColor . x, destColor . y, destColor . z); + case 0xd : + return vec3(destColor . x, srcColor . y, destColor . z); + case 0xe : + return vec3(srcColor . x, srcColor . y, destColor . z); + default : + return vec3(destColor . x, destColor . y, srcColor . 
z); + } +} + +vec3 compositeRGB(vec3 destColor, vec3 srcColor, int op){ + switch(op){ + case 0x1 : + return destColor * srcColor; + case 0x2 : + return compositeScreen(destColor, srcColor); + case 0x3 : + return compositeHardLight(srcColor, destColor); + case 0x4 : + return min(destColor, srcColor); + case 0x5 : + return max(destColor, srcColor); + case 0x6 : + return compositeColorDodge(destColor, srcColor); + case 0x7 : + return vec3(1.0)- compositeColorDodge(vec3(1.0)- destColor, vec3(1.0)- srcColor); + case 0x8 : + return compositeHardLight(destColor, srcColor); + case 0x9 : + return compositeSoftLight(destColor, srcColor); + case 0xa : + return abs(destColor - srcColor); + case 0xb : + return destColor + srcColor - vec3(2.0)* destColor * srcColor; + case 0xc : + case 0xd : + case 0xe : + case 0xf : + return compositeHSLToRGB(compositeHSL(compositeRGBToHSL(destColor), + compositeRGBToHSL(srcColor), + op)); + } + return srcColor; +} + +vec4 composite(vec4 srcColor, + sampler2D destTexture, + vec2 destTextureSize, + vec2 fragCoord, + int op){ + if(op == 0x0) + return srcColor; + + + vec2 destTexCoord = fragCoord / destTextureSize; + vec4 destColor = texture(destTexture, destTexCoord); + vec3 blendedRGB = compositeRGB(destColor . rgb, srcColor . rgb, op); + return vec4(srcColor . a *(1.0 - destColor . a)* srcColor . rgb + + srcColor . a * destColor . a * blendedRGB + + (1.0 - srcColor . a)* destColor . rgb, + 1.0); +} + + + +float sampleMask(float maskAlpha, + sampler2D maskTexture, + vec2 maskTextureSize, + vec3 maskTexCoord, + int maskCtrl){ + if(maskCtrl == 0) + return maskAlpha; + + ivec2 maskTexCoordI = ivec2(floor(maskTexCoord . xy)); + vec4 texel = texture(maskTexture,(vec2(maskTexCoordI / ivec2(1, 4))+ 0.5)/ maskTextureSize); + float coverage = texel[maskTexCoordI . y % 4]+ maskTexCoord . 
z; + + if((maskCtrl & 0x1)!= 0) + coverage = abs(coverage); + else + coverage = 1.0 - abs(1.0 - mod(coverage, 2.0)); + return min(maskAlpha, coverage); +} + + + +vec4 calculateColor(vec2 fragCoord, + sampler2D colorTexture0, + sampler2D maskTexture0, + sampler2D destTexture, + sampler2D gammaLUT, + vec2 colorTextureSize0, + vec2 maskTextureSize0, + vec4 filterParams0, + vec4 filterParams1, + vec4 filterParams2, + vec2 framebufferSize, + int ctrl, + vec3 maskTexCoord0, + vec2 colorTexCoord0, + vec4 baseColor, + int tileCtrl){ + + int maskCtrl0 =(tileCtrl >> 0)& 0x3; + float maskAlpha = 1.0; + maskAlpha = sampleMask(maskAlpha, maskTexture0, maskTextureSize0, maskTexCoord0, maskCtrl0); + + + vec4 color = baseColor; + int color0Combine =(ctrl >> 6)& + 0x3; + if(color0Combine != 0){ + int color0Filter =(ctrl >> 4)& 0x3; + vec4 color0 = filterColor(colorTexCoord0, + colorTexture0, + gammaLUT, + colorTextureSize0, + fragCoord, + framebufferSize, + filterParams0, + filterParams1, + filterParams2, + color0Filter); + color = combineColor0(color, color0, color0Combine); + } + + + color . a *= maskAlpha; + + + int compositeOp =(ctrl >> 8)& 0xf; + color = composite(color, destTexture, framebufferSize, fragCoord, compositeOp); + + + color . rgb *= color . 
a; + return color; +} + + + + + + + + + + + + +vec4 fetchUnscaled(sampler2D srcTexture, vec2 scale, vec2 originCoord, int entry){ + return texture(srcTexture,(originCoord + vec2(0.5)+ vec2(entry, 0))* scale); +} + +void computeTileVaryings(vec2 position, + int colorEntry, + sampler2D textureMetadata, + ivec2 textureMetadataSize, + out vec2 outColorTexCoord0, + out vec4 outBaseColor, + out vec4 outFilterParams0, + out vec4 outFilterParams1, + out vec4 outFilterParams2, + out int outCtrl){ + vec2 metadataScale = vec2(1.0)/ vec2(textureMetadataSize); + vec2 metadataEntryCoord = vec2(colorEntry % 128 * 8, colorEntry / 128); + vec4 colorTexMatrix0 = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 0); + vec4 colorTexOffsets = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 1); + vec4 baseColor = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 2); + vec4 filterParams0 = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 3); + vec4 filterParams1 = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 4); + vec4 filterParams2 = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 5); + vec4 extra = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 6); + outColorTexCoord0 = mat2(colorTexMatrix0)* position + colorTexOffsets . xy; + outBaseColor = baseColor; + outFilterParams0 = filterParams0; + outFilterParams1 = filterParams1; + outFilterParams2 = filterParams2; + outCtrl = int(extra . 
x); +} + + + + + + + + + + +uniform int uLoadAction; +uniform vec4 uClearColor; +uniform vec2 uTileSize; +uniform sampler2D uTextureMetadata; +uniform ivec2 uTextureMetadataSize; +uniform sampler2D uZBuffer; +uniform ivec2 uZBufferSize; +uniform sampler2D uColorTexture0; +uniform sampler2D uMaskTexture0; +uniform sampler2D uDestTexture; +uniform sampler2D uGammaLUT; +uniform vec2 uColorTextureSize0; +uniform vec2 uMaskTextureSize0; +uniform vec2 uFramebufferSize; +uniform ivec2 uFramebufferTileSize; +layout(rgba8)uniform image2D uDestImage; + +layout(std430, binding = 0)buffer bTiles { + + + + + + restrict readonly uint iTiles[]; +}; + +layout(std430, binding = 1)buffer bFirstTileMap { + restrict readonly int iFirstTileMap[]; +}; + +uint calculateTileIndex(uint bufferOffset, uvec4 tileRect, uvec2 tileCoord){ + return bufferOffset + tileCoord . y *(tileRect . z - tileRect . x)+ tileCoord . x; +} + +ivec2 toImageCoords(ivec2 coords){ + return ivec2(coords . x, uFramebufferSize . y - coords . y); +} + +void main(){ + ivec2 tileCoord = ivec2(gl_WorkGroupID . xy); + ivec2 firstTileSubCoord = ivec2(gl_LocalInvocationID . xy)* ivec2(1, 4); + ivec2 firstFragCoord = tileCoord * ivec2(uTileSize)+ firstTileSubCoord; + + + int tileIndex = iFirstTileMap[tileCoord . x + uFramebufferTileSize . x * tileCoord . 
y]; + if(tileIndex < 0 && uLoadAction != 0) + return; + + mat4 destColors; + for(int subY = 0;subY < 4;subY ++){ + if(uLoadAction == 0){ + destColors[subY]= uClearColor; + } else { + ivec2 imageCoords = toImageCoords(firstFragCoord + ivec2(0, subY)); + destColors[subY]= imageLoad(uDestImage, imageCoords); + } + } + + while(tileIndex >= 0){ + for(int subY = 0;subY < 4;subY ++){ + ivec2 tileSubCoord = firstTileSubCoord + ivec2(0, subY); + vec2 fragCoord = vec2(firstFragCoord + ivec2(0, subY))+ vec2(0.5); + + int alphaTileIndex = + int(iTiles[tileIndex * 4 + 2]<< 8)>> 8; + uint tileControlWord = iTiles[tileIndex * 4 + 3]; + uint colorEntry = tileControlWord & 0xffff; + int tileCtrl = int((tileControlWord >> 16)& 0xff); + + int backdrop; + uvec2 maskTileCoord; + if(alphaTileIndex >= 0){ + backdrop = 0; + maskTileCoord = uvec2(alphaTileIndex & 0xff, alphaTileIndex >> 8)* + uvec2(uTileSize); + } else { + + backdrop = int(tileControlWord)>> 24; + maskTileCoord = uvec2(0u); + tileCtrl &= ~(0x3 << 0); + } + + vec3 maskTexCoord0 = vec3(vec2(ivec2(maskTileCoord)+ tileSubCoord), backdrop); + + vec2 colorTexCoord0; + vec4 baseColor, filterParams0, filterParams1, filterParams2; + int ctrl; + computeTileVaryings(fragCoord, + int(colorEntry), + uTextureMetadata, + uTextureMetadataSize, + colorTexCoord0, + baseColor, + filterParams0, + filterParams1, + filterParams2, + ctrl); + + vec4 srcColor = calculateColor(fragCoord, + uColorTexture0, + uMaskTexture0, + uDestTexture, + uGammaLUT, + uColorTextureSize0, + uMaskTextureSize0, + filterParams0, + filterParams1, + filterParams2, + uFramebufferSize, + ctrl, + maskTexCoord0, + colorTexCoord0, + baseColor, + tileCtrl); + + destColors[subY]= destColors[subY]*(1.0 - srcColor . 
a)+ srcColor; + } + + tileIndex = int(iTiles[tileIndex * 4 + 0]); + } + + for(int subY = 0;subY < 4;subY ++) + imageStore(uDestImage, toImageCoords(firstFragCoord + ivec2(0, subY)), destColors[subY]); +} + diff --git a/resources/shaders/metal/d3d11/bin.cs.metal b/resources/shaders/metal/d3d11/bin.cs.metal new file mode 100644 index 00000000..7008d475 --- /dev/null +++ b/resources/shaders/metal/d3d11/bin.cs.metal @@ -0,0 +1,284 @@ +// Automatically generated from files in pathfinder/shaders/. Do not edit! +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct bIndirectDrawParams +{ + uint iIndirectDrawParams[1]; +}; + +struct bTiles +{ + uint iTiles[1]; +}; + +struct bFills +{ + uint iFills[1]; +}; + +struct bBackdrops +{ + uint iBackdrops[1]; +}; + +struct bMicrolines +{ + uint4 iMicrolines[1]; +}; + +struct bMetadata +{ + int4 iMetadata[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u); + +static inline __attribute__((always_inline)) +float4 unpackMicroline(thread const uint4& packedMicroline, thread uint& outPathIndex) +{ + outPathIndex = packedMicroline.w; + int4 signedMicroline = int4(packedMicroline); + return float4(float((signedMicroline.x << 16) >> 16), float(signedMicroline.x >> 16), float((signedMicroline.y << 16) >> 16), float(signedMicroline.y >> 16)) + (float4(float(signedMicroline.z & 255), float((signedMicroline.z >> 8) & 255), float((signedMicroline.z >> 16) & 255), float((signedMicroline.z >> 24) & 255)) / float4(256.0)); +} + +static inline __attribute__((always_inline)) +uint computeTileIndexNoCheck(thread const int2& tileCoords, thread const int4& pathTileRect, thread const uint& pathTileOffset) +{ + int2 offsetCoords = tileCoords - pathTileRect.xy; + return (pathTileOffset + uint(offsetCoords.x)) + uint(offsetCoords.y * (pathTileRect.z - pathTileRect.x)); +} + +static inline 
__attribute__((always_inline)) +bool4 computeTileOutcodes(thread const int2& tileCoords, thread const int4& pathTileRect) +{ + return bool4(tileCoords < pathTileRect.xy, tileCoords >= pathTileRect.zw); +} + +static inline __attribute__((always_inline)) +bool computeTileIndex(thread const int2& tileCoords, thread const int4& pathTileRect, thread const uint& pathTileOffset, thread uint& outTileIndex) +{ + int2 param = tileCoords; + int4 param_1 = pathTileRect; + uint param_2 = pathTileOffset; + outTileIndex = computeTileIndexNoCheck(param, param_1, param_2); + int2 param_3 = tileCoords; + int4 param_4 = pathTileRect; + return !any(computeTileOutcodes(param_3, param_4)); +} + +static inline __attribute__((always_inline)) +void addFill(thread const float4& lineSegment, thread const int2& tileCoords, thread const int4& pathTileRect, thread const uint& pathTileOffset, device bIndirectDrawParams& v_155, device bTiles& v_165, thread int uMaxFillCount, device bFills& v_186) +{ + int2 param = tileCoords; + int4 param_1 = pathTileRect; + uint param_2 = pathTileOffset; + uint param_3; + bool _124 = computeTileIndex(param, param_1, param_2, param_3); + uint tileIndex = param_3; + if (!_124) + { + return; + } + uint4 scaledLocalLine = uint4((lineSegment - float4(tileCoords.xyxy * int4(16))) * float4(256.0)); + if (scaledLocalLine.x == scaledLocalLine.z) + { + return; + } + uint _160 = atomic_fetch_add_explicit((device atomic_uint*)&v_155.iIndirectDrawParams[1], 1u, memory_order_relaxed); + uint fillIndex = _160; + uint _174 = atomic_exchange_explicit((device atomic_uint*)&v_165.iTiles[(tileIndex * 4u) + 1u], uint(int(fillIndex)), memory_order_relaxed); + uint fillLink = _174; + if (fillIndex < uint(uMaxFillCount)) + { + v_186.iFills[(fillIndex * 3u) + 0u] = scaledLocalLine.x | (scaledLocalLine.y << uint(16)); + v_186.iFills[(fillIndex * 3u) + 1u] = scaledLocalLine.z | (scaledLocalLine.w << uint(16)); + v_186.iFills[(fillIndex * 3u) + 2u] = fillLink; + } +} + +static inline 
__attribute__((always_inline)) +void adjustBackdrop(thread const int& backdropDelta, thread const int2& tileCoords, thread const int4& pathTileRect, thread const uint& pathTileOffset, thread const uint& pathBackdropOffset, device bTiles& v_165, device bBackdrops& v_251) +{ + int2 param = tileCoords; + int4 param_1 = pathTileRect; + bool4 outcodes = computeTileOutcodes(param, param_1); + if (any(outcodes)) + { + bool _230 = (!outcodes.x) && outcodes.y; + bool _236; + if (_230) + { + _236 = !outcodes.z; + } + else + { + _236 = _230; + } + if (_236) + { + uint backdropIndex = pathBackdropOffset + uint(tileCoords.x - pathTileRect.x); + uint _257 = atomic_fetch_add_explicit((device atomic_uint*)&v_251.iBackdrops[backdropIndex * 3u], uint(backdropDelta), memory_order_relaxed); + } + } + else + { + int2 param_2 = tileCoords; + int4 param_3 = pathTileRect; + uint param_4 = pathTileOffset; + uint tileIndex = computeTileIndexNoCheck(param_2, param_3, param_4); + uint _275 = atomic_fetch_add_explicit((device atomic_uint*)&v_165.iTiles[(tileIndex * 4u) + 2u], uint(backdropDelta) << uint(24), memory_order_relaxed); + } +} + +kernel void main0(constant int& uMaxFillCount [[buffer(2)]], constant int& uMicrolineCount [[buffer(5)]], device bIndirectDrawParams& v_155 [[buffer(0)]], device bTiles& v_165 [[buffer(1)]], device bFills& v_186 [[buffer(3)]], device bBackdrops& v_251 [[buffer(4)]], const device bMicrolines& _346 [[buffer(6)]], const device bMetadata& _360 [[buffer(7)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + uint segmentIndex = gl_GlobalInvocationID.x; + if (segmentIndex >= uint(uMicrolineCount)) + { + return; + } + uint4 param = _346.iMicrolines[segmentIndex]; + uint param_1; + float4 _354 = unpackMicroline(param, param_1); + uint pathIndex = param_1; + float4 lineSegment = _354; + int4 pathTileRect = _360.iMetadata[(pathIndex * 3u) + 0u]; + uint pathTileOffset = uint(_360.iMetadata[(pathIndex * 3u) + 1u].x); + uint pathBackdropOffset = 
uint(_360.iMetadata[(pathIndex * 3u) + 2u].x); + int2 tileSize = int2(16); + int4 tileLineSegment = int4(floor(lineSegment / float4(tileSize.xyxy))); + int2 fromTileCoords = tileLineSegment.xy; + int2 toTileCoords = tileLineSegment.zw; + float2 vector = lineSegment.zw - lineSegment.xy; + float2 vectorIsNegative = float2((vector.x < 0.0) ? (-1.0) : 0.0, (vector.y < 0.0) ? (-1.0) : 0.0); + int2 tileStep = int2((vector.x < 0.0) ? (-1) : 1, (vector.y < 0.0) ? (-1) : 1); + float2 firstTileCrossing = float2((fromTileCoords + int2(int(vector.x >= 0.0), int(vector.y >= 0.0))) * tileSize); + float2 tMax = (firstTileCrossing - lineSegment.xy) / vector; + float2 tDelta = abs(float2(tileSize) / vector); + float2 currentPosition = lineSegment.xy; + int2 tileCoords = fromTileCoords; + int lastStepDirection = 0; + uint iteration = 0u; + int nextStepDirection; + float _501; + float4 auxiliarySegment; + while (iteration < 1024u) + { + if (tMax.x < tMax.y) + { + nextStepDirection = 1; + } + else + { + if (tMax.x > tMax.y) + { + nextStepDirection = 2; + } + else + { + if (float(tileStep.x) > 0.0) + { + nextStepDirection = 1; + } + else + { + nextStepDirection = 2; + } + } + } + if (nextStepDirection == 1) + { + _501 = tMax.x; + } + else + { + _501 = tMax.y; + } + float nextT = fast::min(_501, 1.0); + if (all(tileCoords == toTileCoords)) + { + nextStepDirection = 0; + } + float2 nextPosition = mix(lineSegment.xy, lineSegment.zw, float2(nextT)); + float4 clippedLineSegment = float4(currentPosition, nextPosition); + float4 param_2 = clippedLineSegment; + int2 param_3 = tileCoords; + int4 param_4 = pathTileRect; + uint param_5 = pathTileOffset; + addFill(param_2, param_3, param_4, param_5, v_155, v_165, uMaxFillCount, v_186); + bool haveAuxiliarySegment = false; + if ((tileStep.y < 0) && (nextStepDirection == 2)) + { + auxiliarySegment = float4(clippedLineSegment.zw, float2(tileCoords * tileSize)); + haveAuxiliarySegment = true; + } + else + { + if ((tileStep.y > 0) && (lastStepDirection 
== 2)) + { + auxiliarySegment = float4(float2(tileCoords * tileSize), clippedLineSegment.xy); + haveAuxiliarySegment = true; + } + } + if (haveAuxiliarySegment) + { + float4 param_6 = auxiliarySegment; + int2 param_7 = tileCoords; + int4 param_8 = pathTileRect; + uint param_9 = pathTileOffset; + addFill(param_6, param_7, param_8, param_9, v_155, v_165, uMaxFillCount, v_186); + } + if ((tileStep.x < 0) && (lastStepDirection == 1)) + { + int param_10 = 1; + int2 param_11 = tileCoords; + int4 param_12 = pathTileRect; + uint param_13 = pathTileOffset; + uint param_14 = pathBackdropOffset; + adjustBackdrop(param_10, param_11, param_12, param_13, param_14, v_165, v_251); + } + else + { + if ((tileStep.x > 0) && (nextStepDirection == 1)) + { + int param_15 = -1; + int2 param_16 = tileCoords; + int4 param_17 = pathTileRect; + uint param_18 = pathTileOffset; + uint param_19 = pathBackdropOffset; + adjustBackdrop(param_15, param_16, param_17, param_18, param_19, v_165, v_251); + } + } + if (nextStepDirection == 1) + { + tMax.x += tDelta.x; + tileCoords.x += tileStep.x; + } + else + { + if (nextStepDirection == 2) + { + tMax.y += tDelta.y; + tileCoords.y += tileStep.y; + } + else + { + if (nextStepDirection == 0) + { + break; + } + } + } + currentPosition = nextPosition; + lastStepDirection = nextStepDirection; + iteration++; + } +} + diff --git a/resources/shaders/metal/d3d11/bound.cs.metal b/resources/shaders/metal/d3d11/bound.cs.metal new file mode 100644 index 00000000..b1a4bd01 --- /dev/null +++ b/resources/shaders/metal/d3d11/bound.cs.metal @@ -0,0 +1,77 @@ +// Automatically generated from files in pathfinder/shaders/. Do not edit! 
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+// Per-path records, one uint4 each. As used below: .xy hold the tile rect as four signed 16-bit values, .z is the tile index compared against during the search, .w is copied into tile word 3.
+struct bTilePathInfo
+{
+    uint4 iTilePathInfo[1];
+};
+// Tile records, addressed below as four uints per tile (tileIndex * 4 + word).
+struct bTiles
+{
+    uint iTiles[1];
+};
+// Threadgroup size constant: 64 x 1 x 1.
+constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u);
+// Kernel: one thread per tile. Finds the path record for the tile by binary search on iTilePathInfo[].z, then resets the tile's four words.
+kernel void main0(constant int& uTileCount [[buffer(0)]], constant int& uPathCount [[buffer(1)]], const device bTilePathInfo& _64 [[buffer(2)]], device bTiles& _148 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+{
+    uint tileIndex = gl_GlobalInvocationID.x; // global thread index doubles as the tile index
+    if (tileIndex >= uint(uTileCount))
+    {
+        return; // thread is past the last tile
+    }
+    uint lowPathIndex = 0u;
+    uint highPathIndex = uint(uPathCount);
+    int iteration = 0;
+    for (;;) // binary search over [lowPathIndex, highPathIndex), capped at 1024 iterations
+    {
+        bool _42 = iteration < 1024;
+        bool _50; // _50 = _42 && (lowPathIndex + 1 < highPathIndex), expanded short-circuit
+        if (_42)
+        {
+            _50 = (lowPathIndex + 1u) < highPathIndex;
+        }
+        else
+        {
+            _50 = _42;
+        }
+        if (_50)
+        {
+            uint midPathIndex = lowPathIndex + ((highPathIndex - lowPathIndex) / 2u);
+            uint midTileIndex = _64.iTilePathInfo[midPathIndex].z;
+            if (tileIndex < midTileIndex)
+            {
+                highPathIndex = midPathIndex; // tile lies before the probed path
+            }
+            else
+            {
+                lowPathIndex = midPathIndex; // probed path's .z <= tileIndex: keep it as the candidate
+                if (tileIndex == midTileIndex)
+                {
+                    break; // exact hit
+                }
+            }
+            iteration++;
+            continue;
+        }
+        else
+        {
+            break;
+        }
+    }
+    uint pathIndex = lowPathIndex; // path 0, or the last probed path with .z <= tileIndex; presumably .z ascends with path index - TODO confirm
+    uint4 pathInfo = _64.iTilePathInfo[pathIndex];
+    int2 packedTileRect = int2(pathInfo.xy);
+    int4 tileRect = int4((packedTileRect.x << 16) >> 16, packedTileRect.x >> 16, (packedTileRect.y << 16) >> 16, packedTileRect.y >> 16); // sign-extend the low and high 16-bit halves of .x and .y
+    uint tileOffset = tileIndex - pathInfo.z;
+    uint tileWidth = uint(tileRect.z - tileRect.x);
+    int2 tileCoords = tileRect.xy + int2(int(tileOffset % tileWidth), int(tileOffset / tileWidth)); // NOTE(review): never read afterwards; also divides by tileWidth, which is 0 for a zero-width rect
+    _148.iTiles[(tileIndex * 4u) + 0u] = 4294967295u; // word 0 = 0xFFFFFFFF (-1 when read as int)
+    _148.iTiles[(tileIndex * 4u) + 1u] = 4294967295u; // word 1 = 0xFFFFFFFF (-1 when read as int)
+    _148.iTiles[(tileIndex * 4u) + 2u] = 16777215u; // word 2 = 0x00FFFFFF: low 24 bits all set, top 8 bits zero
+    _148.iTiles[(tileIndex * 4u) + 3u] = pathInfo.w; // word 3 = per-path value taken from the record
+}
+
diff --git a/resources/shaders/metal/d3d11/dice.cs.metal b/resources/shaders/metal/d3d11/dice.cs.metal
new file mode 100644
index 00000000..54913e55
--- /dev/null
+++ b/resources/shaders/metal/d3d11/dice.cs.metal
@@ -0,0 +1,205 @@
+// Automatically generated from files in pathfinder/shaders/. Do not edit!
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wunused-variable"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+#include <metal_atomic>
+
+using namespace metal;
+
+struct bMicrolines // Output: one packed uint4 per microline (see emitMicroline).
+{
+    uint4 iMicrolines[1];
+};
+
+struct bPoints
+{
+    float2 iPoints[1];
+};
+
+struct bDiceMetadata // One uint4 per path; main0 reads .y (first global segment) and .z (first batch segment).
+{
+    uint4 iDiceMetadata[1];
+};
+
+struct bInputIndices // One uint2 per segment: .x = first point index, .y = flag bits read by main0.
+{
+    uint2 iInputIndices[1];
+};
+
+struct bComputeIndirectParams
+{
+    uint iComputeIndirectParams[1];
+};
+
+constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u);
+
+static inline __attribute__((always_inline)) // Returns iPoints[pointIndex] multiplied by uTransform and offset by uTranslation.
+float2 getPoint(thread const uint& pointIndex, thread float2x2 uTransform, const device bPoints& v_194, thread float2 uTranslation)
+{
+    return (uTransform * v_194.iPoints[pointIndex]) + uTranslation;
+}
+
+static inline __attribute__((always_inline)) // Evaluates the cubic Bezier (baseline.xy, ctrl.xy, ctrl.zw, baseline.zw) at t by repeated mix().
+float2 sampleCurve(thread const float4& baseline, thread const float4& ctrl, thread const float& t)
+{
+    float2 p0 = baseline.xy;
+    float2 p1 = ctrl.xy;
+    float2 p2 = ctrl.zw;
+    float2 p3 = baseline.zw;
+    float2 p0p1 = mix(p0, p1, float2(t));
+    float2 p1p2 = mix(p1, p2, float2(t));
+    float2 p2p3 = mix(p2, p3, float2(t));
+    float2 p0p1p2 = mix(p0p1, p1p2, float2(t));
+    float2 p1p2p3 = mix(p1p2, p2p3, float2(t));
+    return mix(p0p1p2, p1p2p3, float2(t));
+}
+
+static inline __attribute__((always_inline)) // Linearly interpolates from line.xy to line.zw at t.
+float2 sampleLine(thread const float4& line, thread const float& t)
+{
+    return mix(line.xy, line.zw, float2(t));
+}
+
+static inline __attribute__((always_inline)) // Packs one segment into iMicrolines[outputMicrolineIndex]; drops it silently when the index is >= uMaxMicrolineCount.
+void emitMicroline(thread const float4& microlineSegment, thread const uint& pathIndex, thread const uint& outputMicrolineIndex, thread int uMaxMicrolineCount, device bMicrolines& v_76)
+{
+    if (outputMicrolineIndex >= uint(uMaxMicrolineCount))
+    {
+        return;
+    }
+    int4 microlineSubpixels = int4(round(fast::clamp(microlineSegment, float4(-32768.0), float4(32767.0)) * 256.0)); // Clamp, then convert to 1/256-pixel units.
+    int4 microlinePixels = int4(floor(float4(microlineSubpixels) / float4(256.0)));
+    int4 microlineFractPixels = microlineSubpixels - (microlinePixels * int4(256));
+    v_76.iMicrolines[outputMicrolineIndex] = uint4((uint(microlinePixels.x) & 65535u) | (uint(microlinePixels.y) << uint(16)), (uint(microlinePixels.z) & 65535u) | (uint(microlinePixels.w) << uint(16)), ((uint(microlineFractPixels.x) | (uint(microlineFractPixels.y) << uint(8))) | (uint(microlineFractPixels.z) << uint(16))) | (uint(microlineFractPixels.w) << uint(24)), pathIndex); // x,y: 16-bit whole pixels; z: four 8-bit fractions; w: path index.
+}
+// Dice kernel, one thread per batch segment: finds the owning path, picks a subdivision count, reserves output slots atomically, then emits the microlines.
+kernel void main0(constant int& uMaxMicrolineCount [[buffer(0)]], constant int& uLastBatchSegmentIndex [[buffer(5)]], constant int& uPathCount [[buffer(6)]], constant float2x2& uTransform [[buffer(2)]], constant float2& uTranslation [[buffer(4)]], device bMicrolines& v_76 [[buffer(1)]], const device bPoints& v_194 [[buffer(3)]], const device bDiceMetadata& _253 [[buffer(7)]], const device bInputIndices& _300 [[buffer(8)]], device bComputeIndirectParams& _439 [[buffer(9)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+{
+    uint batchSegmentIndex = gl_GlobalInvocationID.x;
+    if (batchSegmentIndex >= uint(uLastBatchSegmentIndex))
+    {
+        return;
+    }
+    uint lowPathIndex = 0u;
+    uint highPathIndex = uint(uPathCount);
+    int iteration = 0;
+    for (;;) // Binary search on iDiceMetadata[].z; hard-capped at 1024 iterations.
+    {
+        bool _234 = iteration < 1024;
+        bool _241;
+        if (_234)
+        {
+            _241 = (lowPathIndex + 1u) < highPathIndex;
+        }
+        else
+        {
+            _241 = _234;
+        }
+        if (_241)
+        {
+            uint midPathIndex = lowPathIndex + ((highPathIndex - lowPathIndex) / 2u);
+            uint midBatchSegmentIndex = _253.iDiceMetadata[midPathIndex].z;
+            if (batchSegmentIndex < midBatchSegmentIndex)
+            {
+                highPathIndex = midPathIndex;
+            }
+            else
+            {
+                lowPathIndex = midPathIndex;
+                if (batchSegmentIndex == midBatchSegmentIndex)
+                {
+                    break;
+                }
+            }
+            iteration++;
+            continue;
+        }
+        else
+        {
+            break;
+        }
+    }
+    uint batchPathIndex = lowPathIndex;
+    uint4 diceMetadata = _253.iDiceMetadata[batchPathIndex];
+    uint firstGlobalSegmentIndexInPath = diceMetadata.y;
+    uint firstBatchSegmentIndexInPath = diceMetadata.z;
+    uint globalSegmentIndex = (batchSegmentIndex - firstBatchSegmentIndexInPath) + firstGlobalSegmentIndexInPath;
+    uint2 inputIndices = _300.iInputIndices[globalSegmentIndex];
+    uint fromPointIndex = inputIndices.x;
+    uint flagsPathIndex = inputIndices.y;
+    uint toPointIndex = fromPointIndex;
+    if ((flagsPathIndex & 1073741824u) != 0u) // 0x40000000: end point is three points after the start (two control points).
+    {
+        toPointIndex += 3u;
+    }
+    else
+    {
+        if ((flagsPathIndex & 2147483648u) != 0u) // 0x80000000: end point is two points after the start (one control point).
+        {
+            toPointIndex += 2u;
+        }
+        else
+        {
+            toPointIndex++;
+        }
+    }
+    uint param = fromPointIndex;
+    uint param_1 = toPointIndex;
+    float4 baseline = float4(getPoint(param, uTransform, v_194, uTranslation), getPoint(param_1, uTransform, v_194, uTranslation));
+    float4 ctrl = float4(0.0);
+    bool isCurve = (flagsPathIndex & 3221225472u) != 0u; // 0xC0000000: either curve bit set.
+    float segmentCountF;
+    if (isCurve)
+    {
+        uint param_2 = fromPointIndex + 1u;
+        float2 ctrl0 = getPoint(param_2, uTransform, v_194, uTranslation);
+        if ((flagsPathIndex & 2147483648u) != 0u)
+        {
+            float2 ctrl0_2 = ctrl0 * float2(2.0); // Unused; the next line recomputes the same product.
+            ctrl = (baseline + (ctrl0 * float2(2.0)).xyxy) * float4(0.3333333432674407958984375); // (endpoint + 2 * ctrl0) / 3 for each end: the one-control-point curve expressed with two control points.
+        }
+        else
+        {
+            uint param_3 = fromPointIndex + 2u;
+            ctrl = float4(ctrl0, getPoint(param_3, uTransform, v_194, uTranslation));
+        }
+        float2 bound = float2(6.0) * fast::max(abs((ctrl.zw - (ctrl.xy * 2.0)) + baseline.xy), abs((baseline.zw - (ctrl.zw * 2.0)) + ctrl.xy));
+        segmentCountF = sqrt(length(bound) / 2.0);
+    }
+    else
+    {
+        segmentCountF = length(baseline.zw - baseline.xy) / 16.0; // Lines: one microline per 16 units of length.
+    }
+    int segmentCount = max(int(ceil(segmentCountF)), 1);
+    uint _444 = atomic_fetch_add_explicit((device atomic_uint*)&_439.iComputeIndirectParams[3], uint(segmentCount), memory_order_relaxed); // Reserves slots unconditionally; emitMicroline drops any beyond uMaxMicrolineCount.
+    uint firstOutputMicrolineIndex = _444;
+    float prevT = 0.0;
+    float2 prevPoint = baseline.xy;
+    float2 nextPoint;
+    for (int segmentIndex = 0; segmentIndex < segmentCount; segmentIndex++)
+    {
+        float nextT = float(segmentIndex + 1) / float(segmentCount);
+        if (isCurve)
+        {
+            float4 param_4 = baseline;
+            float4 param_5 = ctrl;
+            float param_6 = nextT;
+            nextPoint = sampleCurve(param_4, param_5, param_6);
+        }
+        else
+        {
+            float4 param_7 = baseline;
+            float param_8 = nextT;
+            nextPoint = sampleLine(param_7, param_8);
+        }
+        float4 param_9 = float4(prevPoint, nextPoint);
+        uint param_10 = batchPathIndex;
+        uint param_11 = firstOutputMicrolineIndex + uint(segmentIndex);
+        emitMicroline(param_9, param_10, param_11, uMaxMicrolineCount, v_76);
+        prevT = nextT;
+        prevPoint = nextPoint;
+    }
+}
+
diff --git a/resources/shaders/metal/d3d11/fill.cs.metal b/resources/shaders/metal/d3d11/fill.cs.metal
new file mode 100644
index 00000000..c3f47df7
--- /dev/null
+++ b/resources/shaders/metal/d3d11/fill.cs.metal
@@ -0,0 +1,100 @@
+// Automatically generated from files in pathfinder/shaders/. Do not edit!
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct bFills // Three uints per fill: packed from-point, packed to-point, next fill index.
+{
+    uint iFills[1];
+};
+
+struct bAlphaTiles // Two uints per alpha tile: tile index, clip alpha tile index.
+{
+    uint iAlphaTiles[1];
+};
+
+struct bTiles
+{
+    uint iTiles[1];
+};
+
+constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(16u, 4u, 1u);
+
+static inline __attribute__((always_inline)) // Samples areaLUT for the segment from->to clipped to x in [-0.5, 0.5], scaled by the signed window width.
+float4 computeCoverage(thread const float2& from, thread const float2& to, thread const texture2d<float> areaLUT, thread const sampler areaLUTSmplr)
+{
+    float2 left = select(to, from, bool2(from.x < to.x));
+    float2 right = select(from, to, bool2(from.x < to.x));
+    float2 window = fast::clamp(float2(from.x, to.x), float2(-0.5), float2(0.5));
+    float offset = mix(window.x, window.y, 0.5) - left.x;
+    float t = offset / (right.x - left.x);
+    float y = mix(left.y, right.y, t);
+    float d = (right.y - left.y) / (right.x - left.x);
+    float dX = window.x - window.y;
+    return areaLUT.sample(areaLUTSmplr, (float2(y + 8.0, abs(d * dX)) / float2(16.0)), level(0.0)) * dX;
+}
+
+static inline __attribute__((always_inline)) // Sums computeCoverage over the fill list starting at fillIndex; stops at a negative next index or after 1024 fills.
+float4 accumulateCoverageForFillList(thread int& fillIndex, thread const int2& tileSubCoord, const device bFills& v_148, thread texture2d<float> uAreaLUT, thread const sampler uAreaLUTSmplr)
+{
+    float2 tileFragCoord = float2(tileSubCoord) + float2(0.5);
+    float4 coverages = float4(0.0);
+    int iteration = 0;
+    do // NOTE(review): the body runs once even if fillIndex is already negative; confirm callers never pass an empty list.
+    {
+        uint fillFrom = v_148.iFills[(fillIndex * 3) + 0];
+        uint fillTo = v_148.iFills[(fillIndex * 3) + 1];
+        float4 lineSegment = float4(float(fillFrom & 65535u), float(fillFrom >> uint(16)), float(fillTo & 65535u), float(fillTo >> uint(16))) / float4(256.0); // Unpacks two 16-bit coordinates per word, in 1/256 units.
+        lineSegment -= tileFragCoord.xyxy;
+        float2 param = lineSegment.xy;
+        float2 param_1 = lineSegment.zw;
+        coverages += computeCoverage(param, param_1, uAreaLUT, uAreaLUTSmplr);
+        fillIndex = int(v_148.iFills[(fillIndex * 3) + 2]);
+        iteration++;
+    } while ((fillIndex >= 0) && (iteration < 1024));
+    return coverages;
+}
+
+static inline __attribute__((always_inline)) // Maps an alpha tile index to texel coordinates: (x, y) tile position scaled by (16, 4) plus the local invocation offset.
+int2 computeTileCoord(thread const uint& alphaTileIndex, thread uint3& gl_LocalInvocationID)
+{
+    uint x = alphaTileIndex & 255u;
+    uint y = (alphaTileIndex >> 8u) & (255u + (((alphaTileIndex >> 16u) & 255u) << 8u)); // The mask is 0xFF plus bits 16..23 shifted to 8..15, so y ends up as bits 8..23 of the index.
+    return (int2(16, 4) * int2(int(x), int(y))) + int2(gl_LocalInvocationID.xy);
+}
+// Fill kernel, one workgroup per alpha tile: accumulates coverage for four rows per thread, optionally min-combines with a clip tile read from uDest, and writes the result to uDest.
+kernel void main0(constant int2& uAlphaTileRange [[buffer(1)]], const device bFills& v_148 [[buffer(0)]], const device bAlphaTiles& _284 [[buffer(2)]], device bTiles& _294 [[buffer(3)]], texture2d<float> uAreaLUT [[texture(0)]], texture2d<float, access::read_write> uDest [[texture(1)]], sampler uAreaLUTSmplr [[sampler(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]])
+{
+    int2 tileSubCoord = int2(gl_LocalInvocationID.xy) * int2(1, 4);
+    uint batchAlphaTileIndex = gl_WorkGroupID.x | (gl_WorkGroupID.y << uint(15)); // Workgroup y supplies bits 15 and up of the index.
+    uint alphaTileIndex = batchAlphaTileIndex + uint(uAlphaTileRange.x);
+    if (alphaTileIndex >= uint(uAlphaTileRange.y))
+    {
+        return;
+    }
+    uint tileIndex = _284.iAlphaTiles[(batchAlphaTileIndex * 2u) + 0u];
+    if ((int(_294.iTiles[(tileIndex * 4u) + 2u] << uint(8)) >> 8) < 0) // Sign-extends the low 24 bits of word 2; negative means skip this tile.
+    {
+        return;
+    }
+    int fillIndex = int(_294.iTiles[(tileIndex * 4u) + 1u]);
+    int backdrop = int(_294.iTiles[(tileIndex * 4u) + 3u]) >> 24; // Signed top byte of word 3.
+    float4 coverages = float4(float(backdrop));
+    int param = fillIndex;
+    int2 param_1 = tileSubCoord;
+    float4 _334 = accumulateCoverageForFillList(param, param_1, v_148, uAreaLUT, uAreaLUTSmplr);
+    coverages += _334;
+    coverages = fast::clamp(abs(coverages), float4(0.0), float4(1.0));
+    int clipTileIndex = int(_284.iAlphaTiles[(batchAlphaTileIndex * 2u) + 1u]);
+    if (clipTileIndex >= 0)
+    {
+        uint param_2 = uint(clipTileIndex);
+        coverages = fast::min(coverages, uDest.read(uint2(computeTileCoord(param_2, gl_LocalInvocationID))));
+    }
+    uint param_3 = alphaTileIndex;
+    uDest.write(coverages, uint2(computeTileCoord(param_3, gl_LocalInvocationID)));
+}
+
diff --git a/resources/shaders/metal/d3d11/propagate.cs.metal b/resources/shaders/metal/d3d11/propagate.cs.metal
new file mode 100644
index 00000000..9cb008c6
--- /dev/null
+++ b/resources/shaders/metal/d3d11/propagate.cs.metal
@@ -0,0 +1,184 @@
+// Automatically generated from files in pathfinder/shaders/. Do not edit!
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+#pragma clang diagnostic ignored "-Wunused-variable"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+#include <metal_atomic>
+
+using namespace metal;
+
+struct bBackdrops // Three ints per column: starting backdrop, tile x, draw path index.
+{
+    int iBackdrops[1];
+};
+
+struct bDrawMetadata // Three uint4 per draw path; main0 reads entry 0 (tile rect) and entry 1 (offsets).
+{
+    uint4 iDrawMetadata[1];
+};
+
+struct bClipMetadata // Two uint4 per clip path: tile rect, offsets.
+{
+    uint4 iClipMetadata[1];
+};
+
+struct bDrawTiles
+{
+    uint iDrawTiles[1];
+};
+
+struct bClipTiles
+{
+    uint iClipTiles[1];
+};
+
+struct bIndirectDrawParams
+{
+    uint iIndirectDrawParams[1];
+};
+
+struct bAlphaTiles
+{
+    uint iAlphaTiles[1];
+};
+
+struct bZBuffer
+{
+    int iZBuffer[1];
+};
+
+struct bFirstTileMap
+{
+    int iFirstTileMap[1];
+};
+
+constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u);
+
+static inline __attribute__((always_inline)) // Row-major index of tileCoord inside tileRect (row width = z - x), offset by bufferOffset.
+uint calculateTileIndex(thread const uint& bufferOffset, thread const uint4& tileRect, thread const uint2& tileCoord)
+{
+    return (bufferOffset + (tileCoord.y * (tileRect.z - tileRect.x))) + tileCoord.x;
+}
+// Propagate kernel, one thread per tile column: carries the running backdrop down the column, resolves clipping, allocates alpha tiles, and links visible tiles into iFirstTileMap / iZBuffer.
+kernel void main0(constant int& uColumnCount [[buffer(0)]], constant int& uFirstAlphaTileIndex [[buffer(8)]], constant int2& uFramebufferTileSize [[buffer(9)]], const device bBackdrops& _59 [[buffer(1)]], const device bDrawMetadata& _85 [[buffer(2)]], const device bClipMetadata& _126 [[buffer(3)]], device bDrawTiles& _175 [[buffer(4)]], device bClipTiles& _252 [[buffer(5)]], device bIndirectDrawParams& _303 [[buffer(6)]], device bAlphaTiles& _310 [[buffer(7)]], device bZBuffer& _381 [[buffer(10)]], device bFirstTileMap& _398 [[buffer(11)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+{
+    uint columnIndex = gl_GlobalInvocationID.x;
+    if (int(columnIndex) >= uColumnCount)
+    {
+        return;
+    }
+    int currentBackdrop = _59.iBackdrops[(columnIndex * 3u) + 0u];
+    int tileX = _59.iBackdrops[(columnIndex * 3u) + 1u];
+    uint drawPathIndex = uint(_59.iBackdrops[(columnIndex * 3u) + 2u]);
+    uint4 drawTileRect = _85.iDrawMetadata[(drawPathIndex * 3u) + 0u];
+    uint4 drawOffsets = _85.iDrawMetadata[(drawPathIndex * 3u) + 1u];
+    uint2 drawTileSize = drawTileRect.zw - drawTileRect.xy;
+    uint drawTileBufferOffset = drawOffsets.x;
+    bool zWrite = drawOffsets.z != 0u;
+    int clipPathIndex = int(drawOffsets.w); // Negative means the draw path has no clip path.
+    uint4 clipTileRect = uint4(0u);
+    uint4 clipOffsets = uint4(0u);
+    if (clipPathIndex >= 0)
+    {
+        clipTileRect = _126.iClipMetadata[(clipPathIndex * 2) + 0];
+        clipOffsets = _126.iClipMetadata[(clipPathIndex * 2) + 1];
+    }
+    uint clipTileBufferOffset = clipOffsets.x;
+    uint clipBackdropOffset = clipOffsets.y; // Not used below.
+    for (uint tileY = 0u; tileY < drawTileSize.y; tileY++)
+    {
+        uint2 drawTileCoord = uint2(uint(tileX), tileY);
+        uint param = drawTileBufferOffset;
+        uint4 param_1 = drawTileRect;
+        uint2 param_2 = drawTileCoord;
+        uint drawTileIndex = calculateTileIndex(param, param_1, param_2);
+        int drawAlphaTileIndex = -1;
+        int clipAlphaTileIndex = -1;
+        int drawFirstFillIndex = int(_175.iDrawTiles[(drawTileIndex * 4u) + 1u]);
+        int drawBackdropDelta = int(_175.iDrawTiles[(drawTileIndex * 4u) + 2u]) >> 24; // Signed top byte of word 2.
+        uint drawTileWord = _175.iDrawTiles[(drawTileIndex * 4u) + 3u] & 16777215u; // Low 24 bits of word 3.
+        int drawTileBackdrop = currentBackdrop;
+        bool haveDrawAlphaMask = drawFirstFillIndex >= 0;
+        bool needNewAlphaTile = haveDrawAlphaMask;
+        if (clipPathIndex >= 0)
+        {
+            uint2 tileCoord = drawTileCoord + drawTileRect.xy;
+            if (all(bool4(tileCoord >= clipTileRect.xy, tileCoord < clipTileRect.zw)))
+            {
+                uint2 clipTileCoord = tileCoord - clipTileRect.xy;
+                uint param_3 = clipTileBufferOffset;
+                uint4 param_4 = clipTileRect;
+                uint2 param_5 = clipTileCoord;
+                uint clipTileIndex = calculateTileIndex(param_3, param_4, param_5);
+                int thisClipAlphaTileIndex = int(_252.iClipTiles[(clipTileIndex * 4u) + 2u] << uint(8)) >> 8; // Sign-extended low 24 bits of word 2.
+                uint clipTileWord = _252.iClipTiles[(clipTileIndex * 4u) + 3u];
+                int clipTileBackdrop = int(clipTileWord) >> 24;
+                if (thisClipAlphaTileIndex >= 0)
+                {
+                    if (haveDrawAlphaMask)
+                    {
+                        clipAlphaTileIndex = thisClipAlphaTileIndex; // Both have masks: a new alpha tile will combine them.
+                        needNewAlphaTile = true;
+                    }
+                    else
+                    {
+                        if (drawTileBackdrop != 0)
+                        {
+                            drawAlphaTileIndex = thisClipAlphaTileIndex; // Draw tile has no fills but a nonzero backdrop: reuse the clip's alpha tile.
+                            clipAlphaTileIndex = -1;
+                            needNewAlphaTile = false;
+                        }
+                        else
+                        {
+                            drawAlphaTileIndex = -1;
+                            clipAlphaTileIndex = -1;
+                            needNewAlphaTile = false;
+                        }
+                    }
+                }
+                else
+                {
+                    if (clipTileBackdrop == 0)
+                    {
+                        drawTileBackdrop = 0; // Clip tile has no mask and zero backdrop: the draw tile is cleared.
+                        needNewAlphaTile = false;
+                    }
+                    else
+                    {
+                        needNewAlphaTile = true; // NOTE(review): set even when haveDrawAlphaMask is false; confirm this is intended.
+                    }
+                }
+            }
+            else
+            {
+                drawTileBackdrop = 0; // Outside the clip rect: the draw tile is cleared.
+                needNewAlphaTile = false;
+            }
+        }
+        if (needNewAlphaTile)
+        {
+            uint _306 = atomic_fetch_add_explicit((device atomic_uint*)&_303.iIndirectDrawParams[4], 1u, memory_order_relaxed); // Allocates the next alpha tile slot in this batch.
+            uint drawBatchAlphaTileIndex = _306;
+            _310.iAlphaTiles[(drawBatchAlphaTileIndex * 2u) + 0u] = drawTileIndex;
+            _310.iAlphaTiles[(drawBatchAlphaTileIndex * 2u) + 1u] = uint(clipAlphaTileIndex);
+            drawAlphaTileIndex = int(drawBatchAlphaTileIndex) + uFirstAlphaTileIndex;
+        }
+        _175.iDrawTiles[(drawTileIndex * 4u) + 2u] = (uint(drawAlphaTileIndex) & 16777215u) | (uint(drawBackdropDelta) << uint(24));
+        _175.iDrawTiles[(drawTileIndex * 4u) + 3u] = drawTileWord | (uint(drawTileBackdrop) << uint(24));
+        int2 tileCoord_1 = int2(tileX, int(tileY)) + int2(drawTileRect.xy);
+        int tileMapIndex = (tileCoord_1.y * uFramebufferTileSize.x) + tileCoord_1.x;
+        if ((zWrite && (drawTileBackdrop != 0)) && (drawAlphaTileIndex < 0)) // Nonzero backdrop and no alpha tile, with zWrite set: record the highest such tile index.
+        {
+            int _386 = atomic_fetch_max_explicit((device atomic_int*)&_381.iZBuffer[tileMapIndex], int(drawTileIndex), memory_order_relaxed);
+        }
+        if ((drawTileBackdrop != 0) || (drawAlphaTileIndex >= 0))
+        {
+            int _403 = atomic_exchange_explicit((device atomic_int*)&_398.iFirstTileMap[tileMapIndex], int(drawTileIndex), memory_order_relaxed); // Pushes this tile onto the head of the list for this framebuffer tile.
+            int nextTileIndex = _403;
+            _175.iDrawTiles[(drawTileIndex * 4u) + 0u] = uint(nextTileIndex);
+        }
+        currentBackdrop += drawBackdropDelta;
+    }
+}
+
diff --git a/resources/shaders/metal/d3d11/sort.cs.metal b/resources/shaders/metal/d3d11/sort.cs.metal
new file mode 100644
index 00000000..ae01b505
--- 
/dev/null
+++ b/resources/shaders/metal/d3d11/sort.cs.metal
@@ -0,0 +1,94 @@
+// Automatically generated from files in pathfinder/shaders/. Do not edit!
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct bFirstTileMap
+{
+    int iFirstTileMap[1];
+};
+
+struct bTiles // Four uints per tile; word 0 is the next-tile link used below.
+{
+    uint iTiles[1];
+};
+
+struct bZBuffer
+{
+    int iZBuffer[1];
+};
+
+constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u);
+
+static inline __attribute__((always_inline)) // Returns the list head stored for this framebuffer tile.
+int getFirst(thread const uint& globalTileIndex, device bFirstTileMap& v_26)
+{
+    return v_26.iFirstTileMap[globalTileIndex];
+}
+
+static inline __attribute__((always_inline)) // Reads the next-tile link (word 0) of tileIndex.
+int getNextTile(thread const int& tileIndex, device bTiles& v_37)
+{
+    return int(v_37.iTiles[(tileIndex * 4) + 0]);
+}
+
+static inline __attribute__((always_inline)) // Writes the next-tile link (word 0) of tileIndex.
+void setNextTile(thread const int& tileIndex, thread const int& newNextTileIndex, device bTiles& v_37)
+{
+    v_37.iTiles[(tileIndex * 4) + 0] = uint(newNextTileIndex);
+}
+// Sort kernel, one thread per framebuffer tile: rebuilds that tile's linked list in ascending tile-index order by insertion, dropping tiles whose index is below iZBuffer.
+kernel void main0(constant int& uTileCount [[buffer(2)]], device bFirstTileMap& v_26 [[buffer(0)]], device bTiles& v_37 [[buffer(1)]], const device bZBuffer& _76 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
+{
+    uint globalTileIndex = gl_GlobalInvocationID.x;
+    if (globalTileIndex >= uint(uTileCount))
+    {
+        return;
+    }
+    int zValue = _76.iZBuffer[globalTileIndex];
+    uint param = globalTileIndex;
+    int unsortedFirstTileIndex = getFirst(param, v_26);
+    int sortedFirstTileIndex = -1;
+    while (unsortedFirstTileIndex >= 0) // Pop each tile off the unsorted list (a negative link ends it).
+    {
+        int currentTileIndex = unsortedFirstTileIndex;
+        int param_1 = currentTileIndex;
+        unsortedFirstTileIndex = getNextTile(param_1, v_37);
+        if (currentTileIndex >= zValue) // Tiles below zValue are left out of the sorted list.
+        {
+            int prevTrialTileIndex = -1;
+            int trialTileIndex = sortedFirstTileIndex;
+            while (true) // Walk the sorted list to the first entry greater than currentTileIndex.
+            {
+                if ((trialTileIndex < 0) || (currentTileIndex < trialTileIndex))
+                {
+                    if (prevTrialTileIndex < 0)
+                    {
+                        int param_2 = currentTileIndex;
+                        int param_3 = sortedFirstTileIndex;
+                        setNextTile(param_2, param_3, v_37);
+                        sortedFirstTileIndex = currentTileIndex; // Insert at the head.
+                    }
+                    else
+                    {
+                        int param_4 = currentTileIndex;
+                        int param_5 = trialTileIndex;
+                        setNextTile(param_4, param_5, v_37);
+                        int param_6 = prevTrialTileIndex;
+                        int param_7 = currentTileIndex;
+                        setNextTile(param_6, param_7, v_37); // Insert between prev and trial.
+                    }
+                    break;
+                }
+                prevTrialTileIndex = trialTileIndex;
+                int param_8 = trialTileIndex;
+                trialTileIndex = getNextTile(param_8, v_37);
+            }
+        }
+    }
+    v_26.iFirstTileMap[globalTileIndex] = sortedFirstTileIndex;
+}
+
diff --git a/resources/shaders/metal/d3d11/tile.cs.metal b/resources/shaders/metal/d3d11/tile.cs.metal
new file mode 100644
index 00000000..27f98375
--- /dev/null
+++ b/resources/shaders/metal/d3d11/tile.cs.metal
@@ -0,0 +1,737 @@
+// Automatically generated from files in pathfinder/shaders/. Do not edit!
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+
+using namespace metal;
+
+struct bFirstTileMap
+{
+    int iFirstTileMap[1];
+};
+
+struct bTiles
+{
+    uint iTiles[1];
+};
+
+constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(16u, 4u, 1u);
+
+constant float3 _1082 = {};
+
+// Implementation of the GLSL mod() function, which is slightly different than Metal fmod()
+template<typename Tx, typename Ty>
+inline Tx mod(Tx x, Ty y)
+{
+    return x - y * floor(x / y);
+}
+
+static inline __attribute__((always_inline)) // Flips y against the framebuffer height: returns (x, uFramebufferSize.y - y).
+int2 toImageCoords(thread const int2& coords, thread float2 uFramebufferSize)
+{
+    return int2(coords.x, int(uFramebufferSize.y - float(coords.y)));
+}
+
+static inline __attribute__((always_inline)) // Samples srcTexture at the centre of texel originCoord + (entry, 0); scale converts texels to normalized coordinates.
+float4 fetchUnscaled(thread const texture2d<float> srcTexture, thread const sampler srcTextureSmplr, thread const float2& scale, thread const float2& originCoord, thread const int& entry)
+{
+    return srcTexture.sample(srcTextureSmplr, (((originCoord + float2(0.5)) + float2(float(entry), 0.0)) * scale), level(0.0));
+}
+
+static inline __attribute__((always_inline)) // Reads seven metadata texels for colorEntry and unpacks them into the out parameters.
+void computeTileVaryings(thread const float2& position, thread const int& colorEntry, thread const texture2d<float> textureMetadata, thread const sampler textureMetadataSmplr, thread const int2& textureMetadataSize, thread float2& outColorTexCoord0, thread float4& outBaseColor, thread float4& outFilterParams0, thread float4& outFilterParams1, thread float4& outFilterParams2, thread int& outCtrl)
+{
+    float2 metadataScale = float2(1.0) / float2(textureMetadataSize);
+    float2 metadataEntryCoord = float2(float((colorEntry % 128) * 8), float(colorEntry / 128)); // 128 entries per row, 8 texels apart.
+    float2 param = metadataScale;
+    float2 param_1 = metadataEntryCoord;
+    int param_2 = 0;
+    float4 colorTexMatrix0 = fetchUnscaled(textureMetadata, textureMetadataSmplr, param, param_1, param_2);
+    float2 param_3 = metadataScale;
+    float2 param_4 = metadataEntryCoord;
+    int param_5 = 1;
+    float4 colorTexOffsets = fetchUnscaled(textureMetadata, textureMetadataSmplr, param_3, param_4, param_5);
+    float2 param_6 = metadataScale;
+    float2 param_7 = metadataEntryCoord;
+    int param_8 = 2;
+    float4 baseColor = fetchUnscaled(textureMetadata, textureMetadataSmplr, param_6, param_7, param_8);
+    float2 param_9 = metadataScale;
+    float2 param_10 = metadataEntryCoord;
+    int param_11 = 3;
+    float4 filterParams0 = fetchUnscaled(textureMetadata, textureMetadataSmplr, param_9, param_10, param_11);
+    float2 param_12 = metadataScale;
+    float2 param_13 = metadataEntryCoord;
+    int param_14 = 4;
+    float4 filterParams1 = fetchUnscaled(textureMetadata, textureMetadataSmplr, param_12, param_13, param_14);
+    float2 param_15 = metadataScale;
+    float2 param_16 = metadataEntryCoord;
+    int param_17 = 5;
+    float4 filterParams2 = fetchUnscaled(textureMetadata, textureMetadataSmplr, param_15, param_16, param_17);
+    float2 param_18 = metadataScale;
+    float2 param_19 = metadataEntryCoord;
+    int param_20 = 6;
+    float4 extra = fetchUnscaled(textureMetadata, textureMetadataSmplr, param_18, param_19, param_20);
+    outColorTexCoord0 = (float2x2(float2(colorTexMatrix0.xy), float2(colorTexMatrix0.zw)) * position) + colorTexOffsets.xy;
+    outBaseColor = baseColor;
+    outFilterParams0 = filterParams0;
+    outFilterParams1 = filterParams1;
+    outFilterParams2 = filterParams2;
+    outCtrl = int(extra.x);
+}
+
+static inline __attribute__((always_inline)) // Returns min(maskAlpha, coverage) from the mask texture; maskCtrl == 0 bypasses the mask, bit 0 picks abs() over the mod-2 fold.
+float sampleMask(thread const float& maskAlpha, thread const texture2d<float> maskTexture, thread const sampler maskTextureSmplr, thread const float2& maskTextureSize, thread const float3& maskTexCoord, thread const int& maskCtrl)
+{
+    if (maskCtrl == 0)
+    {
+        return maskAlpha;
+    }
+    int2 maskTexCoordI = int2(floor(maskTexCoord.xy));
+    float4 texel = maskTexture.sample(maskTextureSmplr, ((float2(maskTexCoordI / int2(1, 4)) + float2(0.5)) / maskTextureSize), level(0.0)); // Each texel packs four rows, one per channel.
+    float coverage = texel[maskTexCoordI.y % 4] + maskTexCoord.z;
+    if ((maskCtrl & 1) != 0)
+    {
+        coverage = abs(coverage);
+    }
+    else
+    {
+        coverage = 1.0 - abs(1.0 - mod(coverage, 2.0));
+    }
+    return fast::min(maskAlpha, coverage);
+}
+
+static inline __attribute__((always_inline)) // Solves the quadratic for the gradient parameter t and samples colorTexture at uvOrigin + (clamp(t, 0, 1), 0).
+float4 filterRadialGradient(thread const float2& colorTexCoord, thread const texture2d<float> colorTexture, thread const sampler colorTextureSmplr, thread const float2& colorTextureSize, thread const float2& fragCoord, thread const float2& framebufferSize, thread const float4& filterParams0, thread const float4& filterParams1)
+{
+    float2 lineFrom = filterParams0.xy;
+    float2 lineVector = filterParams0.zw;
+    float2 radii = filterParams1.xy;
+    float2 uvOrigin = filterParams1.zw;
+    float2 dP = colorTexCoord - lineFrom;
+    float2 dC = lineVector;
+    float dR = radii.y - radii.x;
+    float a = dot(dC, dC) - (dR * dR);
+    float b = dot(dP, dC) + (radii.x * dR);
+    float c = dot(dP, dP) - (radii.x * radii.x);
+    float discrim = (b * b) - (a * c);
+    float4 color = float4(0.0);
+    if (abs(discrim) >= 9.9999997473787516355514526367188e-06) // NOTE(review): abs() also admits negative discriminants, for which sqrt() below is NaN; confirm this is intended.
+    {
+        float2 ts = float2((float2(1.0, -1.0) * sqrt(discrim)) + float2(b)) / float2(a);
+        if (ts.x > ts.y)
+        {
+            ts = ts.yx;
+        }
+        float _595;
+        if (ts.x >= 0.0) // Prefer the smaller root when it is non-negative.
+        {
+            _595 = ts.x;
+        }
+        else
+        {
+            _595 = ts.y;
+        }
+        float t = _595;
+        color = colorTexture.sample(colorTextureSmplr, (uvOrigin + float2(fast::clamp(t, 0.0, 1.0), 0.0)), level(0.0));
+    }
+    return color;
+}
+
+static inline __attribute__((always_inline)) // Separable blur along filterParams0.xy: weighted sum of symmetric tap pairs, normalized by the total weight.
+float4 filterBlur(thread const float2& colorTexCoord, thread const texture2d<float> colorTexture, thread const sampler colorTextureSmplr, thread const float2& colorTextureSize, thread const float4& filterParams0, thread const float4& filterParams1)
+{
+    float2 srcOffsetScale = filterParams0.xy / colorTextureSize;
+    int support = int(filterParams0.z);
+    float3 gaussCoeff = filterParams1.xyz;
+    float gaussSum = gaussCoeff.x;
+    float4 color = colorTexture.sample(colorTextureSmplr, colorTexCoord, level(0.0)) * gaussCoeff.x;
+    float2 _640 = gaussCoeff.xy * gaussCoeff.yz; // Advances the incrementally computed weights by one tap.
+    gaussCoeff = float3(_640.x, _640.y, gaussCoeff.z);
+    for (int i = 1; i <= support; i += 2) // Two taps per iteration, fetched with one sample on each side.
+    {
+        float gaussPartialSum = gaussCoeff.x;
+        float2 _660 = gaussCoeff.xy * gaussCoeff.yz;
+        gaussCoeff = float3(_660.x, _660.y, gaussCoeff.z);
+        gaussPartialSum += gaussCoeff.x;
+        float2 srcOffset = srcOffsetScale * (float(i) + (gaussCoeff.x / gaussPartialSum));
+        color += ((colorTexture.sample(colorTextureSmplr, (colorTexCoord - srcOffset), level(0.0)) + colorTexture.sample(colorTextureSmplr, (colorTexCoord + srcOffset), level(0.0))) * gaussPartialSum);
+        gaussSum += (2.0 * gaussPartialSum);
+        float2 _700 = gaussCoeff.xy * gaussCoeff.yz;
+        gaussCoeff = float3(_700.x, _700.y, gaussCoeff.z);
+    }
+    return color / float4(gaussSum);
+}
+
+static inline __attribute__((always_inline)) // Samples the x channel of colorTexture at colorTexCoord shifted horizontally by offset.
+float filterTextSample1Tap(thread const float& offset, thread const texture2d<float> colorTexture, thread const sampler colorTextureSmplr, thread const float2& colorTexCoord)
+{
+    return colorTexture.sample(colorTextureSmplr, (colorTexCoord + float2(offset, 0.0)), level(0.0)).x;
+}
+
+static inline __attribute__((always_inline)) // Gathers taps at -4..+4 pixels into left/center/right; the outermost two are sampled only when kernel0.x > 0, else 0.
+void filterTextSample9Tap(thread float4& outAlphaLeft, thread float& outAlphaCenter, thread float4& outAlphaRight, thread const texture2d<float> colorTexture, thread const sampler colorTextureSmplr, thread const float2& colorTexCoord, thread const float4& kernel0, thread const float& onePixel)
+{
+    bool wide = kernel0.x > 0.0;
+    float _276;
+    if (wide)
+    {
+        float param = (-4.0) * onePixel;
+        float2 param_1 = colorTexCoord;
+        _276 = filterTextSample1Tap(param, colorTexture, colorTextureSmplr, param_1);
+    }
+    else
+    {
+        _276 = 0.0;
+    }
+    float param_2 = (-3.0) * onePixel;
+    float2 param_3 = colorTexCoord;
+    float param_4 = (-2.0) * onePixel;
+    float2 param_5 = colorTexCoord;
+    float param_6 = (-1.0) * onePixel;
+    float2 param_7 = colorTexCoord;
+    outAlphaLeft = float4(_276, filterTextSample1Tap(param_2, colorTexture, colorTextureSmplr, param_3), filterTextSample1Tap(param_4, colorTexture, colorTextureSmplr, param_5), filterTextSample1Tap(param_6, colorTexture, colorTextureSmplr, param_7));
+    float param_8 = 0.0;
+    float2 param_9 = colorTexCoord;
+    outAlphaCenter = filterTextSample1Tap(param_8, colorTexture, colorTextureSmplr, param_9);
+    float param_10 = 1.0 * onePixel;
+    float2 param_11 = colorTexCoord;
+    float param_12 = 2.0 * onePixel;
+    float2 param_13 = colorTexCoord;
+    float param_14 = 3.0 * onePixel;
+    float2 param_15 = colorTexCoord;
+    float _336;
+    if (wide)
+    {
+        float param_16 = 4.0 * onePixel;
+        float2 param_17 = colorTexCoord;
+        _336 = filterTextSample1Tap(param_16, colorTexture, colorTextureSmplr, param_17);
+    }
+    else
+    {
+        _336 = 0.0;
+    }
+    outAlphaRight = float4(filterTextSample1Tap(param_10, colorTexture, colorTextureSmplr, param_11), filterTextSample1Tap(param_12, colorTexture, colorTextureSmplr, param_13), filterTextSample1Tap(param_14, colorTexture, colorTextureSmplr, param_15), _336);
+}
+
+static inline __attribute__((always_inline)) // Seven-tap symmetric convolution: alpha0 against kernel0, alpha1 against kernel0.zyx.
+float filterTextConvolve7Tap(thread const float4& alpha0, thread const float3& alpha1, thread const float4& kernel0)
+{
+    return dot(alpha0, kernel0) + dot(alpha1, kernel0.zyx);
+}
+
+static inline __attribute__((always_inline)) // Looks up one channel in gammaLUT at (fgColor, 1 - bgColor).
+float filterTextGammaCorrectChannel(thread const float& bgColor, thread const float& fgColor, thread const texture2d<float> gammaLUT, thread const sampler gammaLUTSmplr)
+{
+    return gammaLUT.sample(gammaLUTSmplr, float2(fgColor, 1.0 - bgColor), level(0.0)).x;
+}
+
+static inline __attribute__((always_inline)) // Applies filterTextGammaCorrectChannel to x, y and z independently.
+float3 filterTextGammaCorrect(thread const float3& bgColor, thread const float3& fgColor, thread const texture2d<float> gammaLUT, thread const sampler gammaLUTSmplr)
+{
+    float param = bgColor.x;
+    float param_1 = fgColor.x;
+    float param_2 = bgColor.y;
+    float param_3 = fgColor.y;
+    float param_4 = bgColor.z;
+    float param_5 = fgColor.z;
+    return float3(filterTextGammaCorrectChannel(param, param_1, gammaLUT, gammaLUTSmplr), filterTextGammaCorrectChannel(param_2, param_3, gammaLUT, gammaLUTSmplr), filterTextGammaCorrectChannel(param_4, param_5, gammaLUT, gammaLUTSmplr));
+}
+
+static inline __attribute__((always_inline)) // Text filter: per-channel alpha (plain sample, or three shifted 7-tap convolutions), optional gamma LUT, then mix(bg, fg, alpha).
+float4 filterText(thread const float2& colorTexCoord, thread const texture2d<float> colorTexture, thread const sampler colorTextureSmplr, thread const texture2d<float> gammaLUT, thread const sampler gammaLUTSmplr, thread const float2& colorTextureSize, thread const float4& filterParams0, thread const float4& filterParams1, thread const float4& filterParams2)
+{
+    float4 kernel0 = filterParams0;
+    float3 bgColor = filterParams1.xyz;
+    float3 fgColor = filterParams2.xyz;
+    bool gammaCorrectionEnabled = filterParams2.w != 0.0;
+    float3 alpha;
+    if (kernel0.w == 0.0) // Zero centre weight: skip the convolution and use the raw sample for all channels.
+    {
+        alpha = colorTexture.sample(colorTextureSmplr, colorTexCoord, level(0.0)).xxx;
+    }
+    else
+    {
+        float2 param_3 = colorTexCoord;
+        float4 param_4 = kernel0;
+        float param_5 = 1.0 / colorTextureSize.x;
+        float4 param;
+        float param_1;
+        float4 param_2;
+        filterTextSample9Tap(param, param_1, param_2, colorTexture, colorTextureSmplr, param_3, param_4, param_5);
+        float4 alphaLeft = param;
+        float alphaCenter = param_1;
+        float4 alphaRight = param_2;
+        float4 param_6 = alphaLeft;
+        float3 param_7 = float3(alphaCenter, alphaRight.xy);
+        float4 param_8 = kernel0;
+        float r = filterTextConvolve7Tap(param_6, param_7, param_8); // Window shifted one tap left.
+        float4 param_9 = float4(alphaLeft.yzw, alphaCenter);
+        float3 param_10 = alphaRight.xyz;
+        float4 param_11 = kernel0;
+        float g = filterTextConvolve7Tap(param_9, param_10, param_11); // Centred window.
+        float4 param_12 = float4(alphaLeft.zw, alphaCenter, alphaRight.x);
+        float3 param_13 = alphaRight.yzw;
+        float4 param_14 = kernel0;
+        float b = filterTextConvolve7Tap(param_12, param_13, param_14); // Window shifted one tap right.
+        alpha = float3(r, g, b);
+    }
+    if (gammaCorrectionEnabled)
+    {
+        float3 param_15 = bgColor;
+        float3 param_16 = alpha;
+        alpha = filterTextGammaCorrect(param_15, param_16, gammaLUT, gammaLUTSmplr);
+    }
+    return float4(mix(bgColor, fgColor, alpha), 1.0);
+}
+
+static inline __attribute__((always_inline)) // Samples colorTexture at colorTexCoord, mip level 0.
+float4 sampleColor(thread const texture2d<float> colorTexture, thread const sampler colorTextureSmplr, thread const float2& colorTexCoord)
+{
+    return colorTexture.sample(colorTextureSmplr, colorTexCoord, level(0.0));
+}
+
+static inline __attribute__((always_inline)) // Pass-through filter: forwards to sampleColor.
+float4 filterNone(thread const float2& colorTexCoord, thread const texture2d<float> colorTexture, thread const sampler colorTextureSmplr)
+{
+    float2 param = colorTexCoord;
+    return sampleColor(colorTexture, colorTextureSmplr, param);
+}
+
+static inline __attribute__((always_inline)) // Dispatches on colorFilter: 1 = radial gradient, 2 = text, 3 = blur, anything else = filterNone.
+float4 filterColor(thread const float2& colorTexCoord, thread const texture2d<float> colorTexture, thread const sampler colorTextureSmplr, thread const texture2d<float> gammaLUT, thread const sampler gammaLUTSmplr, thread const float2& colorTextureSize, thread const float2& fragCoord, thread const float2& framebufferSize, thread const float4& filterParams0, thread const float4& filterParams1, thread const float4& filterParams2, thread const int& colorFilter)
+{
+    switch (colorFilter)
+    {
+        case 1:
+        {
+            float2 param = colorTexCoord;
+            float2 param_1 = colorTextureSize;
+            float2 param_2 = fragCoord;
+            float2 param_3 = framebufferSize;
+            float4 param_4 = filterParams0;
+            float4 param_5 = filterParams1;
+            return filterRadialGradient(param, colorTexture, colorTextureSmplr, param_1, param_2, param_3, param_4, param_5);
+        }
+        case 3:
+        {
+            float2 param_6 = colorTexCoord;
+            float2 param_7 = colorTextureSize;
+            float4 param_8 = filterParams0;
+            float4 param_9 = filterParams1;
+            return filterBlur(param_6, colorTexture, colorTextureSmplr, param_7, param_8, param_9);
+        }
+        case 2:
+        {
+            float2 param_10 = colorTexCoord;
+            float2 param_11 = colorTextureSize;
+            float4 param_12 = filterParams0;
+            float4 param_13 = filterParams1;
+            float4 param_14 = filterParams2;
+            return filterText(param_10, colorTexture, colorTextureSmplr, gammaLUT, gammaLUTSmplr, param_11, param_12, param_13, param_14);
+        }
+    }
+    float2 param_15 = colorTexCoord;
+    return filterNone(param_15, colorTexture, colorTextureSmplr);
+}
+
+static inline __attribute__((always_inline)) // op 1: src rgb with multiplied alphas; op 2: dest rgb with multiplied alphas; otherwise destColor unchanged.
+float4 combineColor0(thread const float4& destColor, thread const float4& srcColor, thread const int& op)
+{
+    switch (op)
+    {
+        case 1:
+        {
+            return float4(srcColor.xyz, srcColor.w * destColor.w);
+        }
+        case 2:
+        {
+            return float4(destColor.xyz, srcColor.w * destColor.w);
+        }
+    }
+    return destColor;
+}
+
+static inline __attribute__((always_inline)) // Screen blend: dest + src - dest * src.
+float3 compositeScreen(thread const float3& destColor, thread const float3& srcColor)
+{
+    return (destColor + srcColor) - (destColor * srcColor);
+}
+
+static inline __attribute__((always_inline)) // Component-wise select: ifTrue where cond is set, ifFalse elsewhere.
+float3 compositeSelect(thread const bool3& cond, thread const float3& ifTrue, thread const float3& ifFalse)
+{
+    float _766;
+    if (cond.x)
+    {
+        _766 = ifTrue.x;
+    }
+    else
+    {
+        _766 = ifFalse.x;
+    }
+    float _777;
+    if (cond.y)
+    {
+        _777 = ifTrue.y;
+    }
+    else
+    {
+        _777 = ifFalse.y;
+    }
+    float _788;
+    if (cond.z)
+    {
+        _788 = ifTrue.z;
+    }
+    else
+    {
+        _788 = ifFalse.z;
+    }
+    return float3(_766, _777, _788);
+}
+
+static inline __attribute__((always_inline)) // Hard light: 2 * dest * src where src <= 0.5, else screen(dest, 2 * src - 1).
+float3 compositeHardLight(thread const float3& destColor, thread const float3& srcColor)
+{
+    float3 param = destColor;
+    float3 param_1 = (float3(2.0) * srcColor) - float3(1.0);
+    bool3 param_2 = srcColor <= float3(0.5);
+    float3 param_3 = (destColor * float3(2.0)) * srcColor;
+    float3 param_4 = compositeScreen(param, param_1);
+    return compositeSelect(param_2, param_3, param_4);
+}
+
+static inline __attribute__((always_inline)) // Color dodge: 0 where dest == 0, else 1 where src == 1, else dest / (1 - src).
+float3 compositeColorDodge(thread const float3& destColor, thread const float3& srcColor)
+{
+    bool3 destZero = destColor == float3(0.0);
+    bool3 srcOne = srcColor == float3(1.0);
+    bool3 param = srcOne;
+    float3 param_1 = float3(1.0);
+    float3 param_2 = destColor / (float3(1.0) - srcColor);
+    bool3 param_3 = destZero;
+    float3 param_4 = float3(0.0);
+    float3 param_5 = compositeSelect(param, param_1, param_2);
+    return compositeSelect(param_3, param_4, param_5);
+}
+
+static inline __attribute__((always_inline)) // Soft light: dest + (2 * src - 1) * factor, with factor chosen per component by src <= 0.5.
+float3 compositeSoftLight(thread const float3& destColor, thread const float3& srcColor)
+{
+    bool3 param = destColor <= float3(0.25);
+    float3 param_1 = ((((float3(16.0) * destColor) - float3(12.0)) * destColor) + float3(4.0)) * destColor;
+    float3 param_2 = sqrt(destColor);
+    float3 darkenedDestColor = compositeSelect(param, param_1, param_2);
+    bool3 param_3 = srcColor <= float3(0.5);
+    float3 param_4 = destColor * (float3(1.0) - destColor);
+    float3 param_5 = darkenedDestColor - destColor;
+    float3 factor = compositeSelect(param_3, param_4, param_5);
+    return destColor + (((srcColor * 2.0) - float3(1.0)) * factor);
+}
+
+static inline __attribute__((always_inline)) // Division that returns 0 instead of dividing by zero.
+float compositeDivide(thread const float& num, thread const float& denom)
+{
+    float _802;
+    if (denom != 0.0)
+    {
+        _802 = num / denom;
+    }
+    else
+    {
+        _802 = 0.0;
+    }
+    return _802;
+}
+
+static inline __attribute__((always_inline))
+float3 compositeRGBToHSL(thread const float3& rgb)
+{
+    float v = fast::max(fast::max(rgb.x, 
rgb.y), rgb.z); + float xMin = fast::min(fast::min(rgb.x, rgb.y), rgb.z); + float c = v - xMin; + float l = mix(xMin, v, 0.5); + float3 _908; + if (rgb.x == v) + { + _908 = float3(0.0, rgb.yz); + } + else + { + float3 _921; + if (rgb.y == v) + { + _921 = float3(2.0, rgb.zx); + } + else + { + _921 = float3(4.0, rgb.xy); + } + _908 = _921; + } + float3 terms = _908; + float param = ((terms.x * c) + terms.y) - terms.z; + float param_1 = c; + float h = 1.0471975803375244140625 * compositeDivide(param, param_1); + float param_2 = c; + float param_3 = v; + float s = compositeDivide(param_2, param_3); + return float3(h, s, l); +} + +static inline __attribute__((always_inline)) +float3 compositeHSL(thread const float3& destColor, thread const float3& srcColor, thread const int& op) +{ + switch (op) + { + case 12: + { + return float3(srcColor.x, destColor.y, destColor.z); + } + case 13: + { + return float3(destColor.x, srcColor.y, destColor.z); + } + case 14: + { + return float3(srcColor.x, srcColor.y, destColor.z); + } + default: + { + return float3(destColor.x, destColor.y, srcColor.z); + } + } +} + +static inline __attribute__((always_inline)) +float3 compositeHSLToRGB(thread const float3& hsl) +{ + float a = hsl.y * fast::min(hsl.z, 1.0 - hsl.z); + float3 ks = mod(float3(0.0, 8.0, 4.0) + float3(hsl.x * 1.90985929965972900390625), float3(12.0)); + return hsl.zzz - (fast::clamp(fast::min(ks - float3(3.0), float3(9.0) - ks), float3(-1.0), float3(1.0)) * a); +} + +static inline __attribute__((always_inline)) +float3 compositeRGB(thread const float3& destColor, thread const float3& srcColor, thread const int& op) +{ + switch (op) + { + case 1: + { + return destColor * srcColor; + } + case 2: + { + float3 param = destColor; + float3 param_1 = srcColor; + return compositeScreen(param, param_1); + } + case 3: + { + float3 param_2 = srcColor; + float3 param_3 = destColor; + return compositeHardLight(param_2, param_3); + } + case 4: + { + return fast::min(destColor, srcColor); + 
} + case 5: + { + return fast::max(destColor, srcColor); + } + case 6: + { + float3 param_4 = destColor; + float3 param_5 = srcColor; + return compositeColorDodge(param_4, param_5); + } + case 7: + { + float3 param_6 = float3(1.0) - destColor; + float3 param_7 = float3(1.0) - srcColor; + return float3(1.0) - compositeColorDodge(param_6, param_7); + } + case 8: + { + float3 param_8 = destColor; + float3 param_9 = srcColor; + return compositeHardLight(param_8, param_9); + } + case 9: + { + float3 param_10 = destColor; + float3 param_11 = srcColor; + return compositeSoftLight(param_10, param_11); + } + case 10: + { + return abs(destColor - srcColor); + } + case 11: + { + return (destColor + srcColor) - ((float3(2.0) * destColor) * srcColor); + } + case 12: + case 13: + case 14: + case 15: + { + float3 param_12 = destColor; + float3 param_13 = srcColor; + float3 param_14 = compositeRGBToHSL(param_12); + float3 param_15 = compositeRGBToHSL(param_13); + int param_16 = op; + float3 param_17 = compositeHSL(param_14, param_15, param_16); + return compositeHSLToRGB(param_17); + } + } + return srcColor; +} + +static inline __attribute__((always_inline)) +float4 composite(thread const float4& srcColor, thread const texture2d destTexture, thread const sampler destTextureSmplr, thread const float2& destTextureSize, thread const float2& fragCoord, thread const int& op) +{ + if (op == 0) + { + return srcColor; + } + float2 destTexCoord = fragCoord / destTextureSize; + float4 destColor = destTexture.sample(destTextureSmplr, destTexCoord, level(0.0)); + float3 param = destColor.xyz; + float3 param_1 = srcColor.xyz; + int param_2 = op; + float3 blendedRGB = compositeRGB(param, param_1, param_2); + return float4(((srcColor.xyz * (srcColor.w * (1.0 - destColor.w))) + (blendedRGB * (srcColor.w * destColor.w))) + (destColor.xyz * (1.0 - srcColor.w)), 1.0); +} + +static inline __attribute__((always_inline)) +float4 calculateColor(thread const float2& fragCoord, thread const texture2d 
colorTexture0, thread const sampler colorTexture0Smplr, thread const texture2d maskTexture0, thread const sampler maskTexture0Smplr, thread const texture2d destTexture, thread const sampler destTextureSmplr, thread const texture2d gammaLUT, thread const sampler gammaLUTSmplr, thread const float2& colorTextureSize0, thread const float2& maskTextureSize0, thread const float4& filterParams0, thread const float4& filterParams1, thread const float4& filterParams2, thread const float2& framebufferSize, thread const int& ctrl, thread const float3& maskTexCoord0, thread const float2& colorTexCoord0, thread const float4& baseColor, thread const int& tileCtrl) +{ + int maskCtrl0 = (tileCtrl >> 0) & 3; + float maskAlpha = 1.0; + float param = maskAlpha; + float2 param_1 = maskTextureSize0; + float3 param_2 = maskTexCoord0; + int param_3 = maskCtrl0; + maskAlpha = sampleMask(param, maskTexture0, maskTexture0Smplr, param_1, param_2, param_3); + float4 color = baseColor; + int color0Combine = (ctrl >> 6) & 3; + if (color0Combine != 0) + { + int color0Filter = (ctrl >> 4) & 3; + float2 param_4 = colorTexCoord0; + float2 param_5 = colorTextureSize0; + float2 param_6 = fragCoord; + float2 param_7 = framebufferSize; + float4 param_8 = filterParams0; + float4 param_9 = filterParams1; + float4 param_10 = filterParams2; + int param_11 = color0Filter; + float4 color0 = filterColor(param_4, colorTexture0, colorTexture0Smplr, gammaLUT, gammaLUTSmplr, param_5, param_6, param_7, param_8, param_9, param_10, param_11); + float4 param_12 = color; + float4 param_13 = color0; + int param_14 = color0Combine; + color = combineColor0(param_12, param_13, param_14); + } + color.w *= maskAlpha; + int compositeOp = (ctrl >> 8) & 15; + float4 param_15 = color; + float2 param_16 = framebufferSize; + float2 param_17 = fragCoord; + int param_18 = compositeOp; + color = composite(param_15, destTexture, destTextureSmplr, param_16, param_17, param_18); + float3 _1364 = color.xyz * color.w; + color = 
float4(_1364.x, _1364.y, _1364.z, color.w); + return color; +} + +kernel void main0(constant int2& uFramebufferTileSize [[buffer(3)]], constant int& uLoadAction [[buffer(4)]], constant int2& uTextureMetadataSize [[buffer(7)]], constant float2& uFramebufferSize [[buffer(0)]], constant float2& uTileSize [[buffer(1)]], constant float4& uClearColor [[buffer(5)]], constant float2& uColorTextureSize0 [[buffer(8)]], constant float2& uMaskTextureSize0 [[buffer(9)]], const device bFirstTileMap& _1510 [[buffer(2)]], const device bTiles& _1603 [[buffer(6)]], texture2d uDestImage [[texture(0)]], texture2d uTextureMetadata [[texture(1)]], texture2d uColorTexture0 [[texture(2)]], texture2d uMaskTexture0 [[texture(3)]], texture2d uDestTexture [[texture(4)]], texture2d uGammaLUT [[texture(5)]], sampler uTextureMetadataSmplr [[sampler(0)]], sampler uColorTexture0Smplr [[sampler(1)]], sampler uMaskTexture0Smplr [[sampler(2)]], sampler uDestTextureSmplr [[sampler(3)]], sampler uGammaLUTSmplr [[sampler(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +{ + int2 tileCoord = int2(gl_WorkGroupID.xy); + int2 firstTileSubCoord = int2(gl_LocalInvocationID.xy) * int2(1, 4); + int2 firstFragCoord = (tileCoord * int2(uTileSize)) + firstTileSubCoord; + int tileIndex = _1510.iFirstTileMap[tileCoord.x + (uFramebufferTileSize.x * tileCoord.y)]; + if ((tileIndex < 0) && (uLoadAction != 0)) + { + return; + } + float4x4 destColors; + for (int subY = 0; subY < 4; subY++) + { + if (uLoadAction == 0) + { + destColors[subY] = uClearColor; + } + else + { + int2 param = firstFragCoord + int2(0, subY); + int2 imageCoords = toImageCoords(param, uFramebufferSize); + destColors[subY] = uDestImage.read(uint2(imageCoords)); + } + } + int backdrop; + uint2 maskTileCoord; + float2 param_4; + float4 param_5; + float4 param_6; + float4 param_7; + float4 param_8; + int param_9; + while (tileIndex >= 0) + { + for (int subY_1 = 0; subY_1 < 4; 
subY_1++) + { + int2 tileSubCoord = firstTileSubCoord + int2(0, subY_1); + float2 fragCoord = float2(firstFragCoord + int2(0, subY_1)) + float2(0.5); + int alphaTileIndex = int(_1603.iTiles[(tileIndex * 4) + 2] << uint(8)) >> 8; + uint tileControlWord = _1603.iTiles[(tileIndex * 4) + 3]; + uint colorEntry = tileControlWord & 65535u; + int tileCtrl = int((tileControlWord >> uint(16)) & 255u); + if (alphaTileIndex >= 0) + { + backdrop = 0; + maskTileCoord = uint2(uint(alphaTileIndex & 255), uint(alphaTileIndex >> 8)) * uint2(uTileSize); + } + else + { + backdrop = int(tileControlWord) >> 24; + maskTileCoord = uint2(0u); + tileCtrl &= (-4); + } + float3 maskTexCoord0 = float3(float2(int2(maskTileCoord) + tileSubCoord), float(backdrop)); + float2 param_1 = fragCoord; + int param_2 = int(colorEntry); + int2 param_3 = uTextureMetadataSize; + computeTileVaryings(param_1, param_2, uTextureMetadata, uTextureMetadataSmplr, param_3, param_4, param_5, param_6, param_7, param_8, param_9); + float2 colorTexCoord0 = param_4; + float4 baseColor = param_5; + float4 filterParams0 = param_6; + float4 filterParams1 = param_7; + float4 filterParams2 = param_8; + int ctrl = param_9; + float2 param_10 = fragCoord; + float2 param_11 = uColorTextureSize0; + float2 param_12 = uMaskTextureSize0; + float4 param_13 = filterParams0; + float4 param_14 = filterParams1; + float4 param_15 = filterParams2; + float2 param_16 = uFramebufferSize; + int param_17 = ctrl; + float3 param_18 = maskTexCoord0; + float2 param_19 = colorTexCoord0; + float4 param_20 = baseColor; + int param_21 = tileCtrl; + float4 srcColor = calculateColor(param_10, uColorTexture0, uColorTexture0Smplr, uMaskTexture0, uMaskTexture0Smplr, uDestTexture, uDestTextureSmplr, uGammaLUT, uGammaLUTSmplr, param_11, param_12, param_13, param_14, param_15, param_16, param_17, param_18, param_19, param_20, param_21); + destColors[subY_1] = (destColors[subY_1] * (1.0 - srcColor.w)) + srcColor; + } + tileIndex = int(_1603.iTiles[(tileIndex * 
4) + 0]); + } + for (int subY_2 = 0; subY_2 < 4; subY_2++) + { + int2 param_22 = firstFragCoord + int2(0, subY_2); + uDestImage.write(destColors[subY_2], uint2(toImageCoords(param_22, uFramebufferSize))); + } +} + diff --git a/shaders/d3d11/bin.cs.glsl b/shaders/d3d11/bin.cs.glsl new file mode 100644 index 00000000..d34260eb --- /dev/null +++ b/shaders/d3d11/bin.cs.glsl @@ -0,0 +1,256 @@ +#version 430 + +// pathfinder/shaders/bin.cs.glsl +// +// Copyright © 2020 The Pathfinder Project Developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// Assigns microlines to tiles. + +#extension GL_GOOGLE_include_directive : enable + +#define MAX_ITERATIONS 1024u + +#define STEP_DIRECTION_NONE 0 +#define STEP_DIRECTION_X 1 +#define STEP_DIRECTION_Y 2 + +#define TILE_FIELD_NEXT_TILE_ID 0 +#define TILE_FIELD_FIRST_FILL_ID 1 +#define TILE_FIELD_BACKDROP_ALPHA_TILE_ID 2 +#define TILE_FIELD_CONTROL 3 + +precision highp float; + +#ifdef GL_ES +precision highp sampler2D; +#endif + +layout(local_size_x = 64) in; + +uniform int uMicrolineCount; +// How many slots we have allocated for fills. 
+uniform int uMaxFillCount;
+
+// Input microlines, one packed uvec4 each; see `unpackMicroline()` for the encoding.
+layout(std430, binding = 0) buffer bMicrolines {
+    restrict readonly uvec4 iMicrolines[];
+};
+
+// Per-path metadata, three ivec4s per path (`main()` reads `iMetadata[pathIndex * 3 + n]`).
+layout(std430, binding = 1) buffer bMetadata {
+    // [0]: tile rect
+    // [1].x: tile offset
+    // [1].y: path ID
+    // [1].z: z write flag
+    // [1].w: clip path ID
+    // [2].x: backdrop offset
+    restrict readonly ivec4 iMetadata[];
+};
+
+// [0]: vertexCount (6)
+// [1]: instanceCount (of fills)
+// [2]: vertexStart (0)
+// [3]: baseInstance (0)
+// [4]: alpha tile count
+layout(std430, binding = 2) buffer bIndirectDrawParams {
+    restrict uint iIndirectDrawParams[];
+};
+
+// Output fills, three uints each (written by `addFill()`):
+// [0]: from point, X in the low 16 bits and Y in the high 16 bits
+// [1]: to point, packed the same way
+// [2]: index of the fill that was previously first in the same tile's list
+layout(std430, binding = 3) buffer bFills {
+    restrict writeonly uint iFills[];
+};
+
+layout(std430, binding = 4) buffer bTiles {
+    // [0]: next tile ID (initialized to -1)
+    // [1]: first fill ID (initialized to -1)
+    // [2]: backdrop delta upper 8 bits, alpha tile ID lower 24 (initialized to 0, -1 respectively)
+    // [3]: color/ctrl/backdrop word
+    restrict uint iTiles[];
+};
+
+layout(std430, binding = 5) buffer bBackdrops {
+    // [0]: backdrop
+    // [1]: tile X offset
+    // [2]: path ID
+    restrict uint iBackdrops[];
+};
+
+// Returns the tile index for `tileCoords`, treating the path's tiles as stored row by row
+// starting at `pathTileOffset`, with a row length of `pathTileRect.z - pathTileRect.x`.
+//
+// No bounds check is performed; the result is only meaningful if `tileCoords` is inside the rect.
+uint computeTileIndexNoCheck(ivec2 tileCoords, ivec4 pathTileRect, uint pathTileOffset) {
+    ivec2 offsetCoords = tileCoords - pathTileRect.xy;
+    return pathTileOffset + offsetCoords.x + offsetCoords.y * (pathTileRect.z - pathTileRect.x);
+}
+
+// Returns four flags telling on which sides `tileCoords` falls outside `pathTileRect`:
+// (x < rect.x, y < rect.y, x >= rect.z, y >= rect.w). All false means the tile is inside.
+bvec4 computeTileOutcodes(ivec2 tileCoords, ivec4 pathTileRect) {
+    return bvec4(lessThan(tileCoords, pathTileRect.xy),
+                 greaterThanEqual(tileCoords, pathTileRect.zw));
+}
+
+// Writes the tile index for `tileCoords` to `outTileIndex` and returns true if the tile lies
+// inside `pathTileRect`. `outTileIndex` is written either way; ignore it when this returns false.
+bool computeTileIndex(ivec2 tileCoords,
+                      ivec4 pathTileRect,
+                      uint pathTileOffset,
+                      out uint outTileIndex) {
+    outTileIndex = computeTileIndexNoCheck(tileCoords, pathTileRect, pathTileOffset);
+    return !any(computeTileOutcodes(tileCoords, pathTileRect));
+}
+
+// Adds `lineSegment` (from in .xy, to in .zw) to the fill list of the tile at `tileCoords`.
+//
+// The segment is made relative to the tile origin (`tileCoords * 16`) and scaled by 256 before
+// being packed as 16-bit fields. Segments outside the path's tile rect, or whose two scaled X
+// coordinates are equal, are dropped.
+void addFill(vec4 lineSegment, ivec2 tileCoords, ivec4 pathTileRect, uint pathTileOffset) {
+    // Compute tile offset. If out of bounds, cull.
+    uint tileIndex;
+    if (!computeTileIndex(tileCoords, pathTileRect, pathTileOffset, tileIndex)) {
+        return;
+    }
+
+    // Clip line. If too narrow, cull.
+    uvec4 scaledLocalLine = uvec4((lineSegment - vec4(tileCoords.xyxy * ivec4(16))) * vec4(256.0));
+    if (scaledLocalLine.x == scaledLocalLine.z)
+        return;
+
+    // Bump instance count.
+    //
+    // This happens before the capacity check below, so the counter keeps growing past
+    // `uMaxFillCount` even though the fill itself is not stored.
+    // NOTE(review): presumably the host reads the counter back to detect overflow — confirm.
+    uint fillIndex = atomicAdd(iIndirectDrawParams[1], 1);
+
+    // Fill out the link field, inserting into the linked list.
+    uint fillLink = atomicExchange(iTiles[tileIndex * 4 + TILE_FIELD_FIRST_FILL_ID],
+                                   int(fillIndex));
+
+    // Write fill.
+    if (fillIndex < uMaxFillCount) {
+        iFills[fillIndex * 3 + 0] = scaledLocalLine.x | (scaledLocalLine.y << 16);
+        iFills[fillIndex * 3 + 1] = scaledLocalLine.z | (scaledLocalLine.w << 16);
+        iFills[fillIndex * 3 + 2] = fillLink;
+    }
+}
+
+// Atomically adds `backdropDelta` to the backdrop of the tile at `tileCoords`.
+//
+// Inside the path's tile rect the delta goes into the upper 8 bits of the tile's
+// backdrop/alpha-tile word. For a tile within the rect's X range but with y < rect.y, it goes
+// into the `iBackdrops` entry for that column instead. Any other tile is ignored.
+void adjustBackdrop(int backdropDelta,
+                    ivec2 tileCoords,
+                    ivec4 pathTileRect,
+                    uint pathTileOffset,
+                    uint pathBackdropOffset) {
+    bvec4 outcodes = computeTileOutcodes(tileCoords, pathTileRect);
+    if (any(outcodes)) {
+        if (!outcodes.x && outcodes.y && !outcodes.z) {
+            uint backdropIndex = pathBackdropOffset + uint(tileCoords.x - pathTileRect.x);
+            atomicAdd(iBackdrops[backdropIndex * 3], backdropDelta);
+        }
+    } else {
+        uint tileIndex = computeTileIndexNoCheck(tileCoords, pathTileRect, pathTileOffset);
+        atomicAdd(iTiles[tileIndex * 4 + TILE_FIELD_BACKDROP_ALPHA_TILE_ID],
+                  uint(backdropDelta) << 24);
+    }
+}
+
+// Decodes a packed microline into (from X, from Y, to X, to Y) and writes its `.w` component to
+// `outPathIndex`.
+//
+// `.x` and `.y` each hold two signed 16-bit whole parts (sign-extended by the shift pairs), and
+// `.z` holds four unsigned 8-bit fractions that are divided by 256 and added on.
+vec4 unpackMicroline(uvec4 packedMicroline, out uint outPathIndex) {
+    outPathIndex = packedMicroline.w;
+    ivec4 signedMicroline = ivec4(packedMicroline);
+    return vec4((signedMicroline.x << 16) >> 16, signedMicroline.x >> 16,
+                (signedMicroline.y << 16) >> 16, signedMicroline.y >> 16) +
+           vec4(signedMicroline.z & 0xff, (signedMicroline.z >> 8) & 0xff,
+                (signedMicroline.z >> 16) & 0xff, (signedMicroline.z >> 24) & 0xff) / 256.0;
+}
+
+// Processes one microline per invocation: walks the 16x16 tiles the line passes through, adding
+// a clipped fill to each one and adjusting backdrops where the line crosses a tile boundary in X.
+void main() {
+    uint segmentIndex = gl_GlobalInvocationID.x;
+    if (segmentIndex >= uMicrolineCount)
+        return;
+
+    uint pathIndex;
+    vec4 lineSegment = unpackMicroline(iMicrolines[segmentIndex], pathIndex);
+
+    ivec4 pathTileRect = iMetadata[pathIndex * 3 + 0];
+    uint pathTileOffset = uint(iMetadata[pathIndex * 3 + 1].x);
+    uint pathBackdropOffset = uint(iMetadata[pathIndex * 3 + 2].x);
+
+    // Following is a straight port of `process_line_segment()`:
+
+    ivec2 tileSize = ivec2(16);
+
+    ivec4 tileLineSegment = ivec4(floor(lineSegment / vec4(tileSize.xyxy)));
+    ivec2 fromTileCoords = tileLineSegment.xy, toTileCoords = tileLineSegment.zw;
+
+    vec2 vector = lineSegment.zw - lineSegment.xy;
+    // NOTE(review): `vectorIsNegative` is not read anywhere below.
+    vec2 vectorIsNegative = vec2(vector.x < 0.0 ? -1.0 : 0.0, vector.y < 0.0 ? -1.0 : 0.0);
+    ivec2 tileStep = ivec2(vector.x < 0.0 ? -1 : 1, vector.y < 0.0 ? -1 : 1);
+
+    // The first tile boundary the line can cross on each axis, in the direction of travel.
+    vec2 firstTileCrossing = vec2((fromTileCoords + ivec2(vector.x >= 0.0 ? 1 : 0,
+                                                          vector.y >= 0.0 ? 1 : 0)) * tileSize);
+
+    // `tMax` is the line parameter at the next boundary crossing on each axis; `tDelta` is how
+    // much the parameter advances per whole tile on each axis.
+    // NOTE(review): a zero component of `vector` divides by zero here; presumably the result is
+    // expected to be infinite so that axis is never selected — confirm.
+    vec2 tMax = (firstTileCrossing - lineSegment.xy) / vector;
+    vec2 tDelta = abs(tileSize / vector);
+
+    vec2 currentPosition = lineSegment.xy;
+    ivec2 tileCoords = fromTileCoords;
+    int lastStepDirection = STEP_DIRECTION_NONE;
+    uint iteration = 0;
+
+    // The iteration cap guarantees termination even if the end tile is never reached.
+    while (iteration < MAX_ITERATIONS) {
+        // Step along whichever axis crosses a boundary first; on a tie, step in X when moving
+        // in +X and in Y otherwise.
+        int nextStepDirection;
+        if (tMax.x < tMax.y)
+            nextStepDirection = STEP_DIRECTION_X;
+        else if (tMax.x > tMax.y)
+            nextStepDirection = STEP_DIRECTION_Y;
+        else if (tileStep.x > 0.0)
+            nextStepDirection = STEP_DIRECTION_X;
+        else
+            nextStepDirection = STEP_DIRECTION_Y;
+
+        float nextT = min(nextStepDirection == STEP_DIRECTION_X ? tMax.x : tMax.y, 1.0);
+
+        // If we've reached the end tile, don't step at all.
+        if (tileCoords == toTileCoords)
+            nextStepDirection = STEP_DIRECTION_NONE;
+
+        vec2 nextPosition = mix(lineSegment.xy, lineSegment.zw, nextT);
+        vec4 clippedLineSegment = vec4(currentPosition, nextPosition);
+        addFill(clippedLineSegment, tileCoords, pathTileRect, pathTileOffset);
+
+        // Add extra fills if necessary.
+        //
+        // When leaving this tile in -Y, the extra fill runs from the clipped end point to the
+        // tile's origin corner; when this tile was entered in +Y, it runs from the origin corner
+        // to the clipped start point.
+        vec4 auxiliarySegment;
+        bool haveAuxiliarySegment = false;
+        if (tileStep.y < 0 && nextStepDirection == STEP_DIRECTION_Y) {
+            auxiliarySegment = vec4(clippedLineSegment.zw, vec2(tileCoords * tileSize));
+            haveAuxiliarySegment = true;
+        } else if (tileStep.y > 0 && lastStepDirection == STEP_DIRECTION_Y) {
+            auxiliarySegment = vec4(vec2(tileCoords * tileSize), clippedLineSegment.xy);
+            haveAuxiliarySegment = true;
+        }
+        if (haveAuxiliarySegment)
+            addFill(auxiliarySegment, tileCoords, pathTileRect, pathTileOffset);
+
+        // Adjust backdrop if necessary.
+        //
+        // NB: Do not refactor the calls below. This exact code sequence is needed to avoid a
+        // miscompilation on the Radeon Metal compiler.
+        if (tileStep.x < 0 && lastStepDirection == STEP_DIRECTION_X) {
+            adjustBackdrop(1,
+                           tileCoords,
+                           pathTileRect,
+                           pathTileOffset,
+                           pathBackdropOffset);
+        } else if (tileStep.x > 0 && nextStepDirection == STEP_DIRECTION_X) {
+            adjustBackdrop(-1,
+                           tileCoords,
+                           pathTileRect,
+                           pathTileOffset,
+                           pathBackdropOffset);
+        }
+
+        // Take a step.
+        if (nextStepDirection == STEP_DIRECTION_X) {
+            tMax.x += tDelta.x;
+            tileCoords.x += tileStep.x;
+        } else if (nextStepDirection == STEP_DIRECTION_Y) {
+            tMax.y += tDelta.y;
+            tileCoords.y += tileStep.y;
+        } else if (nextStepDirection == STEP_DIRECTION_NONE) {
+            break;
+        }
+
+        currentPosition = nextPosition;
+        lastStepDirection = nextStepDirection;
+
+        iteration++;
+    }
+}
diff --git a/shaders/d3d11/bound.cs.glsl b/shaders/d3d11/bound.cs.glsl
new file mode 100644
index 00000000..6f29b2f2
--- /dev/null
+++ b/shaders/d3d11/bound.cs.glsl
@@ -0,0 +1,84 @@
+#version 430
+
+// pathfinder/shaders/bound.cs.glsl
+//
+// Copyright © 2020 The Pathfinder Project Developers.
+//
+// Licensed under the Apache License, Version 2.0 or the MIT license
+// , at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// Initializes the tile maps.
+
+#extension GL_GOOGLE_include_directive : enable
+
+precision highp float;
+
+#ifdef GL_ES
+precision highp sampler2D;
+#endif
+
+#define TILE_FIELD_NEXT_TILE_ID 0
+#define TILE_FIELD_FIRST_FILL_ID 1
+#define TILE_FIELD_BACKDROP_ALPHA_TILE_ID 2
+#define TILE_FIELD_CONTROL 3
+
+layout(local_size_x = 64) in;
+
+uniform int uPathCount;
+uniform int uTileCount;
+
+layout(std430, binding = 0) buffer bTilePathInfo {
+    // x: tile upper left, 16-bit packed x/y
+    // y: tile lower right, 16-bit packed x/y
+    // z: first tile index in this path
+    // w: color/ctrl/backdrop word
+    restrict readonly uvec4 iTilePathInfo[];
+};
+
+layout(std430, binding = 1) buffer bTiles {
+    // [0]: next tile ID (initialized to -1)
+    // [1]: first fill ID (initialized to -1)
+    // [2]: backdrop delta upper 8 bits, alpha tile ID lower 24 (initialized to 0, -1 respectively)
+    // [3]: color/ctrl/backdrop word
+    restrict uint iTiles[];
+};
+
+// Resets one tile per invocation: finds the path that owns the tile and writes the tile's four
+// words (empty links, zero backdrop with no alpha tile, and the path's control word).
+void main() {
+    uint tileIndex = gl_GlobalInvocationID.x;
+    if (tileIndex >= uint(uTileCount))
+        return;
+
+    // Binary search for the path whose tile range contains `tileIndex`, keyed on each path's
+    // first tile index (`.z`). The iteration cap guarantees termination.
+    // NOTE(review): assumes `iTilePathInfo` is ordered by first tile index — confirm with the
+    // code that uploads it.
+    uint lowPathIndex = 0, highPathIndex = uint(uPathCount);
+    int iteration = 0;
+    while (iteration < 1024 && lowPathIndex + 1 < highPathIndex) {
+        uint midPathIndex = lowPathIndex + (highPathIndex - lowPathIndex) / 2;
+        uint midTileIndex = iTilePathInfo[midPathIndex].z;
+        if (tileIndex < midTileIndex) {
+            highPathIndex = midPathIndex;
+        } else {
+            lowPathIndex = midPathIndex;
+            if (tileIndex == midTileIndex)
+                break;
+        }
+        iteration++;
+    }
+
+    // Only the control word is needed from the owning path; the packed tile rect in `.xy` is
+    // not used by this shader.
+    uint pathIndex = lowPathIndex;
+    uvec4 pathInfo = iTilePathInfo[pathIndex];
+
+    // ~0u is -1 when read back as a signed int (an empty link). 0x00ffffff leaves the backdrop
+    // delta (upper 8 bits) at 0 and the alpha tile ID (lower 24 bits) at -1.
+    iTiles[tileIndex * 4 + TILE_FIELD_NEXT_TILE_ID] = ~0u;
+    iTiles[tileIndex * 4 + TILE_FIELD_FIRST_FILL_ID] = ~0u;
+    iTiles[tileIndex * 4 + TILE_FIELD_BACKDROP_ALPHA_TILE_ID] = 0x00ffffffu;
+    iTiles[tileIndex * 4 + TILE_FIELD_CONTROL] = pathInfo.w;
+}
diff --git a/shaders/d3d11/dice.cs.glsl b/shaders/d3d11/dice.cs.glsl
new file mode 100644
index 00000000..cc1ab72c
--- /dev/null
+++ b/shaders/d3d11/dice.cs.glsl
@@ -0,0 +1,217 @@
+#version 430
+
+// pathfinder/shaders/dice.cs.glsl
+//
+// Copyright © 2020 The Pathfinder Project Developers.
+//
+// Licensed under the Apache License, Version 2.0 or the MIT license
+// , at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// Chops lines and curves into microlines.
+
+#extension GL_GOOGLE_include_directive : enable
+
+#define BIN_WORKGROUP_SIZE 64
+
+#define MAX_CURVE_STACK_SIZE 32
+
+#define FLAGS_PATH_INDEX_CURVE_IS_QUADRATIC 0x80000000u
+#define FLAGS_PATH_INDEX_CURVE_IS_CUBIC 0x40000000u
+
+#define BIN_INDIRECT_DRAW_PARAMS_MICROLINE_COUNT_INDEX 3
+
+#define TOLERANCE 0.25
+#define MICROLINE_LENGTH 16.0
+
+precision highp float;
+
+#ifdef GL_ES
+precision highp sampler2D;
+#endif
+
+layout(local_size_x = 64) in;
+
+uniform mat2 uTransform;
+uniform vec2 uTranslation;
+uniform int uPathCount;
+uniform int uLastBatchSegmentIndex;
+uniform int uMaxMicrolineCount;
+
+layout(std430, binding = 0) buffer bComputeIndirectParams {
+    // [0]: number of x workgroups
+    // [1]: number of y workgroups (always 1)
+    // [2]: number of z workgroups (always 1)
+    // [3]: number of output microlines
+    restrict uint iComputeIndirectParams[];
+};
+
+// Indexed by batch path index.
+layout(std430, binding = 1) buffer bDiceMetadata {
+    // x: global path ID
+    // y: first global segment index
+    // z: first batch segment index
+    // w: unused
+    restrict readonly uvec4 iDiceMetadata[];
+};
+
+// Untransformed input points; `getPoint()` applies the transform on read.
+layout(std430, binding = 2) buffer bPoints {
+    restrict readonly vec2 iPoints[];
+};
+
+// One entry per input segment: `.x` is the index of the segment's first point and `.y` carries
+// the FLAGS_PATH_INDEX_CURVE_IS_* flag bits tested in `main()`.
+layout(std430, binding = 3) buffer bInputIndices {
+    restrict readonly uvec2 iInputIndices[];
+};
+
+layout(std430, binding = 4) buffer bMicrolines {
+    // x: from (X, Y) whole pixels, packed signed 16-bit
+    // y: to (X, Y) whole pixels, packed signed 16-bit
+    // z: (from X, from Y, to X, to Y) fractional pixels, packed unsigned 8-bit (0.8 fixed point)
+    // w: path ID
+    restrict uvec4 iMicrolines[];
+};
+
+// Packs one microline (from in .xy, to in .zw) and stores it at `outputMicrolineIndex`.
+//
+// Coordinates are clamped to [-32768, 32767], rounded to 1/256 steps, and split into a whole
+// part and an 8-bit fraction. Indices at or beyond `uMaxMicrolineCount` are silently dropped;
+// the caller has already counted them in `iComputeIndirectParams`.
+void emitMicroline(vec4 microlineSegment, uint pathIndex, uint outputMicrolineIndex) {
+    if (outputMicrolineIndex >= uint(uMaxMicrolineCount))
+        return;
+
+    ivec4 microlineSubpixels = ivec4(round(clamp(microlineSegment, -32768.0, 32767.0) * 256.0));
+    ivec4 microlinePixels = ivec4(floor(vec4(microlineSubpixels) / 256.0));
+    ivec4 microlineFractPixels = microlineSubpixels - microlinePixels * 256;
+
+    iMicrolines[outputMicrolineIndex] =
+        uvec4((uint(microlinePixels.x) & 0xffff) | (uint(microlinePixels.y) << 16),
+              (uint(microlinePixels.z) & 0xffff) | (uint(microlinePixels.w) << 16),
+              uint(microlineFractPixels.x) | (uint(microlineFractPixels.y) << 8) |
+              (uint(microlineFractPixels.z) << 16) | (uint(microlineFractPixels.w) << 24),
+              pathIndex);
+}
+
+// Returns true if the cubic with endpoints `baseline` (p0 in .xy, p3 in .zw) and control points
+// `ctrl` (p1 in .xy, p2 in .zw) is flat to within TOLERANCE.
+//
+// See Kaspar Fischer, "Piecewise Linear Approximation of Bézier Curves", 2000.
+//
+// NOTE(review): not called from `main()` in this shader.
+bool curveIsFlat(vec4 baseline, vec4 ctrl) {
+    vec4 uv = vec4(3.0) * ctrl - vec4(2.0) * baseline - baseline.zwxy;
+    uv *= uv;
+    uv = max(uv, uv.zwxy);
+    return uv.x + uv.y <= 16.0 * TOLERANCE * TOLERANCE;
+}
+
+// Splits a cubic at parameter `t` by repeated linear interpolation, writing the endpoints and
+// control points of the two halves to the `prev*` and `next*` outputs.
+//
+// NOTE(review): not called from `main()` in this shader.
+void subdivideCurve(vec4 baseline,
+                    vec4 ctrl,
+                    float t,
+                    out vec4 prevBaseline,
+                    out vec4 prevCtrl,
+                    out vec4 nextBaseline,
+                    out vec4 nextCtrl) {
+    vec2 p0 = baseline.xy, p1 = ctrl.xy, p2 = ctrl.zw, p3 = baseline.zw;
+    vec2 p0p1 = mix(p0, p1, t), p1p2 = mix(p1, p2, t), p2p3 = mix(p2, p3, t);
+    vec2 p0p1p2 = mix(p0p1, p1p2, t), p1p2p3 = mix(p1p2, p2p3, t);
+    vec2 p0p1p2p3 = mix(p0p1p2, p1p2p3, t);
+    prevBaseline = vec4(p0, p0p1p2p3);
+    prevCtrl = vec4(p0p1, p0p1p2);
+    nextBaseline = vec4(p0p1p2p3, p3);
+    nextCtrl = vec4(p1p2p3, p2p3);
+}
+
+// Returns the point at parameter `t` on the cubic with endpoints `baseline` and control points
+// `ctrl`, using the same interpolation scheme as `subdivideCurve()`.
+vec2 sampleCurve(vec4 baseline, vec4 ctrl, float t) {
+    vec2 p0 = baseline.xy, p1 = ctrl.xy, p2 = ctrl.zw, p3 = baseline.zw;
+    vec2 p0p1 = mix(p0, p1, t), p1p2 = mix(p1, p2, t), p2p3 = mix(p2, p3, t);
+    vec2 p0p1p2 = mix(p0p1, p1p2, t), p1p2p3 = mix(p1p2, p2p3, t);
+    return mix(p0p1p2, p1p2p3, t);
+}
+
+// Returns the point at parameter `t` on the line from `line.xy` to `line.zw`.
+vec2 sampleLine(vec4 line, float t) {
+    return mix(line.xy, line.zw, t);
+}
+
+// Fetches an input point and applies `uTransform` and `uTranslation` to it.
+vec2 getPoint(uint pointIndex) {
+    return uTransform * iPoints[pointIndex] + uTranslation;
+}
+
+// Processes one input segment per invocation: decides how many microlines the segment needs,
+// reserves that many output slots, and emits them.
+void main() {
+    uint batchSegmentIndex = gl_GlobalInvocationID.x;
+    if (batchSegmentIndex >= uint(uLastBatchSegmentIndex))
+        return;
+
+    // Find the path index.
+    //
+    // Binary search keyed on each path's first batch segment index (`.z`). The iteration cap
+    // guarantees termination.
+    // NOTE(review): assumes `iDiceMetadata` is ordered by first batch segment index — confirm.
+    uint lowPathIndex = 0, highPathIndex = uint(uPathCount);
+    int iteration = 0;
+    while (iteration < 1024 && lowPathIndex + 1 < highPathIndex) {
+        uint midPathIndex = lowPathIndex + (highPathIndex - lowPathIndex) / 2;
+        uint midBatchSegmentIndex = iDiceMetadata[midPathIndex].z;
+        if (batchSegmentIndex < midBatchSegmentIndex) {
+            highPathIndex = midPathIndex;
+        } else {
+            lowPathIndex = midPathIndex;
+            if (batchSegmentIndex == midBatchSegmentIndex)
+                break;
+        }
+        iteration++;
+    }
+
+    // Convert the batch-relative segment index into an index into `iInputIndices`.
+    uint batchPathIndex = lowPathIndex;
+    uvec4 diceMetadata = iDiceMetadata[batchPathIndex];
+    uint firstGlobalSegmentIndexInPath = diceMetadata.y;
+    uint firstBatchSegmentIndexInPath = diceMetadata.z;
+    uint globalSegmentIndex = batchSegmentIndex - firstBatchSegmentIndexInPath +
+        firstGlobalSegmentIndexInPath;
+
+    uvec2 inputIndices = iInputIndices[globalSegmentIndex];
+    uint fromPointIndex = inputIndices.x, flagsPathIndex = inputIndices.y;
+
+    // The end point follows the control points: three points on for a cubic, two for a
+    // quadratic, one for a line.
+    uint toPointIndex = fromPointIndex;
+    if ((flagsPathIndex & FLAGS_PATH_INDEX_CURVE_IS_CUBIC) != 0u)
+        toPointIndex += 3;
+    else if ((flagsPathIndex & FLAGS_PATH_INDEX_CURVE_IS_QUADRATIC) != 0u)
+        toPointIndex += 2;
+    else
+        toPointIndex += 1;
+
+    vec4 baseline = vec4(getPoint(fromPointIndex), getPoint(toPointIndex));
+
+    // Read control points if applicable, and calculate number of segments.
+    //
+    // The technique is from Thomas Sederberg, "Computer-Aided Geometric Design" notes, section
+    // 10.6 "Error Bounds".
+    vec4 ctrl = vec4(0.0);
+    float segmentCountF;
+    bool isCurve = (flagsPathIndex & (FLAGS_PATH_INDEX_CURVE_IS_CUBIC |
+                                      FLAGS_PATH_INDEX_CURVE_IS_QUADRATIC)) != 0u;
+    if (isCurve) {
+        vec2 ctrl0 = getPoint(fromPointIndex + 1);
+        if ((flagsPathIndex & FLAGS_PATH_INDEX_CURVE_IS_QUADRATIC) != 0u) {
+            // Express the quadratic as a cubic: each cubic control point is
+            // (endpoint + 2 * quadratic control point) / 3.
+            vec2 ctrl0_2 = ctrl0 * vec2(2.0);
+            ctrl = (baseline + ctrl0_2.xyxy) * vec4(1.0 / 3.0);
+        } else {
+            ctrl = vec4(ctrl0, getPoint(fromPointIndex + 2));
+        }
+        vec2 bound = vec2(6.0) * max(abs(ctrl.zw - 2.0 * ctrl.xy + baseline.xy),
+                                     abs(baseline.zw - 2.0 * ctrl.zw + ctrl.xy));
+        segmentCountF = sqrt(length(bound) / (8.0 * TOLERANCE));
+    } else {
+        segmentCountF = length(baseline.zw - baseline.xy) / MICROLINE_LENGTH;
+    }
+
+    // Allocate space.
+    //
+    // Always emit at least one microline. The counter is bumped even when the output buffer is
+    // too small; `emitMicroline()` drops the writes that would not fit.
+    int segmentCount = max(int(ceil(segmentCountF)), 1);
+    uint firstOutputMicrolineIndex =
+        atomicAdd(iComputeIndirectParams[BIN_INDIRECT_DRAW_PARAMS_MICROLINE_COUNT_INDEX],
+                  segmentCount);
+
+    // Sample at evenly spaced parameter values and connect consecutive samples.
+    vec2 prevPoint = baseline.xy;
+    for (int segmentIndex = 0; segmentIndex < segmentCount; segmentIndex++) {
+        float nextT = float(segmentIndex + 1) / float(segmentCount);
+        vec2 nextPoint;
+        if (isCurve)
+            nextPoint = sampleCurve(baseline, ctrl, nextT);
+        else
+            nextPoint = sampleLine(baseline, nextT);
+        emitMicroline(vec4(prevPoint, nextPoint),
+                      batchPathIndex,
+                      firstOutputMicrolineIndex + segmentIndex);
+        prevPoint = nextPoint;
+    }
+}
diff --git a/shaders/d3d11/fill.cs.glsl b/shaders/d3d11/fill.cs.glsl
new file mode 100644
index 00000000..c3e2ff35
--- /dev/null
+++ b/shaders/d3d11/fill.cs.glsl
@@ -0,0 +1,88 @@
+#version 430
+
+// pathfinder/shaders/fill.cs.glsl
+//
+// Copyright © 2020 The Pathfinder Project Developers.
+//
+// Licensed under the Apache License, Version 2.0 or the MIT license
+// , at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#extension GL_GOOGLE_include_directive : enable
+
+precision highp float;
+
+#ifdef GL_ES
+precision highp sampler2D;
+#endif
+
+#include "fill_area.inc.glsl"
+
+// One workgroup per alpha tile. `computeTileCoord()` gives every alpha tile a 16x4-texel cell
+// in `uDest`, one texel per invocation.
+layout(local_size_x = 16, local_size_y = 4) in;
+
+// Word offsets of the fields within one 4-word tile record of `iTiles`.
+#define TILE_FIELD_NEXT_TILE_ID             0
+#define TILE_FIELD_FIRST_FILL_ID            1
+#define TILE_FIELD_BACKDROP_ALPHA_TILE_ID   2
+#define TILE_FIELD_CONTROL                  3
+
+// Mask image: coverage is stored here, and clip coverage is read back from it.
+layout(rgba8) uniform image2D uDest;
+// Lookup table handed to `computeCoverage()`.
+uniform sampler2D uAreaLUT;
+// x: global index of the first alpha tile in this batch; y: one past the last one.
+uniform ivec2 uAlphaTileRange;
+
+layout(std430, binding = 0) buffer bFills {
+    // Three words per fill:
+    // [0]: from point (x in the low 16 bits, y in the high 16 bits, in 1/256ths)
+    // [1]: to point, same packing
+    // [2]: next fill ID in the list; negative ends the list
+    restrict readonly uint iFills[];
+};
+
+layout(std430, binding = 1) buffer bTiles {
+    // Four words per tile, matching the TILE_FIELD_* offsets:
+    // [0]: next tile ID
+    // [1]: first fill ID
+    // [2]: backdrop delta upper 8 bits, alpha tile ID lower 24 bits
+    // [3]: color/ctrl/backdrop word
+    restrict uint iTiles[];
+};
+
+layout(std430, binding = 2) buffer bAlphaTiles {
+    // Two words per alpha tile in the batch:
+    // [0]: index into `iTiles` of the tile that owns this alpha tile
+    // [1]: alpha tile index of the clip mask to apply; negative if there is none
+    restrict readonly uint iAlphaTiles[];
+};
+
+#include "fill_compute.inc.glsl"
+
+// Returns the texel of `uDest` that this invocation owns for alpha tile `alphaTileIndex`.
+//
+// Bits 0-7 of the index select the column of cells and bits 8-23 the row. Each cell is 16x4
+// texels, and the local invocation ID picks the texel inside it.
+ivec2 computeTileCoord(uint alphaTileIndex) {
+    uint x = alphaTileIndex & 0xffu;
+    // Take bits 8-23 as a single field. Masking the two bytes separately and adding them back
+    // together is easy to get wrong, because `+` binds tighter than `&`.
+    uint y = (alphaTileIndex >> 8u) & 0xffffu;
+    return ivec2(16, 4) * ivec2(x, y) + ivec2(gl_LocalInvocationID.xy);
+}
+
+// Rasterizes the fills of one alpha tile into `uDest`, applying the clip mask if there is one.
+void main() {
+    // The Y coordinate advances in steps of 4; presumably each component of `coverages` is one
+    // of four vertically adjacent pixels -- TODO confirm against the area LUT layout.
+    ivec2 tileSubCoord = ivec2(gl_LocalInvocationID.xy) * ivec2(1, 4);
+
+    // This is a workaround for the 64K workgroup dispatch limit in OpenGL.
+    // The tile index is split across two dispatch dimensions: X holds the low 15 bits.
+    uint batchAlphaTileIndex = (gl_WorkGroupID.x | (gl_WorkGroupID.y << 15));
+    uint alphaTileIndex = batchAlphaTileIndex + uint(uAlphaTileRange.x);
+    if (alphaTileIndex >= uint(uAlphaTileRange.y))
+        return;
+
+    uint tileIndex = iAlphaTiles[batchAlphaTileIndex * 2 + 0];
+    // Sign-extend the 24-bit alpha tile ID; tiles whose ID is negative are skipped.
+    if ((int(iTiles[tileIndex * 4 + TILE_FIELD_BACKDROP_ALPHA_TILE_ID] << 8) >> 8) < 0)
+        return;
+
+    int fillIndex = int(iTiles[tileIndex * 4 + TILE_FIELD_FIRST_FILL_ID]);
+    // The backdrop is the top 8 bits of the control word; the arithmetic shift keeps its sign.
+    int backdrop = int(iTiles[tileIndex * 4 + TILE_FIELD_CONTROL]) >> 24;
+
+    // TODO(pcwalton): Handle even-odd fill rule.
+    vec4 coverages = vec4(backdrop);
+    coverages += accumulateCoverageForFillList(fillIndex, tileSubCoord);
+    coverages = clamp(abs(coverages), 0.0, 1.0);
+
+    // Handle clip if necessary.
+    int clipTileIndex = int(iAlphaTiles[batchAlphaTileIndex * 2 + 1]);
+    if (clipTileIndex >= 0)
+        coverages = min(coverages, imageLoad(uDest, computeTileCoord(clipTileIndex)));
+
+    imageStore(uDest, computeTileCoord(alphaTileIndex), coverages);
+}
diff --git a/shaders/d3d11/fill_compute.inc.glsl b/shaders/d3d11/fill_compute.inc.glsl
new file mode 100644
index 00000000..eaee1546
--- /dev/null
+++ b/shaders/d3d11/fill_compute.inc.glsl
@@ -0,0 +1,25 @@
+// pathfinder/shaders/fill_compute.inc.glsl
+//
+// Copyright © 2020 The Pathfinder Project Developers.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// Sums the coverage contributed by every fill in the linked list that starts at `fillIndex`.
+//
+// `tileSubCoord` is a pixel position inside the tile; each fill's endpoints are made relative
+// to the center of that pixel before being passed to `computeCoverage()`. A negative
+// `fillIndex` is an empty list and yields zero coverage. At most 1024 fills are visited, which
+// bounds the loop even if the list is malformed.
+//
+// The including shader must declare `iFills` and `uAreaLUT` and provide `computeCoverage()`.
+vec4 accumulateCoverageForFillList(int fillIndex, ivec2 tileSubCoord) {
+    vec2 tileFragCoord = vec2(tileSubCoord) + vec2(0.5);
+    vec4 coverages = vec4(0.0);
+    int iteration = 0;
+    // Test before the first dereference so that an empty list never indexes `iFills` with a
+    // negative fill ID.
+    while (fillIndex >= 0 && iteration < 1024) {
+        uint fillFrom = iFills[fillIndex * 3 + 0], fillTo = iFills[fillIndex * 3 + 1];
+        vec4 lineSegment = vec4(fillFrom & 0xffff, fillFrom >> 16,
+                                fillTo   & 0xffff, fillTo   >> 16) / 256.0;
+        lineSegment -= tileFragCoord.xyxy;
+        coverages += computeCoverage(lineSegment.xy, lineSegment.zw, uAreaLUT);
+        fillIndex = int(iFills[fillIndex * 3 + 2]);
+        iteration++;
+    }
+    return coverages;
+}
diff --git a/shaders/d3d11/propagate.cs.glsl b/shaders/d3d11/propagate.cs.glsl
new file mode 100644
index 00000000..6dd00060
--- /dev/null
+++ b/shaders/d3d11/propagate.cs.glsl
@@ -0,0 +1,224 @@
+#version 430
+
+// pathfinder/shaders/propagate.cs.glsl
+//
+// Copyright © 2020 The Pathfinder Project Developers.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option.
This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// Sum up backdrops to propagate fills across tiles, and allocate alpha tiles.
+
+#extension GL_GOOGLE_include_directive : enable
+
+precision highp float;
+
+#ifdef GL_ES
+precision highp sampler2D;
+#endif
+
+layout(local_size_x = 64) in;
+
+// Word offsets of the fields within one 4-word tile record.
+#define TILE_FIELD_NEXT_TILE_ID             0
+#define TILE_FIELD_FIRST_FILL_ID            1
+#define TILE_FIELD_BACKDROP_ALPHA_TILE_ID   2
+#define TILE_FIELD_CONTROL                  3
+
+// Slot of `iIndirectDrawParams` that counts the alpha tiles allocated so far.
+#define FILL_INDIRECT_DRAW_PARAMS_ALPHA_TILE_COUNT_INDEX    4
+
+// `.x` is the row stride of `iZBuffer` and `iFirstTileMap`; presumably the framebuffer size in
+// tiles.
+uniform ivec2 uFramebufferTileSize;
+// Number of 3-word records in `iBackdrops`; one invocation handles one record.
+uniform int uColumnCount;
+// Added to a batch-local alpha tile index to form the index stored in the tile record.
+uniform int uFirstAlphaTileIndex;
+
+layout(std430, binding = 0) buffer bDrawMetadata {
+    // [0]: tile rect
+    // [1].x: tile offset
+    // [1].y: path ID
+    // [1].z: Z write enabled?
+    // [1].w: clip path ID, or ~0
+    // [2].x: backdrop column offset
+    restrict readonly uvec4 iDrawMetadata[];
+};
+
+layout(std430, binding = 1) buffer bClipMetadata {
+    // [0]: tile rect
+    // [1].x: tile offset
+    // [1].y: unused
+    // [1].z: unused
+    // [1].w: unused
+    restrict readonly uvec4 iClipMetadata[];
+};
+
+layout(std430, binding = 2) buffer bBackdrops {
+    // [0]: backdrop
+    // [1]: tile X offset
+    // [2]: path ID
+    restrict readonly int iBackdrops[];
+};
+
+layout(std430, binding = 3) buffer bDrawTiles {
+    // [0]: next tile ID
+    // [1]: first fill ID
+    // [2]: backdrop delta upper 8 bits, alpha tile ID lower 24
+    // [3]: color/ctrl/backdrop word
+    restrict uint iDrawTiles[];
+};
+
+layout(std430, binding = 4) buffer bClipTiles {
+    // [0]: next tile ID
+    // [1]: first fill ID
+    // [2]: backdrop delta upper 8 bits, alpha tile ID lower 24
+    // [3]: color/ctrl/backdrop word
+    restrict uint iClipTiles[];
+};
+
+layout(std430, binding = 5) buffer bZBuffer {
+    restrict int iZBuffer[];
+};
+
+layout(std430, binding = 6) buffer bFirstTileMap {
+    restrict int iFirstTileMap[];
+};
+
+layout(std430, binding = 7) buffer bIndirectDrawParams {
+    // [0]: vertexCount (6)
+    // [1]: instanceCount (of fills)
+    // [2]: vertexStart (0)
+    // [3]: baseInstance (0)
+    // [4]: alpha tile count
+    restrict uint iIndirectDrawParams[];
+};
+
+layout(std430, binding = 8) buffer bAlphaTiles {
+    // Two words per allocated alpha tile:
+    // [0]: index of the draw tile that owns the alpha tile
+    // [1]: alpha tile index of the clip mask, or -1 if none
+    restrict uint iAlphaTiles[];
+};
+
+// Returns the index of the tile at `tileCoord` (relative to the rect's origin) in a row-major
+// tile buffer that starts at `bufferOffset`; the row stride is the width of `tileRect`.
+uint calculateTileIndex(uint bufferOffset, uvec4 tileRect, uvec2 tileCoord) {
+    return bufferOffset + tileCoord.y * (tileRect.z - tileRect.x) + tileCoord.x;
+}
+
+// Walks one column of a draw path's tiles from top to bottom. For every tile it finalizes the
+// backdrop, resolves the tile against the clip path (if any), allocates an alpha tile when one
+// is needed, records solid tiles in the Z-buffer and links visible tiles into the per-position
+// tile list.
+void main() {
+    uint columnIndex = gl_GlobalInvocationID.x;
+    if (int(columnIndex) >= uColumnCount)
+        return;
+
+    int currentBackdrop = iBackdrops[columnIndex * 3 + 0];
+    int tileX = iBackdrops[columnIndex * 3 + 1];
+    uint drawPathIndex = uint(iBackdrops[columnIndex * 3 + 2]);
+
+    uvec4 drawTileRect = iDrawMetadata[drawPathIndex * 3 + 0];
+    uvec4 drawOffsets = iDrawMetadata[drawPathIndex * 3 + 1];
+    uvec2 drawTileSize = drawTileRect.zw - drawTileRect.xy;
+    uint drawTileBufferOffset = drawOffsets.x;
+    bool zWrite = drawOffsets.z != 0;
+
+    // A clip path ID of ~0 becomes -1 here, i.e. "no clip path".
+    int clipPathIndex = int(drawOffsets.w);
+    uvec4 clipTileRect = uvec4(0u), clipOffsets = uvec4(0u);
+    if (clipPathIndex >= 0) {
+        clipTileRect = iClipMetadata[clipPathIndex * 2 + 0];
+        clipOffsets = iClipMetadata[clipPathIndex * 2 + 1];
+    }
+    uint clipTileBufferOffset = clipOffsets.x;
+
+    for (uint tileY = 0; tileY < drawTileSize.y; tileY++) {
+        uvec2 drawTileCoord = uvec2(tileX, tileY);
+        uint drawTileIndex = calculateTileIndex(drawTileBufferOffset, drawTileRect, drawTileCoord);
+
+        int drawAlphaTileIndex = -1;
+        int clipAlphaTileIndex = -1;
+        int drawFirstFillIndex = int(iDrawTiles[drawTileIndex * 4 + TILE_FIELD_FIRST_FILL_ID]);
+        // The backdrop delta is the signed top byte; the arithmetic shift sign-extends it.
+        int drawBackdropDelta =
+            int(iDrawTiles[drawTileIndex * 4 + TILE_FIELD_BACKDROP_ALPHA_TILE_ID]) >> 24;
+        // Keep the low 24 bits of the control word; the top byte is rewritten below.
+        uint drawTileWord = iDrawTiles[drawTileIndex * 4 + TILE_FIELD_CONTROL] & 0x00ffffff;
+
+        int drawTileBackdrop = currentBackdrop;
+        bool haveDrawAlphaMask = drawFirstFillIndex >= 0;
+        bool needNewAlphaTile = haveDrawAlphaMask;
+
+        // Handle clip if necessary.
+        if (clipPathIndex >= 0) {
+            uvec2 tileCoord = drawTileCoord + drawTileRect.xy;
+            if (all(bvec4(greaterThanEqual(tileCoord, clipTileRect.xy),
+                          lessThan        (tileCoord, clipTileRect.zw)))) {
+                uvec2 clipTileCoord = tileCoord - clipTileRect.xy;
+                uint clipTileIndex = calculateTileIndex(clipTileBufferOffset,
+                                                        clipTileRect,
+                                                        clipTileCoord);
+
+                // Sign-extend the 24-bit alpha tile ID of the clip tile.
+                int thisClipAlphaTileIndex =
+                    int(iClipTiles[clipTileIndex * 4 +
+                                   TILE_FIELD_BACKDROP_ALPHA_TILE_ID] << 8) >> 8;
+
+                uint clipTileWord = iClipTiles[clipTileIndex * 4 + TILE_FIELD_CONTROL];
+                int clipTileBackdrop = int(clipTileWord) >> 24;
+
+                if (thisClipAlphaTileIndex >= 0) {
+                    if (haveDrawAlphaMask) {
+                        clipAlphaTileIndex = thisClipAlphaTileIndex;
+                        needNewAlphaTile = true;
+                    } else {
+                        if (drawTileBackdrop != 0) {
+                            // This is a solid draw tile, but there's a clip applied. Replace it with an
+                            // alpha tile pointing directly to the clip mask.
+                            drawAlphaTileIndex = thisClipAlphaTileIndex;
+                            clipAlphaTileIndex = -1;
+                            needNewAlphaTile = false;
+                        } else {
+                            // No draw alpha tile index, no clip alpha tile index.
+                            drawAlphaTileIndex = -1;
+                            clipAlphaTileIndex = -1;
+                            needNewAlphaTile = false;
+                        }
+                    }
+                } else {
+                    // No clip tile.
+                    if (clipTileBackdrop == 0) {
+                        // This is a blank clip tile. Cull the draw tile entirely.
+                        drawTileBackdrop = 0;
+                        needNewAlphaTile = false;
+                    } else {
+                        // NOTE(review): this requests an alpha tile even when the draw tile has
+                        // no fills (`haveDrawAlphaMask` is false) -- confirm that is intended.
+                        needNewAlphaTile = true;
+                    }
+                }
+            } else {
+                // This draw tile is outside the clip path bounding rect. Cull the draw tile.
+                drawTileBackdrop = 0;
+                needNewAlphaTile = false;
+            }
+        }
+
+        if (needNewAlphaTile) {
+            // Reserve the next slot in this batch's alpha tile list.
+            uint drawBatchAlphaTileIndex =
+                atomicAdd(iIndirectDrawParams[FILL_INDIRECT_DRAW_PARAMS_ALPHA_TILE_COUNT_INDEX], 1);
+            iAlphaTiles[drawBatchAlphaTileIndex * 2 + 0] = drawTileIndex;
+            iAlphaTiles[drawBatchAlphaTileIndex * 2 + 1] = clipAlphaTileIndex;
+            drawAlphaTileIndex = int(drawBatchAlphaTileIndex) + uFirstAlphaTileIndex;
+        }
+
+        // Write back the alpha tile ID (low 24 bits) next to the untouched backdrop delta, and
+        // the final backdrop into the top byte of the control word.
+        iDrawTiles[drawTileIndex * 4 + TILE_FIELD_BACKDROP_ALPHA_TILE_ID] =
+            (uint(drawAlphaTileIndex) & 0x00ffffffu) | (uint(drawBackdropDelta) << 24);
+        iDrawTiles[drawTileIndex * 4 + TILE_FIELD_CONTROL] =
+            drawTileWord | (uint(drawTileBackdrop) << 24);
+
+        // Write to Z-buffer if necessary.
+        // Only solid tiles (nonzero backdrop, no alpha tile) of Z-writing paths are recorded;
+        // the entry keeps the highest such tile index.
+        ivec2 tileCoord = ivec2(tileX, tileY) + ivec2(drawTileRect.xy);
+        int tileMapIndex = tileCoord.y * uFramebufferTileSize.x + tileCoord.x;
+        if (zWrite && drawTileBackdrop != 0 && drawAlphaTileIndex < 0)
+            atomicMax(iZBuffer[tileMapIndex], int(drawTileIndex));
+
+        // Stitch into the linked list if necessary.
+        // The tile becomes the new list head and the previous head becomes its successor.
+        if (drawTileBackdrop != 0 || drawAlphaTileIndex >= 0) {
+            int nextTileIndex = atomicExchange(iFirstTileMap[tileMapIndex], int(drawTileIndex));
+            iDrawTiles[drawTileIndex * 4 + TILE_FIELD_NEXT_TILE_ID] = nextTileIndex;
+        }
+
+        currentBackdrop += drawBackdropDelta;
+    }
+}
diff --git a/shaders/d3d11/sort.cs.glsl b/shaders/d3d11/sort.cs.glsl
new file mode 100644
index 00000000..b89f1664
--- /dev/null
+++ b/shaders/d3d11/sort.cs.glsl
@@ -0,0 +1,93 @@
+#version 430
+
+// pathfinder/shaders/sort.cs.glsl
+//
+// Copyright © 2020 The Pathfinder Project Developers.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#extension GL_GOOGLE_include_directive : enable
+
+precision highp float;
+
+#ifdef GL_ES
+precision highp sampler2D;
+#endif
+
+// Word offsets of the fields within one 4-word tile record of `iTiles`.
+#define TILE_FIELD_NEXT_TILE_ID             0
+#define TILE_FIELD_FIRST_FILL_ID            1
+#define TILE_FIELD_BACKDROP_ALPHA_TILE_ID   2
+#define TILE_FIELD_CONTROL                  3
+
+// Number of entries of `iFirstTileMap` / `iZBuffer` to process, one invocation each.
+uniform int uTileCount;
+
+layout(std430, binding = 0) buffer bTiles {
+    // [0]: next tile ID
+    // [1]: first fill ID
+    // [2]: backdrop delta upper 8 bits, alpha tile ID lower 24
+    // [3]: color/ctrl/backdrop word
+    restrict uint iTiles[];
+};
+
+layout(std430, binding = 1) buffer bFirstTileMap {
+    restrict int iFirstTileMap[];
+};
+
+layout(std430, binding = 2) buffer bZBuffer {
+    restrict readonly int iZBuffer[];
+};
+
+layout(local_size_x = 64) in;
+
+// Head of the tile list stored for map entry `globalTileIndex`; negative if the list is empty.
+int getFirst(uint globalTileIndex) {
+    return iFirstTileMap[globalTileIndex];
+}
+
+// Successor of tile `tileIndex` in its list.
+int getNextTile(int tileIndex) {
+    return int(iTiles[tileIndex * 4 + TILE_FIELD_NEXT_TILE_ID]);
+}
+
+// Makes `newNextTileIndex` the successor of tile `tileIndex`.
+void setNextTile(int tileIndex, int newNextTileIndex) {
+    iTiles[tileIndex * 4 + TILE_FIELD_NEXT_TILE_ID] = uint(newNextTileIndex);
+}
+
+// Rebuilds the tile list of one map entry in ascending tile-index order (an insertion sort on
+// the linked list), dropping every tile whose index is below the entry's Z-buffer value.
+void main() {
+    uint globalTileIndex = gl_GlobalInvocationID.x;
+    if (globalTileIndex >= uint(uTileCount))
+        return;
+
+    int zValue = iZBuffer[globalTileIndex];
+
+    int unsortedFirstTileIndex = getFirst(globalTileIndex);
+    int sortedFirstTileIndex = -1;
+
+    while (unsortedFirstTileIndex >= 0) {
+        // Pop the head of the unsorted list. Its successor must be read now, because inserting
+        // the tile into the sorted list overwrites its next pointer.
+        int currentTileIndex = unsortedFirstTileIndex;
+        unsortedFirstTileIndex = getNextTile(currentTileIndex);
+
+        // Tiles below the Z-buffer value are left out of the sorted list; presumably they are
+        // hidden behind the solid tile recorded there -- TODO confirm.
+        if (currentTileIndex >= zValue) {
+            int prevTrialTileIndex = -1;
+            int trialTileIndex = sortedFirstTileIndex;
+            while (true) {
+                // Insert in front of the first larger index, or at the end of the list.
+                if (trialTileIndex < 0 || currentTileIndex < trialTileIndex) {
+                    if (prevTrialTileIndex < 0) {
+                        // No predecessor: the tile becomes the new head.
+                        setNextTile(currentTileIndex, sortedFirstTileIndex);
+                        sortedFirstTileIndex = currentTileIndex;
+                    } else {
+                        setNextTile(currentTileIndex, trialTileIndex);
+                        setNextTile(prevTrialTileIndex, currentTileIndex);
+                    }
+                    break;
+                }
+                prevTrialTileIndex = trialTileIndex;
+                trialTileIndex = getNextTile(trialTileIndex);
+            }
+        }
+    }
+
+    iFirstTileMap[globalTileIndex] = sortedFirstTileIndex;
+}
diff --git a/shaders/d3d11/tile.cs.glsl b/shaders/d3d11/tile.cs.glsl
new file mode 100644
index 00000000..da171693
--- /dev/null
+++ b/shaders/d3d11/tile.cs.glsl
@@ -0,0 +1,157 @@
+#version 430
+
+// pathfinder/shaders/tile.cs.glsl
+//
+// Copyright © 2020 The Pathfinder Project Developers.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#extension GL_GOOGLE_include_directive : enable
+
+precision highp float;
+
+#ifdef GL_ES
+precision highp sampler2D;
+#endif
+
+// One workgroup per framebuffer tile; every invocation composites four pixels (see `main()`).
+layout(local_size_x = 16, local_size_y = 4) in;
+
+// `computeTileVaryings()`, `calculateColor()` and the TILE_CTRL_* constants used below are not
+// defined in this file; presumably they come from these includes.
+#include "tile_fragment.inc.glsl"
+#include "tile_vertex.inc.glsl"
+
+#define LOAD_ACTION_CLEAR   0
+#define LOAD_ACTION_LOAD    1
+
+// Word offsets of the fields within one 4-word tile record of `iTiles`.
+#define TILE_FIELD_NEXT_TILE_ID             0
+#define TILE_FIELD_FIRST_FILL_ID            1
+#define TILE_FIELD_BACKDROP_ALPHA_TILE_ID   2
+#define TILE_FIELD_CONTROL                  3
+
+// LOAD_ACTION_CLEAR starts from `uClearColor`; any other value starts from `uDestImage`.
+uniform int uLoadAction;
+uniform vec4 uClearColor;
+uniform vec2 uTileSize;
+uniform sampler2D uTextureMetadata;
+uniform ivec2 uTextureMetadataSize;
+uniform sampler2D uZBuffer;
+uniform ivec2 uZBufferSize;
+uniform sampler2D uColorTexture0;
+uniform sampler2D uMaskTexture0;
+uniform sampler2D uDestTexture;
+uniform sampler2D uGammaLUT;
+uniform vec2 uColorTextureSize0;
+uniform vec2 uMaskTextureSize0;
+uniform vec2 uFramebufferSize;
+uniform ivec2 uFramebufferTileSize;
+layout(rgba8) uniform image2D uDestImage;
+
+layout(std430, binding = 0) buffer bTiles {
+    // Four words per tile, matching the TILE_FIELD_* offsets:
+    // [0]: next tile ID
+    // [1]: first fill ID
+    // [2]: backdrop delta upper 8 bits, alpha tile ID lower 24 bits
+    // [3]: color/ctrl/backdrop word
+    restrict readonly uint iTiles[];
+};
+
+layout(std430, binding = 1) buffer bFirstTileMap {
+    restrict readonly int iFirstTileMap[];
+};
+
+// Returns the index of the tile at `tileCoord` in a row-major tile buffer that starts at
+// `bufferOffset`; the row stride is the width of `tileRect`. Not called anywhere in this file.
+uint calculateTileIndex(uint bufferOffset, uvec4 tileRect, uvec2 tileCoord) {
+    return bufferOffset + tileCoord.y * (tileRect.z - tileRect.x) + tileCoord.x;
+}
+
+// Flips a pixel coordinate vertically for access to `uDestImage`.
+// NOTE(review): y = 0 maps to row `uFramebufferSize.y`, which is one past the last row if the
+// image is `uFramebufferSize.y` texels tall -- confirm whether a `- 1` is missing here.
+ivec2 toImageCoords(ivec2 coords) {
+    return ivec2(coords.x, uFramebufferSize.y - coords.y);
+}
+
+// Composites every tile in the list of one framebuffer tile onto the destination image.
+void main() {
+    ivec2 tileCoord = ivec2(gl_WorkGroupID.xy);
+    // Each invocation owns the pixels at Y offsets 0..3 from `firstTileSubCoord`.
+    ivec2 firstTileSubCoord = ivec2(gl_LocalInvocationID.xy) * ivec2(1, 4);
+    ivec2 firstFragCoord = tileCoord * ivec2(uTileSize) + firstTileSubCoord;
+
+    // Quick exit if this is guaranteed to be empty.
+    int tileIndex = iFirstTileMap[tileCoord.x + uFramebufferTileSize.x * tileCoord.y];
+    if (tileIndex < 0 && uLoadAction != LOAD_ACTION_CLEAR)
+        return;
+
+    // The mat4 is used as an array of four colors, one column per pixel.
+    mat4 destColors;
+    for (int subY = 0; subY < 4; subY++) {
+        if (uLoadAction == LOAD_ACTION_CLEAR) {
+            destColors[subY] = uClearColor;
+        } else {
+            ivec2 imageCoords = toImageCoords(firstFragCoord + ivec2(0, subY));
+            destColors[subY] = imageLoad(uDestImage, imageCoords);
+        }
+    }
+
+    // Walk the tile list in stored order, blending each tile over what is already there.
+    while (tileIndex >= 0) {
+        for (int subY = 0; subY < 4; subY++) {
+            ivec2 tileSubCoord = firstTileSubCoord + ivec2(0, subY);
+            vec2 fragCoord = vec2(firstFragCoord + ivec2(0, subY)) + vec2(0.5);
+
+            // Sign-extend the 24-bit alpha tile ID; negative means the tile has no mask.
+            int alphaTileIndex =
+                int(iTiles[tileIndex * 4 + TILE_FIELD_BACKDROP_ALPHA_TILE_ID] << 8) >> 8;
+            // Control word: color entry in bits 0-15, tile ctrl in bits 16-23, backdrop in the
+            // top byte.
+            uint tileControlWord = iTiles[tileIndex * 4 + TILE_FIELD_CONTROL];
+            uint colorEntry = tileControlWord & 0xffff;
+            int tileCtrl = int((tileControlWord >> 16) & 0xff);
+
+            int backdrop;
+            uvec2 maskTileCoord;
+            if (alphaTileIndex >= 0) {
+                backdrop = 0;
+                maskTileCoord = uvec2(alphaTileIndex & 0xff, alphaTileIndex >> 8) *
+                    uvec2(uTileSize);
+            } else {
+                // We have no alpha mask. Clear the mask bits so we don't try to look one up.
+                backdrop = int(tileControlWord) >> 24;
+                maskTileCoord = uvec2(0u);
+                tileCtrl &= ~(TILE_CTRL_MASK_MASK << TILE_CTRL_MASK_0_SHIFT);
+            }
+
+            vec3 maskTexCoord0 = vec3(vec2(ivec2(maskTileCoord) + tileSubCoord), backdrop);
+
+            vec2 colorTexCoord0;
+            vec4 baseColor, filterParams0, filterParams1, filterParams2;
+            int ctrl;
+            computeTileVaryings(fragCoord,
+                                int(colorEntry),
+                                uTextureMetadata,
+                                uTextureMetadataSize,
+                                colorTexCoord0,
+                                baseColor,
+                                filterParams0,
+                                filterParams1,
+                                filterParams2,
+                                ctrl);
+
+            vec4 srcColor = calculateColor(fragCoord,
+                                           uColorTexture0,
+                                           uMaskTexture0,
+                                           uDestTexture,
+                                           uGammaLUT,
+                                           uColorTextureSize0,
+                                           uMaskTextureSize0,
+                                           filterParams0,
+                                           filterParams1,
+                                           filterParams2,
+                                           uFramebufferSize,
+                                           ctrl,
+                                           maskTexCoord0,
+                                           colorTexCoord0,
+                                           baseColor,
+                                           tileCtrl);
+
+            // Source-over blend; assumes `srcColor` is premultiplied by alpha -- TODO confirm.
+            destColors[subY] = destColors[subY] * (1.0 - srcColor.a) + srcColor;
+        }
+
+        tileIndex = int(iTiles[tileIndex * 4 + TILE_FIELD_NEXT_TILE_ID]);
+    }
+
+    for (int subY = 0; subY < 4; subY++)
+        imageStore(uDestImage, toImageCoords(firstFragCoord + ivec2(0, subY)), destColors[subY]);
+}
diff --git a/shaders/fill_area.inc.glsl b/shaders/fill_area.inc.glsl
new file mode 100644
index 00000000..02c3b6e3
--- /dev/null
+++ b/shaders/fill_area.inc.glsl
@@ -0,0 +1,27 @@
+// pathfinder/shaders/fill_area.inc.glsl
+//
+// Copyright © 2020 The Pathfinder Project Developers.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// Returns the coverage contributed by the line segment `from` -> `to`, looked up in `areaLUT`.
+//
+// The segment's X extent is clamped to [-0.5, 0.5], so the endpoints are expected relative to
+// the center of the pixel being shaded. The result is scaled by the signed clamped width, so
+// its sign follows the segment's left/right direction.
+// NOTE(review): for a vertical segment (`from.x == to.x`) both divisions below divide by zero
+// while `dX` is 0 as well -- confirm the result is well-defined on the target drivers.
+vec4 computeCoverage(vec2 from, vec2 to, sampler2D areaLUT) {
+    // Determine winding, and sort into a consistent order so we only need to find one root below.
+    vec2 left = from.x < to.x ? from : to, right = from.x < to.x ? to : from;
+
+    // Shoot a vertical ray toward the curve.
+    vec2 window = clamp(vec2(from.x, to.x), -0.5, 0.5);
+    float offset = mix(window.x, window.y, 0.5) - left.x;
+    float t = offset / (right.x - left.x);
+
+    // Compute position and derivative to form a line approximation.
+    float y = mix(left.y, right.y, t);
+    float d = (right.y - left.y) / (right.x - left.x);
+
+    // Look up area under that line, and scale horizontally to the window size.
+    float dX = window.x - window.y;
+    return texture(areaLUT, vec2(y + 8.0, abs(d * dX)) / 16.0) * dX;
+}