diff --git a/Cargo.lock b/Cargo.lock index 8e28591d..f1a8ca1f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -142,6 +142,11 @@ name = "base64" version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "base64" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "bitflags" version = "1.2.1" @@ -172,6 +177,11 @@ name = "bumpalo" version = "3.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "byte-slice-cast" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "bytemuck" version = "1.2.0" @@ -465,7 +475,7 @@ version = "0.1.0" dependencies = [ "pathfinder_export 0.1.0", "pathfinder_svg 0.5.0", - "usvg 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", + "usvg 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -1242,7 +1252,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "kurbo" -version = "0.5.11" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "arrayvec 0.5.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1748,7 +1758,7 @@ dependencies = [ "pathfinder_svg 0.5.0", "pathfinder_ui 0.5.0", "rayon 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "usvg 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", + "usvg 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -1788,8 +1798,10 @@ name = "pathfinder_gpu" version = "0.5.0" dependencies = [ "bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", "half 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "image 0.23.3 (registry+https://github.com/rust-lang/crates.io-index)", + "log 0.4.8 
(registry+https://github.com/rust-lang/crates.io-index)", "pathfinder_color 0.5.0", "pathfinder_geometry 0.5.1", "pathfinder_resources 0.5.0", @@ -1825,7 +1837,7 @@ dependencies = [ "pathfinder_ui 0.5.0", "rayon 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", "smallvec 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "usvg 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", + "usvg 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -1837,6 +1849,7 @@ dependencies = [ "byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)", "cocoa 0.19.1 (registry+https://github.com/rust-lang/crates.io-index)", "core-foundation 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", + "dispatch 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "foreign-types 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "half 1.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "io-surface 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1854,6 +1867,7 @@ name = "pathfinder_renderer" version = "0.5.0" dependencies = [ "bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)", + "byte-slice-cast 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", "byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam-channel 0.4.2 (registry+https://github.com/rust-lang/crates.io-index)", "fxhash 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1898,7 +1912,7 @@ dependencies = [ "pathfinder_geometry 0.5.1", "pathfinder_renderer 0.5.0", "pathfinder_simd 0.5.0", - "usvg 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", + "usvg 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -2163,7 +2177,7 @@ dependencies = [ [[package]] name = "roxmltree" -version = "0.9.1" +version = "0.11.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "xmlparser 0.13.1 (registry+https://github.com/rust-lang/crates.io-index)", @@ -2481,7 +2495,7 @@ dependencies = [ name = "svg-to-skia" version = "0.1.0" dependencies = [ - "usvg 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", + "usvg 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -2614,7 +2628,7 @@ dependencies = [ [[package]] name = "ttf-parser" -version = "0.3.0" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] @@ -2668,22 +2682,22 @@ dependencies = [ [[package]] name = "usvg" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "base64 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", + "base64 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)", "data-url 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "flate2 1.0.14 (registry+https://github.com/rust-lang/crates.io-index)", "harfbuzz_rs 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", - "kurbo 0.5.11 (registry+https://github.com/rust-lang/crates.io-index)", + "kurbo 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "memmap2 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "rctree 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)", - "roxmltree 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)", + "roxmltree 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)", "simplecss 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", "siphasher 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)", "svgtypes 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", - "ttf-parser 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", + "ttf-parser 0.6.1 
(registry+https://github.com/rust-lang/crates.io-index)", "unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)", "unicode-script 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "unicode-vo 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -3059,11 +3073,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum backtrace 0.3.46 (registry+https://github.com/rust-lang/crates.io-index)" = "b1e692897359247cc6bb902933361652380af0f1b7651ae5c5013407f30e109e" "checksum backtrace-sys 0.1.35 (registry+https://github.com/rust-lang/crates.io-index)" = "7de8aba10a69c8e8d7622c5710229485ec32e9d55fdad160ea559c086fdcd118" "checksum base64 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b41b7ea54a0c9d92199de89e20e58d49f02f8e699814ef3fdf266f6f748d15c7" +"checksum base64 0.12.1 (registry+https://github.com/rust-lang/crates.io-index)" = "53d1ccbaf7d9ec9537465a97bf19edc1a4e158ecb49fc16178202238c569cc42" "checksum bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "cf1de2fe8c75bc145a2f577add951f8134889b4795d47466a54a5c846d691693" "checksum blake2b_simd 0.5.10 (registry+https://github.com/rust-lang/crates.io-index)" = "d8fb2d74254a3a0b5cac33ac9f8ed0e44aa50378d9dbb2e5d83bd21ed1dc2c8a" "checksum block 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a" "checksum build_const 0.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "39092a32794787acd8525ee150305ff051b0aa6cc2abaf193924f5ab05425f39" "checksum bumpalo 3.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "12ae9db68ad7fac5fe51304d20f016c911539251075a214f8e663babefa35187" +"checksum byte-slice-cast 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "b0a5e3906bcbf133e33c1d4d95afc664ad37fbdb9f6568d8043e7ea8c27d93d3" "checksum bytemuck 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"37fa13df2292ecb479ec23aa06f4507928bef07839be9ef15281411076629431" "checksum byteorder 1.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "08c48aae112d48ed9f069b33538ea9e3e90aa263cfa3d1c24309612b1f7472de" "checksum calloop 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)" = "7aa2097be53a00de9e8fc349fea6d76221f398f5c4fa550d420669906962d160" @@ -3166,7 +3182,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" "checksum khronos 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "c0711aaa80e6ba6eb1fa8978f1f46bfcb38ceb2f3f33f3736efbff39dac89f50" "checksum khronos_api 3.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2db585e1d738fc771bf08a151420d3ed193d9d895a36df7f6f8a9456b911ddc" -"checksum kurbo 0.5.11 (registry+https://github.com/rust-lang/crates.io-index)" = "bf50e17a1697110c694d47c5b1a6b64faf5eb3ffe5a286df23fb8cd516e33be6" +"checksum kurbo 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2520c9c9010461ec2b4573599bca458272319a314fd0b9476cacfcb9b6e5adc8" "checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" "checksum lazycell 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "b294d6fa9ee409a054354afc4352b0b9ef7ca222c69b8812cbea9e7d2bf3783f" "checksum leak 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "bd100e01f1154f2908dfa7d02219aeab25d0b9c7fa955164192e3245255a0c73" @@ -3239,7 +3255,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum regex 1.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7f6946991529684867e47d86474e3a6d0c0ab9b82d5821e314b1ede31fa3a4b3" "checksum regex-syntax 0.6.17 (registry+https://github.com/rust-lang/crates.io-index)" = 
"7fe5bd57d1d7414c6b5ed48563a2c855d995ff777729dcd91c369ec7fea395ae" "checksum remove_dir_all 0.5.2 (registry+https://github.com/rust-lang/crates.io-index)" = "4a83fa3702a688b9359eccba92d153ac33fd2e8462f9e0e3fdf155239ea7792e" -"checksum roxmltree 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "99d696b20b92d3e02e08fd8456f0ab03007c99e6b111a6205b9cb6fc044d0957" +"checksum roxmltree 0.11.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d5001f134077069d87f77c8b9452b690df2445f7a43f1c7ca4a1af8dd505789d" "checksum rust-argon2 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "2bc8af4bda8e1ff4932523b94d3dd20ee30a87232323eda55903ffd71d2fb017" "checksum rustc-demangle 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)" = "4c691c0e608126e00913e33f0ccf3727d5fc84573623b8d65b2df340b5201783" "checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" @@ -3287,7 +3303,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum tiff 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "002351e428db1eb1d8656d4ca61947c3519ac3191e1c804d4600cd32093b77ad" "checksum time-point 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "06535c958d6abe68dc4b4ef9e6845f758fc42fe463d0093d0aca40254f03fb14" "checksum toml 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ffc92d160b1eef40665be3a05630d003936a3bc7da7421277846c2613e92c71a" -"checksum ttf-parser 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "a67a691cd15aae8f55fcc6e68efec96ec9e6e3ad967ac16f18681e2268c92037" +"checksum ttf-parser 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "52fbe7769f5af5d7d25aea74b9443b64e544a5ffb4d2b2968295ddea934f1a06" "checksum unicode-bidi 0.3.4 (registry+https://github.com/rust-lang/crates.io-index)" = "49f2bd0c6468a8230e1db229cff8029217cf623c767ea5d60bfbd42729ea54d5" 
"checksum unicode-normalization 0.1.12 (registry+https://github.com/rust-lang/crates.io-index)" = "5479532badd04e128284890390c1e876ef7a993d0570b3597ae43dfa1d59afa4" "checksum unicode-script 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)" = "dc3ca3febe3d301fa4ff250e63a11d9da58390c3f079c736fc6602bcd36449d2" @@ -3296,7 +3312,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" "checksum unicode-xid 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" "checksum unreachable 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" -"checksum usvg 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4725473a52c4ebc949d3141d39c97b5131a575a96bea4912ccd5b03a720d7a1b" +"checksum usvg 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "5d98fe4bbd8cfe811fb84dabebd670d26b1e633ecb4d3a4ef3a4b8c10252448d" "checksum vec_map 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "05c78687fb1a80548ae3250346c3db86a80a7cdd77bda190189f2d0a0987c81a" "checksum version_check 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "078775d0255232fb988e6fccf26ddc9d1ac274299aaedcedce21c6f72cc533ce" "checksum void 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)" = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" diff --git a/c/src/lib.rs b/c/src/lib.rs index 83c3a98a..74b994cf 100644 --- a/c/src/lib.rs +++ b/c/src/lib.rs @@ -23,11 +23,13 @@ use pathfinder_geometry::transform2d::{Matrix2x2F, Transform2F}; use pathfinder_geometry::transform3d::{Perspective, Transform4F}; use pathfinder_geometry::vector::{Vector2F, Vector2I}; use pathfinder_gl::{GLDevice, GLVersion}; +use 
pathfinder_gpu::Device; use pathfinder_resources::ResourceLoader; use pathfinder_resources::fs::FilesystemResourceLoader; use pathfinder_renderer::concurrent::rayon::RayonExecutor; use pathfinder_renderer::concurrent::scene_proxy::SceneProxy; -use pathfinder_renderer::gpu::options::{DestFramebuffer, RendererOptions}; +use pathfinder_renderer::gpu::options::{DestFramebuffer, RendererLevel}; +use pathfinder_renderer::gpu::options::{RendererMode, RendererOptions}; use pathfinder_renderer::gpu::renderer::Renderer; use pathfinder_renderer::options::{BuildOptions, RenderTransform}; use pathfinder_renderer::scene::Scene; @@ -39,7 +41,7 @@ use std::slice; use std::str; #[cfg(all(target_os = "macos", not(feature = "pf-gl")))] -use metal::{CAMetalLayer, CoreAnimationLayerRef, Device}; +use metal::{self, CAMetalLayer, CoreAnimationLayerRef}; #[cfg(all(target_os = "macos", not(feature = "pf-gl")))] use pathfinder_metal::MetalDevice; #[cfg(all(target_os = "macos", not(feature = "pf-gl")))] @@ -74,6 +76,10 @@ pub const PF_GL_VERSION_GLES3: u8 = 1; // `renderer` pub const PF_RENDERER_OPTIONS_FLAGS_HAS_BACKGROUND_COLOR: u8 = 0x1; +pub const PF_RENDERER_OPTIONS_FLAGS_SHOW_DEBUG_UI: u8 = 0x2; + +pub const PF_RENDERER_LEVEL_D3D9: u8 = 0x1; +pub const PF_RENDERER_LEVEL_D3D11: u8 = 0x2; // Types @@ -182,13 +188,20 @@ pub type PFMetalDeviceRef = *mut MetalDevice; pub type PFSceneRef = *mut Scene; pub type PFSceneProxyRef = *mut SceneProxy; #[repr(C)] +pub struct PFRendererMode { + pub level: PFRendererLevel, +} +pub type PFDestFramebufferRef = *mut c_void; +#[repr(C)] pub struct PFRendererOptions { + pub dest: PFDestFramebufferRef, pub background_color: PFColorF, pub flags: PFRendererOptionsFlags, } pub type PFRendererOptionsFlags = u8; pub type PFBuildOptionsRef = *mut BuildOptions; pub type PFRenderTransformRef = *mut RenderTransform; +pub type PFRendererLevel = u8; // `canvas` @@ -541,12 +554,12 @@ pub unsafe extern "C" fn PFGLDestFramebufferDestroy(dest_framebuffer: PFGLDestFr 
#[no_mangle] pub unsafe extern "C" fn PFGLRendererCreate(device: PFGLDeviceRef, resources: PFResourceLoaderRef, - dest_framebuffer: PFGLDestFramebufferRef, + mode: *const PFRendererMode, options: *const PFRendererOptions) -> PFGLRendererRef { Box::into_raw(Box::new(Renderer::new(*Box::from_raw(device), &*((*resources).0), - *Box::from_raw(dest_framebuffer), + (*mode).to_rust(), (*options).to_rust()))) } @@ -557,7 +570,7 @@ pub unsafe extern "C" fn PFGLRendererDestroy(renderer: PFGLRendererRef) { #[no_mangle] pub unsafe extern "C" fn PFGLRendererGetDevice(renderer: PFGLRendererRef) -> PFGLDeviceRef { - &mut (*renderer).device + (*renderer).device_mut() } #[cfg(all(target_os = "macos", not(feature = "pf-gl")))] @@ -581,12 +594,12 @@ pub unsafe extern "C" fn PFMetalDestFramebufferDestroy(dest_framebuffer: #[no_mangle] pub unsafe extern "C" fn PFMetalRendererCreate(device: PFMetalDeviceRef, resources: PFResourceLoaderRef, - dest_framebuffer: PFMetalDestFramebufferRef, + mode: *const PFRendererMode, options: *const PFRendererOptions) -> PFMetalRendererRef { Box::into_raw(Box::new(Renderer::new(*Box::from_raw(device), &*((*resources).0), - *Box::from_raw(dest_framebuffer), + (*mode).to_rust(), (*options).to_rust()))) } @@ -603,7 +616,7 @@ pub unsafe extern "C" fn PFMetalRendererDestroy(renderer: PFMetalRendererRef) { #[no_mangle] pub unsafe extern "C" fn PFMetalRendererGetDevice(renderer: PFMetalRendererRef) -> PFMetalDeviceRef { - &mut (*renderer).device + (*renderer).device_mut() } /// This function does not take ownership of `renderer` or `build_options`. 
Therefore, if you @@ -631,7 +644,8 @@ pub unsafe extern "C" fn PFSceneProxyBuildAndRenderMetal(scene_proxy: PFScenePro #[no_mangle] pub unsafe extern "C" fn PFMetalDeviceCreate(layer: *mut CAMetalLayer) -> PFMetalDeviceRef { - let device = Device::system_default().expect("Failed to get Metal system default device!"); + let device = + metal::Device::system_default().expect("Failed to get Metal system default device!"); let layer = CoreAnimationLayerRef::from_ptr(layer); Box::into_raw(Box::new(MetalDevice::new(device, layer.next_drawable().unwrap()))) } @@ -696,9 +710,12 @@ pub unsafe extern "C" fn PFSceneDestroy(scene: PFSceneRef) { } #[no_mangle] -pub unsafe extern "C" fn PFSceneProxyCreateFromSceneAndRayonExecutor(scene: PFSceneRef) +pub unsafe extern "C" fn PFSceneProxyCreateFromSceneAndRayonExecutor(scene: PFSceneRef, + level: PFRendererLevel) -> PFSceneProxyRef { - Box::into_raw(Box::new(SceneProxy::from_scene(*Box::from_raw(scene), RayonExecutor))) + Box::into_raw(Box::new(SceneProxy::from_scene(*Box::from_raw(scene), + to_rust_renderer_level(level), + RayonExecutor))) } #[no_mangle] @@ -807,17 +824,35 @@ impl PFPerspective { // Helpers for `renderer` -impl PFRendererOptions { - pub fn to_rust(&self) -> RendererOptions { - let has_background_color = self.flags & PF_RENDERER_OPTIONS_FLAGS_HAS_BACKGROUND_COLOR; - RendererOptions { - background_color: if has_background_color != 0 { - Some(self.background_color.to_rust()) - } else { - None - }, - // TODO(pcwalton): Expose this in the C API. 
- no_compute: false, +impl PFRendererMode { + pub fn to_rust(&self) -> RendererMode { + RendererMode { + level: to_rust_renderer_level(self.level), } } } + +impl PFRendererOptions { + pub fn to_rust(&self) -> RendererOptions where D: Device { + let has_background_color = self.flags & PF_RENDERER_OPTIONS_FLAGS_HAS_BACKGROUND_COLOR; + let show_debug_ui = (self.flags & PF_RENDERER_OPTIONS_FLAGS_SHOW_DEBUG_UI) != 0; + unsafe { + RendererOptions { + background_color: if has_background_color != 0 { + Some(self.background_color.to_rust()) + } else { + None + }, + dest: *Box::from_raw(self.dest as *mut DestFramebuffer), + show_debug_ui, + } + } + } +} + +fn to_rust_renderer_level(level: PFRendererLevel) -> RendererLevel { + match level { + PF_RENDERER_LEVEL_D3D9 => RendererLevel::D3D9, + _ => RendererLevel::D3D11, + } +} diff --git a/canvas/src/lib.rs b/canvas/src/lib.rs index 4de6a028..3dc0a8bc 100644 --- a/canvas/src/lib.rs +++ b/canvas/src/lib.rs @@ -165,7 +165,7 @@ impl CanvasRenderingContext2D { let mut path = DrawPath::new(outline, paint_id); path.set_blend_mode(BlendMode::Clear); - self.canvas.scene.push_path(path); + self.canvas.scene.push_draw_path(path); } // Line styles @@ -345,7 +345,7 @@ impl CanvasRenderingContext2D { } path.set_fill_rule(fill_rule); path.set_blend_mode(blend_mode); - self.canvas.scene.push_path(path); + self.canvas.scene.push_draw_path(path); composite_shadow_blur_render_targets_if_needed(&mut self.canvas.scene, shadow_blur_info, @@ -356,7 +356,7 @@ impl CanvasRenderingContext2D { path.set_clip_path(clip_path); path.set_fill_rule(fill_rule); path.set_blend_mode(blend_mode); - self.canvas.scene.push_path(path); + self.canvas.scene.push_draw_path(path); fn push_shadow_blur_render_targets_if_needed(scene: &mut Scene, current_state: &State, @@ -410,9 +410,9 @@ impl CanvasRenderingContext2D { path_y.set_clip_path(clip_path); scene.pop_render_target(); - scene.push_path(path_x); + scene.push_draw_path(path_x); scene.pop_render_target(); - 
scene.push_path(path_y); + scene.push_draw_path(path_y); } } diff --git a/content/src/outline.rs b/content/src/outline.rs index 8cb4f229..f5bbd58c 100644 --- a/content/src/outline.rs +++ b/content/src/outline.rs @@ -333,6 +333,11 @@ impl Contour { self.points[self.points.len() - index as usize] } + #[inline] + pub fn flags_of(&self, index: u32) -> PointFlags { + self.flags[index as usize] + } + #[inline] pub fn push_endpoint(&mut self, point: Vector2F) { self.push_point(point, PointFlags::empty(), true); diff --git a/content/src/segment.rs b/content/src/segment.rs index 3c607f79..1c2b1c1b 100644 --- a/content/src/segment.rs +++ b/content/src/segment.rs @@ -12,13 +12,11 @@ use pathfinder_geometry::line_segment::LineSegment2F; use pathfinder_geometry::transform2d::Transform2F; -use pathfinder_geometry::util::{self, EPSILON}; +use pathfinder_geometry::util::EPSILON; use pathfinder_geometry::vector::{Vector2F, vec2f}; use pathfinder_simd::default::F32x4; use std::f32::consts::SQRT_2; -const MAX_NEWTON_ITERATIONS: u32 = 32; - #[derive(Clone, Copy, Debug, PartialEq)] pub struct Segment { pub baseline: LineSegment2F, @@ -155,16 +153,6 @@ impl Segment { new_segment } - #[inline] - pub fn is_monotonic(&self) -> bool { - // FIXME(pcwalton): Don't degree elevate! - match self.kind { - SegmentKind::None | SegmentKind::Line => true, - SegmentKind::Quadratic => self.to_cubic().as_cubic_segment().is_monotonic(), - SegmentKind::Cubic => self.as_cubic_segment().is_monotonic(), - } - } - #[inline] pub fn reversed(&self) -> Segment { Segment { @@ -341,72 +329,6 @@ impl<'s> CubicSegment<'s> { self.split(t).0.baseline.to() } - #[inline] - pub fn is_monotonic(self) -> bool { - // TODO(pcwalton): Optimize this. 
- let (p0, p3) = (self.0.baseline.from_y(), self.0.baseline.to_y()); - let (p1, p2) = (self.0.ctrl.from_y(), self.0.ctrl.to_y()); - (p0 <= p1 && p1 <= p2 && p2 <= p3) || (p0 >= p1 && p1 >= p2 && p2 >= p3) - } - - #[inline] - pub fn y_extrema(self) -> (Option, Option) { - if self.is_monotonic() { - return (None, None); - } - - let p0p1p2p3 = F32x4::new( - self.0.baseline.from_y(), - self.0.ctrl.from_y(), - self.0.ctrl.to_y(), - self.0.baseline.to_y(), - ); - - let pxp0p1p2 = p0p1p2p3.wxyz(); - let pxv0v1v2 = p0p1p2p3 - pxp0p1p2; - let (v0, v1, v2) = (pxv0v1v2[1], pxv0v1v2[2], pxv0v1v2[3]); - - let (t0, t1); - let (v0_to_v1, v2_to_v1) = (v0 - v1, v2 - v1); - let denom = v0_to_v1 + v2_to_v1; - - if util::approx_eq(denom, 0.0) { - // Let's not divide by zero (issue #146). Fall back to Newton's method. - // FIXME(pcwalton): Can we have two roots here? - let mut t = 0.5; - for _ in 0..MAX_NEWTON_ITERATIONS { - let dydt = 3.0 * ((denom * t - v0_to_v1 - v0_to_v1) * t + v0); - if f32::abs(dydt) <= EPSILON { - break - } - let d2ydt2 = 6.0 * (denom * t - v0_to_v1); - t -= dydt / d2ydt2; - } - t0 = t; - t1 = 0.0; - debug!("... t=(newton) {}", t); - } else { - // Algebraically compute the values for t. - let discrim = f32::sqrt(v1 * v1 - v0 * v2); - let denom_recip = 1.0 / denom; - - t0 = (v0_to_v1 + discrim) * denom_recip; - t1 = (v0_to_v1 - discrim) * denom_recip; - - debug!("... 
t=({} +/- {})/{} t0={} t1={}", v0_to_v1, discrim, denom, t0, t1); - } - - return match ( - t0 > EPSILON && t0 < 1.0 - EPSILON, - t1 > EPSILON && t1 < 1.0 - EPSILON, - ) { - (false, false) => (None, None), - (true, false) => (Some(t0), None), - (false, true) => (Some(t1), None), - (true, true) => (Some(f32::min(t0, t1)), Some(f32::max(t0, t1))), - }; - } - #[inline] pub fn min_x(&self) -> f32 { f32::min(self.0.baseline.min_x(), self.0.ctrl.min_x()) diff --git a/demo/common/src/lib.rs b/demo/common/src/lib.rs index 6794c21f..c3635dfb 100644 --- a/demo/common/src/lib.rs +++ b/demo/common/src/lib.rs @@ -34,9 +34,10 @@ use pathfinder_geometry::transform2d::Transform2F; use pathfinder_geometry::transform3d::Transform4F; use pathfinder_geometry::vector::{Vector2F, Vector2I, Vector4F, vec2f, vec2i}; use pathfinder_gpu::Device; -use pathfinder_renderer::concurrent::scene_proxy::{RenderCommandStream, SceneProxy}; -use pathfinder_renderer::gpu::options::{DestFramebuffer, RendererOptions}; -use pathfinder_renderer::gpu::renderer::{RenderStats, RenderTime, Renderer}; +use pathfinder_renderer::concurrent::scene_proxy::SceneProxy; +use pathfinder_renderer::gpu::options::{DestFramebuffer, RendererLevel}; +use pathfinder_renderer::gpu::options::{RendererMode, RendererOptions}; +use pathfinder_renderer::gpu::renderer::{DebugUIPresenterInfo, Renderer}; use pathfinder_renderer::options::{BuildOptions, RenderTransform}; use pathfinder_renderer::paint::Paint; use pathfinder_renderer::scene::{DrawPath, RenderTarget, Scene}; @@ -90,7 +91,6 @@ pub struct DemoApp where W: Window { svg_tree: Tree, scene_metadata: SceneMetadata, render_transform: Option, - render_command_stream: Option, camera: Camera, frame_counter: u32, @@ -135,14 +135,25 @@ impl DemoApp where W: Window { let executor = DemoExecutor::new(options.jobs); let mut ui_model = DemoUIModel::new(&options); + + let level = match options.renderer_level { + Some(level) => level, + None => RendererLevel::default_for_device(&device), + 
}; + let viewport = window.viewport(options.mode.view(0)); + let dest_framebuffer = DestFramebuffer::Default { + viewport, + window_size: window_size.device_size(), + }; + let render_mode = RendererMode { level }; let render_options = RendererOptions { + dest: dest_framebuffer, background_color: None, - no_compute: options.no_compute, + show_debug_ui: true, }; let filter = build_filter(&ui_model); - let viewport = window.viewport(options.mode.view(0)); let (mut built_svg, svg_tree) = load_scene(resources, &options.input_path, viewport.size(), @@ -150,21 +161,16 @@ impl DemoApp where W: Window { let message = get_svg_building_message(&built_svg); - let dest_framebuffer = DestFramebuffer::Default { - viewport, - window_size: window_size.device_size(), - }; - - let renderer = Renderer::new(device, resources, dest_framebuffer, render_options); + let renderer = Renderer::new(device, resources, render_mode, render_options); let scene_metadata = SceneMetadata::new_clipping_view_box(&mut built_svg.scene, viewport.size()); let camera = Camera::new(options.mode, scene_metadata.view_box, viewport.size()); - let scene_proxy = SceneProxy::from_scene(built_svg.scene, executor); + let scene_proxy = SceneProxy::from_scene(built_svg.scene, level, executor); - let ground_program = GroundProgram::new(&renderer.device, resources); - let ground_vertex_array = GroundVertexArray::new(&renderer.device, + let ground_program = GroundProgram::new(renderer.device(), resources); + let ground_vertex_array = GroundVertexArray::new(renderer.device(), &ground_program, &renderer.quad_vertex_positions_buffer(), &renderer.quad_vertex_indices_buffer()); @@ -177,7 +183,7 @@ impl DemoApp where W: Window { message, ); - let ui_presenter = DemoUIPresenter::new(&renderer.device, resources); + let ui_presenter = DemoUIPresenter::new(renderer.device(), resources); DemoApp { window, @@ -189,7 +195,6 @@ impl DemoApp where W: Window { svg_tree, scene_metadata, render_transform: None, - render_command_stream: 
None, camera, frame_counter: 0, @@ -265,7 +270,11 @@ impl DemoApp where W: Window { subpixel_aa_enabled: self.ui_model.subpixel_aa_effect_enabled, }; - self.render_command_stream = Some(self.scene_proxy.build_with_stream(build_options)); + self.scene_proxy.build(build_options); + /* + self.render_command_stream = + Some(self.scene_proxy.build_with_stream(build_options, self.renderer.gpu_features())); + */ } fn handle_events(&mut self, events: Vec) -> Vec { @@ -469,57 +478,41 @@ impl DemoApp where W: Window { pub fn finish_drawing_frame(&mut self) { self.maybe_take_screenshot(); - self.update_stats(); - self.draw_debug_ui(); let frame = self.current_frame.take().unwrap(); for ui_event in &frame.ui_events { self.dirty = true; - self.renderer.debug_ui_presenter.ui_presenter.event_queue.push(*ui_event); + self.renderer + .debug_ui_presenter_mut() + .debug_ui_presenter + .ui_presenter + .event_queue + .push(*ui_event); } - self.renderer.debug_ui_presenter.ui_presenter.mouse_position = + self.renderer.debug_ui_presenter_mut().debug_ui_presenter.ui_presenter.mouse_position = self.last_mouse_position.to_f32() * self.window_size.backing_scale_factor; let mut ui_action = UIAction::None; if self.options.ui == UIVisibility::All { - self.ui_presenter.update( - &self.renderer.device, - &mut self.window, - &mut self.renderer.debug_ui_presenter, - &mut ui_action, - &mut self.ui_model, - ); + let DebugUIPresenterInfo { device, allocator, debug_ui_presenter } = + self.renderer.debug_ui_presenter_mut(); + self.ui_presenter.update(device, + allocator, + &mut self.window, + debug_ui_presenter, + &mut ui_action, + &mut self.ui_model); } self.handle_ui_events(frame, &mut ui_action); - self.renderer.device.end_commands(); + self.renderer.device().end_commands(); - self.window.present(&mut self.renderer.device); + self.window.present(self.renderer.device_mut()); self.frame_counter += 1; } - fn update_stats(&mut self) { - let frame = self.current_frame.as_mut().unwrap(); - if let 
Some(rendering_time) = self.renderer.shift_rendering_time() { - frame.scene_rendering_times.push(rendering_time); - } - - if frame.scene_stats.is_empty() && frame.scene_rendering_times.is_empty() { - return - } - - let zero = RenderStats::default(); - let aggregate_stats = frame.scene_stats.iter().fold(zero, |sum, item| sum + *item); - if !frame.scene_rendering_times.is_empty() { - let total_rendering_time = frame.scene_rendering_times - .iter() - .fold(RenderTime::default(), |sum, item| sum + *item); - self.renderer.debug_ui_presenter.add_sample(aggregate_stats, total_rendering_time); - } - } - fn maybe_take_screenshot(&mut self) { match self.pending_screenshot_info.take() { None => {} @@ -535,7 +528,12 @@ impl DemoApp where W: Window { } fn handle_ui_events(&mut self, mut frame: Frame, ui_action: &mut UIAction) { - frame.ui_events = self.renderer.debug_ui_presenter.ui_presenter.event_queue.drain(); + frame.ui_events = self.renderer + .debug_ui_presenter_mut() + .debug_ui_presenter + .ui_presenter + .event_queue + .drain(); self.handle_ui_action(ui_action); @@ -625,7 +623,7 @@ pub struct Options { pub ui: UIVisibility, pub background_color: BackgroundColor, pub high_performance_gpu: bool, - pub no_compute: bool, + pub renderer_level: Option, hidden_field_for_future_proofing: (), } @@ -638,7 +636,7 @@ impl Default for Options { ui: UIVisibility::All, background_color: BackgroundColor::Light, high_performance_gpu: false, - no_compute: false, + renderer_level: None, hidden_field_for_future_proofing: (), } } @@ -646,7 +644,7 @@ impl Default for Options { impl Options { pub fn command_line_overrides(&mut self) { - let matches = App::new("tile-svg") + let matches = App::new("demo") .arg( Arg::with_name("jobs") .short("j") @@ -692,10 +690,12 @@ impl Options { .help("Use the high-performance (discrete) GPU, if available") ) .arg( - Arg::with_name("no-compute") - .short("c") - .long("no-compute") - .help("Never use compute shaders") + Arg::with_name("level") + 
.long("level") + .short("l") + .help("Set the renderer feature level as a Direct3D version equivalent") + .takes_value(true) + .possible_values(&["9", "11"]) ) .arg( Arg::with_name("INPUT") @@ -734,13 +734,17 @@ impl Options { self.high_performance_gpu = true; } - if matches.is_present("no-compute") { - self.no_compute = true; + if let Some(renderer_level) = matches.value_of("level") { + if renderer_level == "11" { + self.renderer_level = Some(RendererLevel::D3D11); + } else if renderer_level == "9" { + self.renderer_level = Some(RendererLevel::D3D9); + } } if let Some(path) = matches.value_of("INPUT") { self.input_path = SVGPath::Path(PathBuf::from(path)); - }; + } } } @@ -798,7 +802,7 @@ fn build_svg_tree(tree: &Tree, viewport_size: Vector2I, filter: Option( struct Frame { transform: RenderTransform, ui_events: Vec, - scene_rendering_times: Vec, - scene_stats: Vec, } impl Frame { fn new(transform: RenderTransform, ui_events: Vec) -> Frame { - Frame { - transform, - ui_events, - scene_rendering_times: vec![], - scene_stats: vec![], - } + Frame { transform, ui_events } } } diff --git a/demo/common/src/renderer.rs b/demo/common/src/renderer.rs index 05a29575..0d1d136f 100644 --- a/demo/common/src/renderer.rs +++ b/demo/common/src/renderer.rs @@ -22,6 +22,7 @@ use pathfinder_geometry::transform3d::Transform4F; use pathfinder_geometry::vector::{Vector2I, Vector4F}; use pathfinder_renderer::gpu::options::{DestFramebuffer, RendererOptions}; use pathfinder_renderer::options::RenderTransform; +use std::mem; use std::path::PathBuf; const GROUND_SOLID_COLOR: ColorU = ColorU { @@ -46,59 +47,58 @@ impl DemoApp where W: Window { let view = self.ui_model.mode.view(0); self.window.make_current(view); - // Set up framebuffers. 
- let window_size = self.window_size.device_size(); - let mode = self.camera.mode(); - let scene_count = match mode { - Mode::VR => { - let viewport = self.window.viewport(View::Stereo(0)); - if self.scene_framebuffer.is_none() - || self.renderer.device.texture_size( - &self - .renderer - .device - .framebuffer_texture(self.scene_framebuffer.as_ref().unwrap()), - ) != viewport.size() - { - let scene_texture = self - .renderer - .device - .create_texture(TextureFormat::RGBA8, viewport.size()); - self.scene_framebuffer = - Some(self.renderer.device.create_framebuffer(scene_texture)); - } - self.renderer - .replace_dest_framebuffer(DestFramebuffer::Other( - self.scene_framebuffer.take().unwrap(), - )); - 2 - } - _ => { - self.renderer - .replace_dest_framebuffer(DestFramebuffer::Default { - viewport: self.window.viewport(View::Mono), - window_size, - }); - 1 - } - }; - // Clear to the appropriate color. + let mode = self.camera.mode(); let clear_color = match mode { Mode::TwoD => Some(self.ui_model.background_color().to_f32()), Mode::ThreeD => None, Mode::VR => Some(ColorF::transparent_black()), }; - self.renderer.set_options(RendererOptions { - background_color: clear_color, - no_compute: self.options.no_compute, - }); + + // Set up framebuffers. 
+ let window_size = self.window_size.device_size(); + let scene_count = match mode { + Mode::VR => { + let viewport = self.window.viewport(View::Stereo(0)); + if self.scene_framebuffer.is_none() + || self.renderer.device().texture_size( + &self.renderer.device().framebuffer_texture(self.scene_framebuffer + .as_ref() + .unwrap()), + ) != viewport.size() + { + let scene_texture = self + .renderer + .device() + .create_texture(TextureFormat::RGBA8, viewport.size()); + self.scene_framebuffer = + Some(self.renderer.device().create_framebuffer(scene_texture)); + } + *self.renderer.options_mut() = RendererOptions { + dest: DestFramebuffer::Other(self.scene_framebuffer.take().unwrap()), + background_color: clear_color, + show_debug_ui: self.options.ui != UIVisibility::None, + }; + 2 + } + _ => { + *self.renderer.options_mut() = RendererOptions { + dest: DestFramebuffer::Default { + viewport: self.window.viewport(View::Mono), + window_size, + }, + background_color: clear_color, + show_debug_ui: self.options.ui != UIVisibility::None, + }; + 1 + } + }; scene_count } pub fn draw_scene(&mut self) { - self.renderer.device.begin_commands(); + self.renderer.device().begin_commands(); let view = self.ui_model.mode.view(0); self.window.make_current(view); @@ -107,26 +107,29 @@ impl DemoApp where W: Window { self.draw_environment(0); } - self.renderer.device.end_commands(); + self.renderer.device().end_commands(); self.render_vector_scene(); // Reattach default framebuffer. 
if self.camera.mode() == Mode::VR { - if let DestFramebuffer::Other(scene_framebuffer) = - self.renderer - .replace_dest_framebuffer(DestFramebuffer::Default { - viewport: self.window.viewport(View::Mono), - window_size: self.window_size.device_size(), - }) - { + let new_options = RendererOptions { + dest: DestFramebuffer::Default { + viewport: self.window.viewport(View::Mono), + window_size: self.window_size.device_size(), + }, + ..*self.renderer.options() + }; + if let DestFramebuffer::Other(scene_framebuffer) = mem::replace(self.renderer + .options_mut(), + new_options).dest { self.scene_framebuffer = Some(scene_framebuffer); } } } pub fn begin_compositing(&mut self) { - self.renderer.device.begin_commands(); + self.renderer.device().begin_commands(); } pub fn composite_scene(&mut self, render_scene_index: u32) { @@ -153,15 +156,15 @@ impl DemoApp where W: Window { let viewport = self.window.viewport(View::Stereo(render_scene_index)); self.window.make_current(View::Stereo(render_scene_index)); - self.renderer.replace_dest_framebuffer(DestFramebuffer::Default { + self.renderer.options_mut().dest = DestFramebuffer::Default { viewport, window_size: self.window_size.device_size(), - }); + }; self.draw_environment(render_scene_index); let scene_framebuffer = self.scene_framebuffer.as_ref().unwrap(); - let scene_texture = self.renderer.device.framebuffer_texture(scene_framebuffer); + let scene_texture = self.renderer.device().framebuffer_texture(scene_framebuffer); let mut quad_scale = self.scene_metadata.view_box.size().to_4d(); quad_scale.set_z(1.0); @@ -226,13 +229,14 @@ impl DemoApp where W: Window { None }; - self.renderer.device.draw_elements(6, &RenderState { + self.renderer.device().draw_elements(6, &RenderState { target: &self.renderer.draw_render_target(), program: &self.ground_program.program, vertex_array: &self.ground_vertex_array.vertex_array, primitive: Primitive::Triangles, textures: &[], images: &[], + storage_buffers: &[], uniforms: &[ 
(&self.ground_program.transform_uniform, UniformData::from_transform_3d(&transform)), @@ -258,27 +262,16 @@ impl DemoApp where W: Window { self.renderer.enable_depth(); } - self.renderer.begin_scene(); - // Issue render commands! - for command in self.render_command_stream.as_mut().unwrap() { - self.renderer.render_command(&command); - } - - self.current_frame - .as_mut() - .unwrap() - .scene_stats - .push(self.renderer.stats); - self.renderer.end_scene(); + self.scene_proxy.render(&mut self.renderer); } pub fn take_raster_screenshot(&mut self, path: PathBuf) { let drawable_size = self.window_size.device_size(); let viewport = RectI::new(Vector2I::default(), drawable_size); let texture_data_receiver = - self.renderer.device.read_pixels(&RenderTarget::Default, viewport); - let pixels = match self.renderer.device.recv_texture_data(&texture_data_receiver) { + self.renderer.device().read_pixels(&RenderTarget::Default, viewport); + let pixels = match self.renderer.device().recv_texture_data(&texture_data_receiver) { TextureData::U8(pixels) => pixels, _ => panic!("Unexpected pixel format for default framebuffer!"), }; @@ -291,19 +284,4 @@ impl DemoApp where W: Window { ) .unwrap(); } - - pub fn draw_debug_ui(&mut self) { - if self.options.ui == UIVisibility::None { - return; - } - - let viewport = self.window.viewport(View::Mono); - self.window.make_current(View::Mono); - self.renderer.replace_dest_framebuffer(DestFramebuffer::Default { - viewport, - window_size: self.window_size.device_size(), - }); - - self.renderer.draw_debug_ui(); - } } diff --git a/demo/common/src/ui.rs b/demo/common/src/ui.rs index 2c29d9e2..e1825478 100644 --- a/demo/common/src/ui.rs +++ b/demo/common/src/ui.rs @@ -14,6 +14,7 @@ use crate::{BackgroundColor, Options}; use pathfinder_color::ColorU; use pathfinder_geometry::rect::RectI; use pathfinder_geometry::vector::{Vector2I, vec2i}; +use pathfinder_gpu::allocator::GPUMemoryAllocator; use pathfinder_gpu::{Device, TextureFormat}; use 
pathfinder_renderer::gpu::debug::DebugUIPresenter; use pathfinder_resources::ResourceLoader; @@ -97,10 +98,7 @@ impl DemoUIModel { } } -pub struct DemoUIPresenter -where - D: Device, -{ +pub struct DemoUIPresenter where D: Device { effects_texture: D::Texture, open_texture: D::Texture, rotate_texture: D::Texture, @@ -116,11 +114,10 @@ where rotate_panel_visible: bool, } -impl DemoUIPresenter -where - D: Device, -{ +impl DemoUIPresenter where D: Device { pub fn new(device: &D, resources: &dyn ResourceLoader) -> DemoUIPresenter { + device.begin_commands(); + let effects_texture = device.create_texture_from_png(resources, EFFECTS_PNG_NAME, TextureFormat::R8); @@ -146,6 +143,8 @@ where SCREENSHOT_PNG_NAME, TextureFormat::R8); + device.end_commands(); + DemoUIPresenter { effects_texture, open_texture, @@ -163,19 +162,17 @@ where } } - pub fn update( - &mut self, - device: &D, - window: &mut W, - debug_ui_presenter: &mut DebugUIPresenter, - action: &mut UIAction, - model: &mut DemoUIModel - ) where - W: Window, - { + pub fn update(&mut self, + device: &D, + allocator: &mut GPUMemoryAllocator, + window: &mut W, + debug_ui_presenter: &mut DebugUIPresenter, + action: &mut UIAction, + model: &mut DemoUIModel) + where W: Window { // Draw message text. - self.draw_message_text(device, debug_ui_presenter, model); + self.draw_message_text(device, allocator, debug_ui_presenter, model); // Draw button strip. @@ -185,53 +182,60 @@ where let button_size = vec2i(BUTTON_WIDTH, BUTTON_HEIGHT); // Draw effects button. 
- if debug_ui_presenter.ui_presenter.draw_button(device, position, &self.effects_texture) { + if debug_ui_presenter.ui_presenter + .draw_button(device, allocator, position, &self.effects_texture) { self.effects_panel_visible = !self.effects_panel_visible; } if !self.effects_panel_visible { - debug_ui_presenter.ui_presenter.draw_tooltip( - device, - "Effects", - RectI::new(position, button_size), - ); + debug_ui_presenter.ui_presenter.draw_tooltip(device, + allocator, + "Effects", + RectI::new(position, button_size)); } position += vec2i(button_size.x() + PADDING, 0); // Draw open button. - if debug_ui_presenter.ui_presenter.draw_button(device, position, &self.open_texture) { + if debug_ui_presenter.ui_presenter + .draw_button(device, allocator, position, &self.open_texture) { // FIXME(pcwalton): This is not sufficient for Android, where we will need to take in // the contents of the file. window.present_open_svg_dialog(); } debug_ui_presenter.ui_presenter.draw_tooltip(device, + allocator, "Open SVG", RectI::new(position, button_size)); position += vec2i(BUTTON_WIDTH + PADDING, 0); // Draw screenshot button. - if debug_ui_presenter.ui_presenter.draw_button(device, - position, - &self.screenshot_texture) { + if debug_ui_presenter.ui_presenter + .draw_button(device, allocator, position, &self.screenshot_texture) { self.screenshot_panel_visible = !self.screenshot_panel_visible; } if !self.screenshot_panel_visible { debug_ui_presenter.ui_presenter.draw_tooltip( device, + allocator, "Take Screenshot", RectI::new(position, button_size), ); } // Draw screenshot panel, if necessary. - self.draw_screenshot_panel(device, window, debug_ui_presenter, position.x(), action); + self.draw_screenshot_panel(device, + allocator, + window, + debug_ui_presenter, + position.x(), + action); position += vec2i(button_size.x() + PADDING, 0); // Draw mode switch. 
- let new_mode = debug_ui_presenter.ui_presenter.draw_text_switch( - device, - position, - &["2D", "3D", "VR"], - model.mode as u8); + let new_mode = debug_ui_presenter.ui_presenter.draw_text_switch(device, + allocator, + position, + &["2D", "3D", "VR"], + model.mode as u8); if new_mode != model.mode as u8 { model.mode = match new_mode { 0 => Mode::TwoD, @@ -245,6 +249,7 @@ where let mode_switch_size = vec2i(mode_switch_width, BUTTON_HEIGHT); debug_ui_presenter.ui_presenter.draw_tooltip( device, + allocator, "2D/3D/VR Mode", RectI::new(position, mode_switch_size), ); @@ -252,6 +257,7 @@ where // Draw background switch. if debug_ui_presenter.ui_presenter.draw_button(device, + allocator, position, &self.background_texture) { self.background_panel_visible = !self.background_panel_visible; @@ -259,50 +265,60 @@ where if !self.background_panel_visible { debug_ui_presenter.ui_presenter.draw_tooltip( device, + allocator, "Background Color", RectI::new(position, button_size), ); } // Draw background panel, if necessary. - self.draw_background_panel(device, debug_ui_presenter, position.x(), action, model); + self.draw_background_panel(device, + allocator, + debug_ui_presenter, + position.x(), + action, + model); position += vec2i(button_size.x() + PADDING, 0); // Draw effects panel, if necessary. - self.draw_effects_panel(device, debug_ui_presenter, model, action); + self.draw_effects_panel(device, allocator, debug_ui_presenter, model, action); // Draw rotate and zoom buttons, if applicable. 
if model.mode != Mode::TwoD { return; } - if debug_ui_presenter.ui_presenter.draw_button(device, position, &self.rotate_texture) { + if debug_ui_presenter.ui_presenter.draw_button(device, + allocator, + position, + &self.rotate_texture) { self.rotate_panel_visible = !self.rotate_panel_visible; } if !self.rotate_panel_visible { debug_ui_presenter.ui_presenter.draw_tooltip(device, + allocator, "Rotate", RectI::new(position, button_size)); } - self.draw_rotate_panel(device, debug_ui_presenter, position.x(), action, model); + self.draw_rotate_panel(device, allocator, debug_ui_presenter, position.x(), action, model); position += vec2i(BUTTON_WIDTH + PADDING, 0); // Draw zoom control. - self.draw_zoom_control(device, debug_ui_presenter, position, action); + self.draw_zoom_control(device, allocator, debug_ui_presenter, position, action); } - fn draw_zoom_control( - &mut self, - device: &D, - debug_ui_presenter: &mut DebugUIPresenter, - position: Vector2I, - action: &mut UIAction, - ) { + fn draw_zoom_control(&mut self, + device: &D, + allocator: &mut GPUMemoryAllocator, + debug_ui_presenter: &mut DebugUIPresenter, + position: Vector2I, + action: &mut UIAction) { let zoom_segmented_control_width = debug_ui_presenter.ui_presenter.measure_segmented_control(3); let zoom_segmented_control_rect = RectI::new(position, vec2i(zoom_segmented_control_width, BUTTON_HEIGHT)); - debug_ui_presenter.ui_presenter.draw_tooltip(device, "Zoom", zoom_segmented_control_rect); + debug_ui_presenter.ui_presenter + .draw_tooltip(device, allocator, "Zoom", zoom_segmented_control_rect); let zoom_textures = &[ &self.zoom_in_texture, @@ -311,6 +327,7 @@ where ]; match debug_ui_presenter.ui_presenter.draw_image_segmented_control(device, + allocator, position, zoom_textures, None) { @@ -323,6 +340,7 @@ where fn draw_message_text(&mut self, device: &D, + allocator: &mut GPUMemoryAllocator, debug_ui_presenter: &mut DebugUIPresenter, model: &mut DemoUIModel) { if model.message.is_empty() { @@ -334,11 
+352,13 @@ where let window_size = vec2i(PADDING * 2 + message_size, TOOLTIP_HEIGHT); debug_ui_presenter.ui_presenter.draw_solid_rounded_rect( device, + allocator, RectI::new(window_origin, window_size), WINDOW_COLOR, ); debug_ui_presenter.ui_presenter.draw_text( device, + allocator, &model.message, window_origin + vec2i(PADDING, PADDING + FONT_ASCENT), false, @@ -347,6 +367,7 @@ where fn draw_effects_panel(&mut self, device: &D, + allocator: &mut GPUMemoryAllocator, debug_ui_presenter: &mut DebugUIPresenter, model: &mut DemoUIModel, action: &mut UIAction) { @@ -358,45 +379,45 @@ where let effects_panel_y = bottom - (BUTTON_HEIGHT + PADDING + EFFECTS_PANEL_HEIGHT); debug_ui_presenter.ui_presenter.draw_solid_rounded_rect( device, + allocator, RectI::new(vec2i(PADDING, effects_panel_y), vec2i(EFFECTS_PANEL_WIDTH, EFFECTS_PANEL_HEIGHT)), - WINDOW_COLOR, - ); + WINDOW_COLOR); - self.draw_effects_switch( - device, - action, - debug_ui_presenter, - "Gamma Correction", - 0, - effects_panel_y, - &mut model.gamma_correction_effect_enabled); - self.draw_effects_switch( - device, - action, - debug_ui_presenter, - "Stem Darkening", - 1, - effects_panel_y, - &mut model.stem_darkening_effect_enabled); - self.draw_effects_switch( - device, - action, - debug_ui_presenter, - "Subpixel AA", - 2, - effects_panel_y, - &mut model.subpixel_aa_effect_enabled); + self.draw_effects_switch(device, + allocator, + action, + debug_ui_presenter, + "Gamma Correction", + 0, + effects_panel_y, + &mut model.gamma_correction_effect_enabled); + self.draw_effects_switch(device, + allocator, + action, + debug_ui_presenter, + "Stem Darkening", + 1, + effects_panel_y, + &mut model.stem_darkening_effect_enabled); + self.draw_effects_switch(device, + allocator, + action, + debug_ui_presenter, + "Subpixel AA", + 2, + effects_panel_y, + &mut model.subpixel_aa_effect_enabled); } - fn draw_screenshot_panel( - &mut self, - device: &D, - window: &mut W, - debug_ui_presenter: &mut DebugUIPresenter, - panel_x: 
i32, - action: &mut UIAction, - ) where W: Window { + fn draw_screenshot_panel(&mut self, + device: &D, + allocator: &mut GPUMemoryAllocator, + window: &mut W, + debug_ui_presenter: &mut DebugUIPresenter, + panel_x: i32, + action: &mut UIAction) + where W: Window { if !self.screenshot_panel_visible { return; } @@ -406,36 +427,34 @@ where let panel_position = vec2i(panel_x, panel_y); debug_ui_presenter.ui_presenter.draw_solid_rounded_rect( device, + allocator, RectI::new(panel_position, vec2i(SCREENSHOT_PANEL_WIDTH, SCREENSHOT_PANEL_HEIGHT)), WINDOW_COLOR, ); - self.draw_screenshot_menu_item( - device, - window, - debug_ui_presenter, - ScreenshotType::PNG, - panel_position, - action, - ); - self.draw_screenshot_menu_item( - device, - window, - debug_ui_presenter, - ScreenshotType::SVG, - panel_position, - action, - ); + self.draw_screenshot_menu_item(device, + allocator, + window, + debug_ui_presenter, + ScreenshotType::PNG, + panel_position, + action); + self.draw_screenshot_menu_item(device, + allocator, + window, + debug_ui_presenter, + ScreenshotType::SVG, + panel_position, + action); } - fn draw_background_panel( - &mut self, - device: &D, - debug_ui_presenter: &mut DebugUIPresenter, - panel_x: i32, - action: &mut UIAction, - model: &mut DemoUIModel, - ) { + fn draw_background_panel(&mut self, + device: &D, + allocator: &mut GPUMemoryAllocator, + debug_ui_presenter: &mut DebugUIPresenter, + panel_x: i32, + action: &mut UIAction, + model: &mut DemoUIModel) { if !self.background_panel_visible { return; } @@ -445,44 +464,41 @@ where let panel_position = vec2i(panel_x, panel_y); debug_ui_presenter.ui_presenter.draw_solid_rounded_rect( device, + allocator, RectI::new(panel_position, vec2i(BACKGROUND_PANEL_WIDTH, BACKGROUND_PANEL_HEIGHT)), WINDOW_COLOR, ); - self.draw_background_menu_item( - device, - debug_ui_presenter, - BackgroundColor::Light, - panel_position, - action, - model, - ); - self.draw_background_menu_item( - device, - debug_ui_presenter, - 
BackgroundColor::Dark, - panel_position, - action, - model, - ); - self.draw_background_menu_item( - device, - debug_ui_presenter, - BackgroundColor::Transparent, - panel_position, - action, - model, - ); + self.draw_background_menu_item(device, + allocator, + debug_ui_presenter, + BackgroundColor::Light, + panel_position, + action, + model); + self.draw_background_menu_item(device, + allocator, + debug_ui_presenter, + BackgroundColor::Dark, + panel_position, + action, + model); + self.draw_background_menu_item(device, + allocator, + debug_ui_presenter, + BackgroundColor::Transparent, + panel_position, + action, + model); } - fn draw_rotate_panel( - &mut self, - device: &D, - debug_ui_presenter: &mut DebugUIPresenter, - rotate_panel_x: i32, - action: &mut UIAction, - model: &mut DemoUIModel - ) { + fn draw_rotate_panel(&mut self, + device: &D, + allocator: &mut GPUMemoryAllocator, + debug_ui_presenter: &mut DebugUIPresenter, + rotate_panel_x: i32, + action: &mut UIAction, + model: &mut DemoUIModel) { if !self.rotate_panel_visible { return; } @@ -493,9 +509,9 @@ where let rotate_panel_size = vec2i(ROTATE_PANEL_WIDTH, ROTATE_PANEL_HEIGHT); debug_ui_presenter.ui_presenter.draw_solid_rounded_rect( device, + allocator, RectI::new(rotate_panel_origin, rotate_panel_size), - WINDOW_COLOR, - ); + WINDOW_COLOR); let (widget_x, widget_y) = (rotate_panel_x + PADDING, rotate_panel_y + PADDING); let widget_rect = RectI::new(vec2i(widget_x, widget_y), @@ -503,8 +519,7 @@ where if let Some(position) = debug_ui_presenter .ui_presenter .event_queue - .handle_mouse_down_or_dragged_in_rect(widget_rect) - { + .handle_mouse_down_or_dragged_in_rect(widget_rect) { model.rotation = position.x(); *action = UIAction::Rotate(model.rotation()); } @@ -513,23 +528,25 @@ where rotate_panel_y + PADDING + SLIDER_KNOB_HEIGHT / 2 - SLIDER_TRACK_HEIGHT / 2; let slider_track_rect = RectI::new(vec2i(widget_x, slider_track_y), vec2i(SLIDER_WIDTH, SLIDER_TRACK_HEIGHT)); - 
debug_ui_presenter.ui_presenter.draw_rect_outline(device, slider_track_rect, TEXT_COLOR); + debug_ui_presenter.ui_presenter + .draw_rect_outline(device, allocator, slider_track_rect, TEXT_COLOR); let slider_knob_x = widget_x + model.rotation - SLIDER_KNOB_WIDTH / 2; let slider_knob_rect = RectI::new(vec2i(slider_knob_x, widget_y), vec2i(SLIDER_KNOB_WIDTH, SLIDER_KNOB_HEIGHT)); - debug_ui_presenter.ui_presenter.draw_solid_rect(device, slider_knob_rect, TEXT_COLOR); + debug_ui_presenter.ui_presenter + .draw_solid_rect(device, allocator, slider_knob_rect, TEXT_COLOR); } - fn draw_screenshot_menu_item( - &mut self, - device: &D, - window: &mut W, - debug_ui_presenter: &mut DebugUIPresenter, - screenshot_type: ScreenshotType, - panel_position: Vector2I, - action: &mut UIAction, - ) where W: Window { + fn draw_screenshot_menu_item(&mut self, + device: &D, + allocator: &mut GPUMemoryAllocator, + window: &mut W, + debug_ui_presenter: &mut DebugUIPresenter, + screenshot_type: ScreenshotType, + panel_position: Vector2I, + action: &mut UIAction) + where W: Window { let index = screenshot_type as i32; let text = format!("Save as {}...", screenshot_type.as_str()); @@ -538,6 +555,7 @@ where let widget_rect = RectI::new(widget_origin, widget_size); if self.draw_menu_item(device, + allocator, debug_ui_presenter, &text, widget_rect, @@ -551,15 +569,14 @@ where } } - fn draw_background_menu_item( - &mut self, - device: &D, - debug_ui_presenter: &mut DebugUIPresenter, - color: BackgroundColor, - panel_position: Vector2I, - action: &mut UIAction, - model: &mut DemoUIModel, - ) { + fn draw_background_menu_item(&mut self, + device: &D, + allocator: &mut GPUMemoryAllocator, + debug_ui_presenter: &mut DebugUIPresenter, + color: BackgroundColor, + panel_position: Vector2I, + action: &mut UIAction, + model: &mut DemoUIModel) { let (text, index) = (color.as_str(), color as i32); let widget_size = vec2i(BACKGROUND_PANEL_WIDTH, BUTTON_HEIGHT); @@ -568,6 +585,7 @@ where let selected = color == 
model.background_color; if self.draw_menu_item(device, + allocator, debug_ui_presenter, text, widget_rect, @@ -579,20 +597,21 @@ where fn draw_menu_item(&self, device: &D, + allocator: &mut GPUMemoryAllocator, debug_ui_presenter: &mut DebugUIPresenter, text: &str, widget_rect: RectI, selected: bool) -> bool { if selected { - debug_ui_presenter.ui_presenter.draw_solid_rounded_rect(device, - widget_rect, - TEXT_COLOR); + debug_ui_presenter.ui_presenter + .draw_solid_rounded_rect(device, allocator, widget_rect, TEXT_COLOR); } let (text_x, text_y) = (PADDING * 2, BUTTON_TEXT_OFFSET); let text_position = widget_rect.origin() + vec2i(text_x, text_y); - debug_ui_presenter.ui_presenter.draw_text(device, text, text_position, selected); + debug_ui_presenter.ui_presenter + .draw_text(device, allocator, text, text_position, selected); debug_ui_presenter.ui_presenter .event_queue @@ -600,28 +619,31 @@ where .is_some() } - fn draw_effects_switch( - &self, - device: &D, - action: &mut UIAction, - debug_ui_presenter: &mut DebugUIPresenter, - text: &str, - index: i32, - window_y: i32, - value: &mut bool) { + fn draw_effects_switch(&self, + device: &D, + allocator: &mut GPUMemoryAllocator, + action: &mut UIAction, + debug_ui_presenter: &mut DebugUIPresenter, + text: &str, + index: i32, + window_y: i32, + value: &mut bool) { let text_x = PADDING * 2; let text_y = window_y + PADDING + BUTTON_TEXT_OFFSET + (BUTTON_HEIGHT + PADDING) * index; - debug_ui_presenter.ui_presenter.draw_text(device, text, vec2i(text_x, text_y), false); + debug_ui_presenter.ui_presenter + .draw_text(device, allocator, text, vec2i(text_x, text_y), false); let switch_width = debug_ui_presenter.ui_presenter.measure_segmented_control(2); let switch_x = PADDING + EFFECTS_PANEL_WIDTH - (switch_width + PADDING); let switch_y = window_y + PADDING + (BUTTON_HEIGHT + PADDING) * index; let switch_position = vec2i(switch_x, switch_y); - let new_value = - debug_ui_presenter - .ui_presenter - .draw_text_switch(device, 
switch_position, &["Off", "On"], *value as u8) != 0; + let new_value = debug_ui_presenter.ui_presenter + .draw_text_switch(device, + allocator, + switch_position, + &["Off", "On"], + *value as u8) != 0; if new_value != *value { *action = UIAction::EffectsChanged; diff --git a/examples/c_canvas_minimal/c_canvas_minimal.c b/examples/c_canvas_minimal/c_canvas_minimal.c index 8feabc65..8cb54be4 100644 --- a/examples/c_canvas_minimal/c_canvas_minimal.c +++ b/examples/c_canvas_minimal/c_canvas_minimal.c @@ -58,9 +58,11 @@ int main(int argc, const char **argv) { PFGLDestFramebufferCreateFullWindow(&(PFVector2I){640, 480}); PFGLRendererRef renderer = PFGLRendererCreate(PFGLDeviceCreate(PF_GL_VERSION_GL3, 0), PFFilesystemResourceLoaderLocate(), - dest_framebuffer, + &(PFRendererMode){PF_RENDERER_LEVEL_D3D9}, &(PFRendererOptions){ - (PFColorF){1.0, 1.0, 1.0, 1.0}, PF_RENDERER_OPTIONS_FLAGS_HAS_BACKGROUND_COLOR + dest_framebuffer, + (PFColorF){1.0, 1.0, 1.0, 1.0}, + PF_RENDERER_OPTIONS_FLAGS_HAS_BACKGROUND_COLOR }); // Make a canvas. We're going to draw a house. @@ -86,7 +88,8 @@ int main(int argc, const char **argv) { // Render the canvas to screen. 
PFSceneRef scene = PFCanvasCreateScene(canvas); - PFSceneProxyRef scene_proxy = PFSceneProxyCreateFromSceneAndRayonExecutor(scene); + PFSceneProxyRef scene_proxy = + PFSceneProxyCreateFromSceneAndRayonExecutor(scene, PF_RENDERER_LEVEL_D3D9); PFSceneProxyBuildAndRenderGL(scene_proxy, renderer, PFBuildOptionsCreate()); SDL_GL_SwapWindow(window); diff --git a/examples/canvas_glutin_minimal/src/main.rs b/examples/canvas_glutin_minimal/src/main.rs index 58c5bfd0..072b4c0d 100644 --- a/examples/canvas_glutin_minimal/src/main.rs +++ b/examples/canvas_glutin_minimal/src/main.rs @@ -24,7 +24,7 @@ use pathfinder_resources::embedded::EmbeddedResourceLoader; use pathfinder_renderer::concurrent::rayon::RayonExecutor; use pathfinder_renderer::concurrent::scene_proxy::SceneProxy; use pathfinder_renderer::gpu::renderer::Renderer; -use pathfinder_renderer::gpu::options::{DestFramebuffer, RendererOptions}; +use pathfinder_renderer::gpu::options::{DestFramebuffer, RendererMode, RendererOptions}; use pathfinder_renderer::options::BuildOptions; fn main() { @@ -48,13 +48,14 @@ fn main() { gl::load_with(|name| gl_context.get_proc_address(name) as *const _); // Create a Pathfinder renderer. - let mut renderer = Renderer::new(GLDevice::new(GLVersion::GL3, 0), - &EmbeddedResourceLoader, - DestFramebuffer::full_window(window_size), - RendererOptions { - background_color: Some(ColorF::white()), - ..RendererOptions::default() - }); + let device = GLDevice::new(GLVersion::GL3, 0); + let mode = RendererMode::default_for_device(&device); + let options = RendererOptions { + background_color: Some(ColorF::white()), + dest: DestFramebuffer::full_window(window_size), + ..RendererOptions::default() + }; + let mut renderer = Renderer::new(device, &EmbeddedResourceLoader, mode, options); // Make a canvas. We're going to draw a house. let font_context = CanvasFontContext::from_system_source(); @@ -78,7 +79,9 @@ fn main() { canvas.stroke_path(path); // Render the canvas to screen. 
- let scene = SceneProxy::from_scene(canvas.into_canvas().into_scene(), RayonExecutor); + let mut scene = SceneProxy::from_scene(canvas.into_canvas().into_scene(), + renderer.mode().level, + RayonExecutor); scene.build_and_render(&mut renderer, BuildOptions::default()); gl_context.swap_buffers().unwrap(); diff --git a/examples/canvas_metal_minimal/src/main.rs b/examples/canvas_metal_minimal/src/main.rs index ced60d7e..0fa6c208 100644 --- a/examples/canvas_metal_minimal/src/main.rs +++ b/examples/canvas_metal_minimal/src/main.rs @@ -10,14 +10,14 @@ use foreign_types::ForeignTypeRef; use metal::{CAMetalLayer, CoreAnimationLayerRef}; -use pathfinder_canvas::{CanvasFontContext, CanvasRenderingContext2D, Path2D}; +use pathfinder_canvas::{Canvas, CanvasFontContext, Path2D}; use pathfinder_color::ColorF; -use pathfinder_geometry::vector::{Vector2F, Vector2I, vec2f, vec2i}; +use pathfinder_geometry::vector::{vec2f, vec2i}; use pathfinder_geometry::rect::RectF; use pathfinder_metal::MetalDevice; use pathfinder_renderer::concurrent::rayon::RayonExecutor; use pathfinder_renderer::concurrent::scene_proxy::SceneProxy; -use pathfinder_renderer::gpu::options::{DestFramebuffer, RendererOptions}; +use pathfinder_renderer::gpu::options::{DestFramebuffer, RendererMode, RendererOptions}; use pathfinder_renderer::gpu::renderer::Renderer; use pathfinder_renderer::options::BuildOptions; use pathfinder_resources::embedded::EmbeddedResourceLoader; @@ -46,14 +46,18 @@ fn main() { }; // Create a Pathfinder renderer. 
- let mut renderer = Renderer::new(MetalDevice::new(metal_layer), - &EmbeddedResourceLoader, - DestFramebuffer::full_window(window_size), - RendererOptions { background_color: Some(ColorF::white()) }); + let device = MetalDevice::new(metal_layer); + let mode = RendererMode::default_for_device(&device); + let options = RendererOptions { + dest: DestFramebuffer::full_window(window_size), + background_color: Some(ColorF::white()), + ..RendererOptions::default() + }; + let mut renderer = Renderer::new(device, &EmbeddedResourceLoader, mode, options); // Make a canvas. We're going to draw a house. - let mut canvas = CanvasRenderingContext2D::new(CanvasFontContext::from_system_source(), - window_size.to_f32()); + let canvas = Canvas::new(window_size.to_f32()); + let mut canvas = canvas.get_context_2d(CanvasFontContext::from_system_source()); // Set line width. canvas.set_line_width(10.0); @@ -73,9 +77,11 @@ fn main() { canvas.stroke_path(path); // Render the canvas to screen. - let scene = SceneProxy::from_scene(canvas.into_scene(), RayonExecutor); + let mut scene = SceneProxy::from_scene(canvas.into_canvas().into_scene(), + renderer.mode().level, + RayonExecutor); scene.build_and_render(&mut renderer, BuildOptions::default()); - renderer.device.present_drawable(); + renderer.device().present_drawable(); // Wait for a keypress. 
let mut event_pump = sdl_context.event_pump().unwrap(); diff --git a/examples/canvas_minimal/src/main.rs b/examples/canvas_minimal/src/main.rs index c80dbf98..646e9b10 100644 --- a/examples/canvas_minimal/src/main.rs +++ b/examples/canvas_minimal/src/main.rs @@ -16,7 +16,7 @@ use pathfinder_geometry::vector::{vec2f, vec2i}; use pathfinder_gl::{GLDevice, GLVersion}; use pathfinder_renderer::concurrent::rayon::RayonExecutor; use pathfinder_renderer::concurrent::scene_proxy::SceneProxy; -use pathfinder_renderer::gpu::options::{DestFramebuffer, RendererOptions}; +use pathfinder_renderer::gpu::options::{DestFramebuffer, RendererMode, RendererOptions}; use pathfinder_renderer::gpu::renderer::Renderer; use pathfinder_renderer::options::BuildOptions; use pathfinder_resources::embedded::EmbeddedResourceLoader; @@ -72,13 +72,14 @@ fn main() { let pathfinder_device = GLDevice::new(GLVersion::GL3, default_framebuffer); // Create a Pathfinder renderer. - let mut renderer = Renderer::new(pathfinder_device, - &EmbeddedResourceLoader::new(), - DestFramebuffer::full_window(framebuffer_size), - RendererOptions { - background_color: Some(ColorF::white()), - ..RendererOptions::default() - }); + let mode = RendererMode::default_for_device(&pathfinder_device); + let options = RendererOptions { + dest: DestFramebuffer::full_window(framebuffer_size), + background_color: Some(ColorF::white()), + ..RendererOptions::default() + }; + let resource_loader = EmbeddedResourceLoader::new(); + let mut renderer = Renderer::new(pathfinder_device, &resource_loader, mode, options); // Make a canvas. We're going to draw a house. let font_context = CanvasFontContext::from_system_source(); @@ -102,7 +103,9 @@ fn main() { canvas.stroke_path(path); // Render the canvas to screen. 
- let scene = SceneProxy::from_scene(canvas.into_canvas().into_scene(), RayonExecutor); + let mut scene = SceneProxy::from_scene(canvas.into_canvas().into_scene(), + renderer.mode().level, + RayonExecutor); scene.build_and_render(&mut renderer, BuildOptions::default()); // Present the surface. diff --git a/examples/canvas_moire/src/main.rs b/examples/canvas_moire/src/main.rs index 2f308dd7..a1d24ed2 100644 --- a/examples/canvas_moire/src/main.rs +++ b/examples/canvas_moire/src/main.rs @@ -15,7 +15,7 @@ use pathfinder_geometry::vector::{Vector2F, Vector2I, vec2f, vec2i}; use pathfinder_gl::{GLDevice, GLVersion}; use pathfinder_renderer::concurrent::rayon::RayonExecutor; use pathfinder_renderer::concurrent::scene_proxy::SceneProxy; -use pathfinder_renderer::gpu::options::{DestFramebuffer, RendererOptions}; +use pathfinder_renderer::gpu::options::{DestFramebuffer, RendererMode, RendererOptions}; use pathfinder_renderer::gpu::renderer::Renderer; use pathfinder_renderer::options::BuildOptions; use pathfinder_resources::embedded::EmbeddedResourceLoader; @@ -85,13 +85,13 @@ fn main() { let pathfinder_device = GLDevice::new(GLVersion::GL3, default_framebuffer); // Create our renderers. 
- let renderer = Renderer::new(pathfinder_device, - &EmbeddedResourceLoader, - DestFramebuffer::full_window(framebuffer_size), - RendererOptions { - background_color: Some(ColorF::white()), - ..RendererOptions::default() - }); + let mode = RendererMode::default_for_device(&pathfinder_device); + let options = RendererOptions { + background_color: Some(ColorF::white()), + dest: DestFramebuffer::full_window(framebuffer_size), + ..RendererOptions::default() + }; + let renderer = Renderer::new(pathfinder_device, &EmbeddedResourceLoader, mode, options); let window_size = vec2i(window_size.width, window_size.height); let mut moire_renderer = MoireRenderer::new(renderer, window_size, framebuffer_size); @@ -129,10 +129,11 @@ struct MoireRenderer { impl MoireRenderer { fn new(renderer: Renderer, window_size: Vector2I, drawable_size: Vector2I) -> MoireRenderer { + let level = renderer.mode().level; MoireRenderer { renderer, font_context: CanvasFontContext::from_system_source(), - scene: SceneProxy::new(RayonExecutor), + scene: SceneProxy::new(level, RayonExecutor), frame: 0, window_size, drawable_size, @@ -155,10 +156,7 @@ impl MoireRenderer { let inner_center = window_center + vec2f(1.0, sin_time) * (cos_time * INNER_RADIUS); // Clear to background color. - self.renderer.set_options(RendererOptions { - background_color: Some(background_color), - ..RendererOptions::default() - }); + self.renderer.options_mut().background_color = Some(background_color); // Make a canvas. let mut canvas = diff --git a/examples/canvas_nanovg/src/main.rs b/examples/canvas_nanovg/src/main.rs index a8638235..60620972 100644 --- a/examples/canvas_nanovg/src/main.rs +++ b/examples/canvas_nanovg/src/main.rs @@ -1517,13 +1517,14 @@ fn main() { let pathfinder_device = GLDevice::new(GLVersion::GL3, default_framebuffer); // Create a Pathfinder renderer. 
+ let renderer_options = RendererOptions { + background_color: Some(rgbf(0.3, 0.3, 0.32)), + ..RendererOptions::default_for_device(&pathfinder_device) + }; let mut renderer = Renderer::new(pathfinder_device, &resources, DestFramebuffer::full_window(framebuffer_size), - RendererOptions { - background_color: Some(rgbf(0.3, 0.3, 0.32)), - ..RendererOptions::default() - }); + renderer_options); // Initialize font state. let font_source = Arc::new(MemSource::from_fonts(font_data.into_iter()).unwrap()); @@ -1566,7 +1567,9 @@ fn main() { // Render the canvas to screen. let canvas = context.into_canvas(); - let scene = SceneProxy::from_scene(canvas.into_scene(), RayonExecutor); + let mut scene = SceneProxy::from_scene(canvas.into_scene(), + renderer.level(), + RayonExecutor); scene.build_and_render(&mut renderer, BuildOptions::default()); // Present the rendered canvas via `surfman`. @@ -1575,9 +1578,9 @@ fn main() { device.bind_surface_to_context(&mut gl_context, surface).unwrap(); // Add stats to performance graphs. 
- if let Some(gpu_time) = renderer.shift_rendering_time() { - let cpu_build_time = renderer.stats.cpu_build_time.as_secs_f32(); - let gpu_time = gpu_time.gpu_time.as_secs_f32(); + if let Some(gpu_time) = renderer.last_rendering_time() { + let cpu_build_time = renderer.stats().cpu_build_time.as_secs_f32(); + let gpu_time = gpu_time.total_time().as_secs_f32(); fps_graph.push(cpu_frame_elapsed_time + cpu_build_time.max(gpu_time)); cpu_graph.push(cpu_frame_elapsed_time + cpu_build_time); gpu_graph.push(gpu_time); diff --git a/examples/canvas_text/src/main.rs b/examples/canvas_text/src/main.rs index 8da632b5..5891e0af 100644 --- a/examples/canvas_text/src/main.rs +++ b/examples/canvas_text/src/main.rs @@ -15,11 +15,11 @@ use pathfinder_geometry::vector::{vec2f, vec2i}; use pathfinder_gl::{GLDevice, GLVersion}; use pathfinder_renderer::concurrent::rayon::RayonExecutor; use pathfinder_renderer::concurrent::scene_proxy::SceneProxy; -use pathfinder_renderer::gpu::options::{DestFramebuffer, RendererOptions}; +use pathfinder_renderer::gpu::options::{DestFramebuffer, RendererMode, RendererOptions}; use pathfinder_renderer::gpu::renderer::Renderer; use pathfinder_renderer::options::BuildOptions; use pathfinder_resources::ResourceLoader; -use pathfinder_resources::embedded::EmbeddedResourceLoader; +use pathfinder_resources::fs::FilesystemResourceLoader; use sdl2::event::Event; use sdl2::keyboard::Keycode; use sdl2::video::GLProfile; @@ -49,14 +49,15 @@ fn main() { window.gl_make_current(&gl_context).unwrap(); // Create a Pathfinder renderer. 
- let resource_loader = EmbeddedResourceLoader; - let mut renderer = Renderer::new(GLDevice::new(GLVersion::GL3, 0), - &resource_loader, - DestFramebuffer::full_window(window_size), - RendererOptions { - background_color: Some(ColorF::white()), - ..RendererOptions::default() - }); + let resource_loader = FilesystemResourceLoader::locate(); + let device = GLDevice::new(GLVersion::GL3, 0); + let mode = RendererMode::default_for_device(&device); + let options = RendererOptions { + background_color: Some(ColorF::white()), + dest: DestFramebuffer::full_window(window_size), + ..RendererOptions::default() + }; + let mut renderer = Renderer::new(device, &resource_loader, mode, options); // Load a font. let font_data = Arc::new(resource_loader.slurp("fonts/Overpass-Regular.otf").unwrap()); @@ -74,7 +75,9 @@ fn main() { canvas.stroke_text("Goodbye Pathfinder!", vec2f(608.0, 464.0)); // Render the canvas to screen. - let scene = SceneProxy::from_scene(canvas.into_canvas().into_scene(), RayonExecutor); + let mut scene = SceneProxy::from_scene(canvas.into_canvas().into_scene(), + renderer.mode().level, + RayonExecutor); scene.build_and_render(&mut renderer, BuildOptions::default()); window.gl_swap_window(); diff --git a/examples/swf_basic/src/main.rs b/examples/swf_basic/src/main.rs index 19053489..72d99f54 100644 --- a/examples/swf_basic/src/main.rs +++ b/examples/swf_basic/src/main.rs @@ -15,7 +15,7 @@ use pathfinder_gl::{GLDevice, GLVersion}; use pathfinder_renderer::concurrent::rayon::RayonExecutor; use pathfinder_renderer::concurrent::scene_proxy::SceneProxy; use pathfinder_renderer::gpu::renderer::Renderer; -use pathfinder_renderer::gpu::options::{DestFramebuffer, RendererOptions}; +use pathfinder_renderer::gpu::options::{DestFramebuffer, RendererMode, RendererOptions}; use pathfinder_renderer::options::{RenderTransform, BuildOptions}; use pathfinder_resources::ResourceLoader; use pathfinder_resources::embedded::EmbeddedResourceLoader; @@ -98,15 +98,15 @@ fn main() { 
window.gl_make_current(&gl_context).unwrap(); // Create a Pathfinder renderer. - let mut renderer = Renderer::new( - GLDevice::new(GLVersion::GL3, 0), - &resource_loader, - DestFramebuffer::full_window(pixel_size), - RendererOptions { - background_color: Some(stage.background_color()), - ..RendererOptions::default() - } - ); + let device = GLDevice::new(GLVersion::GL3, 0); + let mode = RendererMode::default_for_device(&device); + let options = RendererOptions { + background_color: Some(stage.background_color()), + dest: DestFramebuffer::full_window(pixel_size), + ..RendererOptions::default() + }; + let mut renderer = Renderer::new(device, &resource_loader, mode, options); + // Clear to swf stage background color. let mut scene = Scene::new(); scene.set_view_box(RectF::new(Vector2F::zero(), @@ -115,7 +115,7 @@ fn main() { draw_paths_into_scene(&library, &mut scene); // Render the canvas to screen. - let scene = SceneProxy::from_scene(scene, RayonExecutor); + let mut scene = SceneProxy::from_scene(scene, renderer.mode().level, RayonExecutor); let mut build_options = BuildOptions::default(); let scale_transform = Transform2F::from_scale(device_pixel_ratio); build_options.transform = RenderTransform::Transform2D(scale_transform); diff --git a/export/src/lib.rs b/export/src/lib.rs index 4a4ab5b5..6db4161c 100644 --- a/export/src/lib.rs +++ b/export/src/lib.rs @@ -10,8 +10,8 @@ use pathfinder_content::outline::ContourIterFlags; use pathfinder_content::segment::SegmentKind; -use pathfinder_renderer::scene::Scene; use pathfinder_geometry::vector::{Vector2F, vec2f}; +use pathfinder_renderer::scene::{DrawPath, Scene}; use std::fmt; use std::io::{self, Write}; @@ -53,7 +53,8 @@ fn export_svg(scene: &Scene, writer: &mut W) -> io::Result<()> { view_box.size().x(), view_box.size().y() )?; - for (paint, outline, name) in scene.paths() { + for &DrawPath { paint: paint_id, ref outline, ref name, .. 
} in scene.draw_paths() { + let paint = scene.palette().paints.get(paint_id.0 as usize).unwrap(); write!(writer, " (scene: &Scene, writer: &mut W) -> io::Result<()> { vec2f(r.x(), height - r.y()) }; - for (paint, outline, _) in scene.paths() { + for &DrawPath { paint: paint_id, ref outline, .. } in scene.draw_paths() { // TODO(pcwalton): Gradients and patterns. + let paint = scene.palette().paints.get(paint_id.0 as usize).unwrap(); if paint.is_color() { pdf.set_fill_color(paint.base_color()); } @@ -138,7 +140,7 @@ fn export_ps(scene: &Scene, writer: &mut W) -> io::Result<()> { writeln!(writer, "0 {} translate", view_box.size().y())?; writeln!(writer, "1 -1 scale")?; - for (paint, outline, name) in scene.paths() { + for &DrawPath { paint: paint_id, ref outline, ref name, .. } in scene.draw_paths() { if !name.is_empty() { writeln!(writer, "newpath % {}", name)?; } else { @@ -180,6 +182,7 @@ fn export_ps(scene: &Scene, writer: &mut W) -> io::Result<()> { } // TODO(pcwalton): Gradients and patterns. 
+ let paint = scene.palette().paints.get(paint_id.0 as usize).unwrap(); if paint.is_color() { let color = paint.base_color(); writeln!(writer, "{} {} {} setrgbcolor", color.r, color.g, color.b)?; diff --git a/geometry/src/line_segment.rs b/geometry/src/line_segment.rs index 43ccdc89..aed60bb9 100644 --- a/geometry/src/line_segment.rs +++ b/geometry/src/line_segment.rs @@ -293,16 +293,9 @@ impl MulAssign for LineSegment2F { #[derive(Clone, Copy, Debug, Default)] #[repr(C)] -pub struct LineSegmentU4 { - pub from: u8, - pub to: u8, -} - -#[derive(Clone, Copy, Debug, Default)] -#[repr(C)] -pub struct LineSegmentU8 { - pub from_x: u8, - pub from_y: u8, - pub to_x: u8, - pub to_y: u8, +pub struct LineSegmentU16 { + pub from_x: u16, + pub from_y: u16, + pub to_x: u16, + pub to_y: u16, } diff --git a/geometry/src/rect.rs b/geometry/src/rect.rs index 08625a29..34a988e7 100644 --- a/geometry/src/rect.rs +++ b/geometry/src/rect.rs @@ -327,6 +327,11 @@ impl RectI { self.0.w() - self.0.y() } + #[inline] + pub fn area(self) -> i32 { + self.width() * self.height() + } + #[inline] pub fn upper_right(&self) -> Vector2I { Vector2I(self.0.zy()) diff --git a/geometry/src/vector.rs b/geometry/src/vector.rs index 711818cd..444fce50 100644 --- a/geometry/src/vector.rs +++ b/geometry/src/vector.rs @@ -349,6 +349,11 @@ impl Vector2I { Vector2I(self.0.max(other.0)) } + #[inline] + pub fn area(self) -> i32 { + self.x() * self.y() + } + #[inline] pub fn to_f32(self) -> Vector2F { Vector2F(self.0.to_f32x2()) diff --git a/gl/src/lib.rs b/gl/src/lib.rs index 6f5b45d6..2fce37a4 100644 --- a/gl/src/lib.rs +++ b/gl/src/lib.rs @@ -21,15 +21,18 @@ use pathfinder_geometry::vector::Vector2I; use pathfinder_gpu::{BlendFactor, BlendOp, BufferData, BufferTarget, BufferUploadMode, ClearOps}; use pathfinder_gpu::{ComputeDimensions, ComputeState, DepthFunc, Device, FeatureLevel}; use pathfinder_gpu::{ImageAccess, ImageBinding, Primitive, ProgramKind, RenderOptions}; -use pathfinder_gpu::{RenderState, 
RenderTarget, ShaderKind, StencilFunc, TextureBinding, TextureData}; -use pathfinder_gpu::{TextureDataRef, TextureFormat, TextureSamplingFlags, UniformData}; +use pathfinder_gpu::{RenderState, RenderTarget, ShaderKind, StencilFunc, TextureBinding}; +use pathfinder_gpu::{TextureData, TextureDataRef, TextureFormat, TextureSamplingFlags, UniformData}; use pathfinder_gpu::{VertexAttrClass, VertexAttrDescriptor, VertexAttrType}; use pathfinder_resources::ResourceLoader; use pathfinder_simd::default::F32x4; use std::cell::RefCell; -use std::ffi::CString; +use std::ffi::{CStr, CString}; use std::mem; +use std::ops::Range; +use std::os::raw::c_char; use std::ptr; +use std::rc::Rc; use std::str; use std::time::Duration; @@ -83,6 +86,10 @@ impl GLDevice { &render_state.textures, &render_state.images); + for &(storage_buffer, buffer) in render_state.storage_buffers { + self.set_storage_buffer(storage_buffer, buffer); + } + render_state.uniforms.iter().for_each(|(uniform, data)| self.set_uniform(uniform, data)); self.set_render_options(&render_state.options); @@ -241,7 +248,7 @@ impl GLDevice { unsafe { gl::BindBufferBase(gl::SHADER_STORAGE_BUFFER, storage_buffer.location as GLuint, - buffer.gl_buffer); + buffer.object.gl_buffer); } } @@ -254,6 +261,10 @@ impl GLDevice { fn reset_render_state(&self, render_state: &RenderState) { self.reset_render_options(&render_state.options); + for &(storage_buffer, _) in render_state.storage_buffers { + self.unset_storage_buffer(storage_buffer); + } + unsafe { for image_binding in render_state.images { self.unbind_image(image_binding.0.image_unit); @@ -310,6 +321,7 @@ impl GLDevice { impl Device for GLDevice { type Buffer = GLBuffer; + type BufferDataReceiver = GLBufferDataReceiver; type Fence = GLFence; type Framebuffer = GLFramebuffer; type ImageParameter = GLImageParameter; @@ -324,6 +336,19 @@ impl Device for GLDevice { type VertexArray = GLVertexArray; type VertexAttr = GLVertexAttr; + #[inline] + fn backend_name(&self) -> &'static str 
{ + "OpenGL" + } + + #[inline] + fn device_name(&self) -> String { + unsafe { + CStr::from_ptr(gl::GetString(gl::RENDERER) as *const c_char).to_string_lossy() + .to_string() + } + } + fn feature_level(&self) -> FeatureLevel { match self.version { GLVersion::GL3 | GLVersion::GLES3 => FeatureLevel::D3D10, @@ -582,14 +607,12 @@ impl Device for GLDevice { unsafe { let mut gl_buffer = 0; gl::GenBuffers(1, &mut gl_buffer); ck(); - GLBuffer { gl_buffer, mode } + let object = Rc::new(GLBufferObject { gl_buffer }); + GLBuffer { object, mode } } } - fn allocate_buffer(&self, - buffer: &GLBuffer, - data: BufferData, - target: BufferTarget) { + fn allocate_buffer(&self, buffer: &GLBuffer, data: BufferData, target: BufferTarget) { let target = target.to_gl_target(); let (ptr, len) = match data { BufferData::Uninitialized(len) => (ptr::null(), len), @@ -598,7 +621,7 @@ impl Device for GLDevice { let len = (len * mem::size_of::()) as GLsizeiptr; let usage = buffer.mode.to_gl_usage(); unsafe { - gl::BindBuffer(target, buffer.gl_buffer); ck(); + gl::BindBuffer(target, buffer.object.gl_buffer); ck(); gl::BufferData(target, len, ptr, usage); ck(); } } @@ -611,7 +634,7 @@ impl Device for GLDevice { let target = target.to_gl_target(); let len = (data.len() * mem::size_of::()) as GLsizeiptr; unsafe { - gl::BindBuffer(target, buffer.gl_buffer); ck(); + gl::BindBuffer(target, buffer.object.gl_buffer); ck(); gl::BufferSubData(target, position as GLintptr, len, @@ -748,6 +771,14 @@ impl Device for GLDevice { } } + fn read_buffer(&self, buffer: &GLBuffer, target: BufferTarget, range: Range) + -> GLBufferDataReceiver { + unsafe { + let gl_sync = gl::FenceSync(gl::SYNC_GPU_COMMANDS_COMPLETE, 0); + GLBufferDataReceiver { object: buffer.object.clone(), gl_sync, range, target } + } + } + fn begin_commands(&self) { // TODO(pcwalton): Add some checks in debug mode to make sure render commands are bracketed // by these? 
@@ -867,11 +898,34 @@ impl Device for GLDevice { } } + fn try_recv_buffer(&self, receiver: &Self::BufferDataReceiver) -> Option> { + unsafe { + let result = gl::ClientWaitSync(receiver.gl_sync, + gl::SYNC_FLUSH_COMMANDS_BIT, + 0); ck(); + if result == gl::TIMEOUT_EXPIRED || result == gl::WAIT_FAILED { + None + } else { + Some(self.get_buffer_data(receiver)) + } + } + } + + fn recv_buffer(&self, receiver: &Self::BufferDataReceiver) -> Vec { + unsafe { + let result = gl::ClientWaitSync(receiver.gl_sync, + gl::SYNC_FLUSH_COMMANDS_BIT, + !0); ck(); + debug_assert!(result != gl::TIMEOUT_EXPIRED && result != gl::WAIT_FAILED); + self.get_buffer_data(receiver) + } + } + #[inline] fn bind_buffer(&self, vertex_array: &GLVertexArray, buffer: &GLBuffer, target: BufferTarget) { self.bind_vertex_array(vertex_array); unsafe { - gl::BindBuffer(target.to_gl_target(), buffer.gl_buffer); ck(); + gl::BindBuffer(target.to_gl_target(), buffer.object.gl_buffer); ck(); } self.unbind_vertex_array(); } @@ -964,7 +1018,7 @@ impl GLDevice { fn unbind_image(&self, unit: u32) { unsafe { - gl::BindImageTexture(unit, 0, 0, gl::FALSE, 0, 0, 0); ck(); + gl::BindImageTexture(unit, 0, 0, gl::FALSE, 0, gl::READ_ONLY, gl::RGBA8); ck(); } } @@ -1092,6 +1146,19 @@ impl GLDevice { texture_data } } + + fn get_buffer_data(&self, receiver: &GLBufferDataReceiver) -> Vec { + let mut dest = vec![0; receiver.range.end - receiver.range.start]; + let gl_target = receiver.target.to_gl_target(); + unsafe { + gl::BindBuffer(gl_target, receiver.object.gl_buffer); ck(); + gl::GetBufferSubData(gl_target, + receiver.range.start as GLintptr, + (receiver.range.end - receiver.range.start) as GLsizeiptr, + dest.as_mut_ptr() as *mut GLvoid); ck(); + } + dest + } } pub struct GLVertexArray { @@ -1175,11 +1242,15 @@ impl Drop for GLFramebuffer { } pub struct GLBuffer { - pub gl_buffer: GLuint, + pub object: Rc, pub mode: BufferUploadMode, } -impl Drop for GLBuffer { +pub struct GLBufferObject { + pub gl_buffer: GLuint, +} + 
+impl Drop for GLBufferObject { fn drop(&mut self) { unsafe { gl::DeleteBuffers(1, &mut self.gl_buffer); ck(); @@ -1397,7 +1468,7 @@ impl TextureFormatExt for TextureFormat { match self { TextureFormat::R8 => gl::R8 as GLint, TextureFormat::R16F => gl::R16F as GLint, - TextureFormat::RGBA8 => gl::RGBA as GLint, + TextureFormat::RGBA8 => gl::RGBA8 as GLint, TextureFormat::RGBA16F => gl::RGBA16F as GLint, TextureFormat::RGBA32F => gl::RGBA32F as GLint, } @@ -1427,14 +1498,22 @@ impl VertexAttrTypeExt for VertexAttrType { fn to_gl_type(self) -> GLuint { match self { VertexAttrType::F32 => gl::FLOAT, - VertexAttrType::I16 => gl::SHORT, VertexAttrType::I8 => gl::BYTE, - VertexAttrType::U16 => gl::UNSIGNED_SHORT, + VertexAttrType::I16 => gl::SHORT, + VertexAttrType::I32 => gl::INT, VertexAttrType::U8 => gl::UNSIGNED_BYTE, + VertexAttrType::U16 => gl::UNSIGNED_SHORT, } } } +pub struct GLBufferDataReceiver { + object: Rc, + gl_sync: GLsync, + range: Range, + target: BufferTarget, +} + pub struct GLTextureDataReceiver { gl_pixel_buffer: GLuint, gl_sync: GLsync, diff --git a/gpu/Cargo.toml b/gpu/Cargo.toml index d9585819..72c023b1 100644 --- a/gpu/Cargo.toml +++ b/gpu/Cargo.toml @@ -10,7 +10,9 @@ homepage = "https://github.com/servo/pathfinder" [dependencies] bitflags = "1.0" +fxhash = "0.2" half = "1.5" +log = "0.4" [dependencies.image] version = "0.23" diff --git a/gpu/src/allocator.rs b/gpu/src/allocator.rs new file mode 100644 index 00000000..fd248c1b --- /dev/null +++ b/gpu/src/allocator.rs @@ -0,0 +1,400 @@ +// pathfinder/gpu/src/gpu/allocator.rs +// +// Copyright © 2020 The Pathfinder Project Developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! GPU memory management. 
+ +use crate::{BufferData, BufferTarget, BufferUploadMode, Device, TextureFormat}; +use fxhash::FxHashMap; +use pathfinder_geometry::vector::Vector2I; +use std::collections::VecDeque; +use std::default::Default; +use std::mem; +use std::time::Instant; + +// Everything above 16 MB is allocated exactly. +const MAX_BUFFER_SIZE_CLASS: u64 = 16 * 1024 * 1024; + +// Number of seconds before unused memory is purged. +// +// TODO(pcwalton): jemalloc uses a sigmoidal decay curve here. Consider something similar. +const DECAY_TIME: f32 = 0.250; + +// Number of seconds before we can reuse an object buffer. +// +// This helps avoid stalls. This is admittedly a bit of a hack. +const REUSE_TIME: f32 = 0.015; + +pub struct GPUMemoryAllocator where D: Device { + buffers_in_use: FxHashMap>, + textures_in_use: FxHashMap>, + framebuffers_in_use: FxHashMap>, + free_objects: VecDeque>, + next_buffer_id: BufferID, + next_texture_id: TextureID, + next_framebuffer_id: FramebufferID, + bytes_committed: u64, + bytes_allocated: u64, +} + +struct BufferAllocation where D: Device { + buffer: D::Buffer, + size: u64, + tag: BufferTag, +} + +struct TextureAllocation where D: Device { + texture: D::Texture, + descriptor: TextureDescriptor, + tag: TextureTag, +} + +struct FramebufferAllocation where D: Device { + framebuffer: D::Framebuffer, + descriptor: TextureDescriptor, + tag: FramebufferTag, +} + +struct FreeObject where D: Device { + timestamp: Instant, + kind: FreeObjectKind, +} + +enum FreeObjectKind where D: Device { + Buffer { id: BufferID, allocation: BufferAllocation }, + Texture { id: TextureID, allocation: TextureAllocation }, + Framebuffer { id: FramebufferID, allocation: FramebufferAllocation }, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct TextureDescriptor { + width: u32, + height: u32, + format: TextureFormat, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct BufferID(pub u64); + +#[derive(Clone, Copy, 
Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct TextureID(pub u64); + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct FramebufferID(pub u64); + +// For debugging and profiling. +#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] +pub struct BufferTag(pub &'static str); + +// For debugging and profiling. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct TextureTag(pub &'static str); + +// For debugging and profiling. +#[derive(Clone, Copy, Debug, PartialEq)] +pub struct FramebufferTag(pub &'static str); + +impl GPUMemoryAllocator where D: Device { + pub fn new() -> GPUMemoryAllocator { + GPUMemoryAllocator { + buffers_in_use: FxHashMap::default(), + textures_in_use: FxHashMap::default(), + framebuffers_in_use: FxHashMap::default(), + free_objects: VecDeque::new(), + next_buffer_id: BufferID(0), + next_texture_id: TextureID(0), + next_framebuffer_id: FramebufferID(0), + bytes_committed: 0, + bytes_allocated: 0, + } + } + + pub fn allocate_buffer(&mut self, device: &D, size: u64, tag: BufferTag) -> BufferID { + let mut byte_size = size * mem::size_of::() as u64; + if byte_size < MAX_BUFFER_SIZE_CLASS { + byte_size = byte_size.next_power_of_two(); + } + + let now = Instant::now(); + + for free_object_index in 0..self.free_objects.len() { + match self.free_objects[free_object_index] { + FreeObject { + ref timestamp, + kind: FreeObjectKind::Buffer { ref allocation, .. }, + } if allocation.size == byte_size && + (now - *timestamp).as_secs_f32() >= REUSE_TIME => {} + _ => continue, + } + + let (id, mut allocation) = match self.free_objects.remove(free_object_index) { + Some(FreeObject { kind: FreeObjectKind::Buffer { id, allocation }, .. 
}) => { + (id, allocation) + } + _ => unreachable!(), + }; + + allocation.tag = tag; + self.bytes_committed += allocation.size; + self.buffers_in_use.insert(id, allocation); + return id; + } + + let buffer = device.create_buffer(BufferUploadMode::Dynamic); + device.allocate_buffer::(&buffer, + BufferData::Uninitialized(byte_size as usize), + BufferTarget::Vertex); + + let id = self.next_buffer_id; + self.next_buffer_id.0 += 1; + + debug!("mapping buffer: {:?} {} ({}x{}) {:?}", + id, + byte_size, + size, + mem::size_of::(), + tag); + + self.buffers_in_use.insert(id, BufferAllocation { buffer, size: byte_size, tag }); + self.bytes_allocated += byte_size; + self.bytes_committed += byte_size; + + id + } + + pub fn allocate_texture(&mut self, + device: &D, + size: Vector2I, + format: TextureFormat, + tag: TextureTag) + -> TextureID { + let descriptor = TextureDescriptor { + width: size.x() as u32, + height: size.y() as u32, + format, + }; + let byte_size = descriptor.byte_size(); + + for free_object_index in 0..self.free_objects.len() { + match self.free_objects[free_object_index] { + FreeObject { kind: FreeObjectKind::Texture { ref allocation, .. }, .. } if + allocation.descriptor == descriptor => {} + _ => continue, + } + + let (id, mut allocation) = match self.free_objects.remove(free_object_index) { + Some(FreeObject { kind: FreeObjectKind::Texture { id, allocation }, .. 
}) => { + (id, allocation) + } + _ => unreachable!(), + }; + + allocation.tag = tag; + self.bytes_committed += allocation.descriptor.byte_size(); + self.textures_in_use.insert(id, allocation); + return id; + } + + debug!("mapping texture: {:?} {:?}", descriptor, tag); + + let texture = device.create_texture(format, size); + let id = self.next_texture_id; + self.next_texture_id.0 += 1; + + self.textures_in_use.insert(id, TextureAllocation { texture, descriptor, tag }); + + self.bytes_allocated += byte_size; + self.bytes_committed += byte_size; + + id + } + + pub fn allocate_framebuffer(&mut self, + device: &D, + size: Vector2I, + format: TextureFormat, + tag: FramebufferTag) + -> FramebufferID { + let descriptor = TextureDescriptor { + width: size.x() as u32, + height: size.y() as u32, + format, + }; + let byte_size = descriptor.byte_size(); + + for free_object_index in 0..self.free_objects.len() { + match self.free_objects[free_object_index].kind { + FreeObjectKind::Framebuffer { ref allocation, .. } if allocation.descriptor == + descriptor => {} + _ => continue, + } + + let (id, mut allocation) = match self.free_objects.remove(free_object_index) { + Some(FreeObject { kind: FreeObjectKind::Framebuffer { id, allocation }, .. 
}) => { + (id, allocation) + } + _ => unreachable!(), + }; + + allocation.tag = tag; + self.bytes_committed += allocation.descriptor.byte_size(); + self.framebuffers_in_use.insert(id, allocation); + return id; + } + + debug!("mapping framebuffer: {:?} {:?}", descriptor, tag); + + let texture = device.create_texture(format, size); + let framebuffer = device.create_framebuffer(texture); + let id = self.next_framebuffer_id; + self.next_framebuffer_id.0 += 1; + + self.framebuffers_in_use.insert(id, FramebufferAllocation { + framebuffer, + descriptor, + tag, + }); + + self.bytes_allocated += byte_size; + self.bytes_committed += byte_size; + + id + } + + pub fn purge_if_needed(&mut self) { + let now = Instant::now(); + loop { + match self.free_objects.front() { + Some(FreeObject { timestamp, .. }) if (now - *timestamp).as_secs_f32() >= + DECAY_TIME => {} + _ => break, + } + match self.free_objects.pop_front() { + None => break, + Some(FreeObject { kind: FreeObjectKind::Buffer { allocation, .. }, .. }) => { + debug!("purging buffer: {}", allocation.size); + self.bytes_allocated -= allocation.size; + } + Some(FreeObject { kind: FreeObjectKind::Texture { allocation, .. }, .. }) => { + debug!("purging texture: {:?}", allocation.descriptor); + self.bytes_allocated -= allocation.descriptor.byte_size(); + } + Some(FreeObject { kind: FreeObjectKind::Framebuffer { allocation, .. }, .. 
}) => { + debug!("purging framebuffer: {:?}", allocation.descriptor); + self.bytes_allocated -= allocation.descriptor.byte_size(); + } + } + } + } + + pub fn free_buffer(&mut self, id: BufferID) { + let allocation = self.buffers_in_use + .remove(&id) + .expect("Attempted to free unallocated buffer!"); + self.bytes_committed -= allocation.size; + self.free_objects.push_back(FreeObject { + timestamp: Instant::now(), + kind: FreeObjectKind::Buffer { id, allocation }, + }); + } + + pub fn free_texture(&mut self, id: TextureID) { + let allocation = self.textures_in_use + .remove(&id) + .expect("Attempted to free unallocated texture!"); + let byte_size = allocation.descriptor.byte_size(); + self.bytes_committed -= byte_size; + self.free_objects.push_back(FreeObject { + timestamp: Instant::now(), + kind: FreeObjectKind::Texture { id, allocation }, + }); + } + + pub fn free_framebuffer(&mut self, id: FramebufferID) { + let allocation = self.framebuffers_in_use + .remove(&id) + .expect("Attempted to free unallocated framebuffer!"); + let byte_size = allocation.descriptor.byte_size(); + self.bytes_committed -= byte_size; + self.free_objects.push_back(FreeObject { + timestamp: Instant::now(), + kind: FreeObjectKind::Framebuffer { id, allocation }, + }); + } + + pub fn get_buffer(&self, id: BufferID) -> &D::Buffer { + &self.buffers_in_use[&id].buffer + } + + pub fn get_texture(&self, id: TextureID) -> &D::Texture { + &self.textures_in_use[&id].texture + } + + pub fn get_framebuffer(&self, id: FramebufferID) -> &D::Framebuffer { + &self.framebuffers_in_use[&id].framebuffer + } + + #[inline] + pub fn bytes_allocated(&self) -> u64 { + self.bytes_allocated + } + + #[inline] + pub fn bytes_committed(&self) -> u64 { + self.bytes_committed + } + + #[allow(dead_code)] + pub fn dump(&self) { + println!("GPU memory dump"); + println!("---------------"); + + println!("Buffers:"); + let mut ids: Vec = self.buffers_in_use.keys().cloned().collect(); + ids.sort(); + for id in ids { + let 
allocation = &self.buffers_in_use[&id]; + println!("id {:?}: {:?} ({:?} B)", id, allocation.tag, allocation.size); + } + + println!("Textures:"); + let mut ids: Vec = self.textures_in_use.keys().cloned().collect(); + ids.sort(); + for id in ids { + let allocation = &self.textures_in_use[&id]; + println!("id {:?}: {:?} {:?}x{:?} {:?} ({:?} B)", + id, + allocation.tag, + allocation.descriptor.width, + allocation.descriptor.height, + allocation.descriptor.format, + allocation.descriptor.byte_size()); + } + + println!("Framebuffers:"); + let mut ids: Vec = self.framebuffers_in_use.keys().cloned().collect(); + ids.sort(); + for id in ids { + let allocation = &self.framebuffers_in_use[&id]; + println!("id {:?}: {:?} {:?}x{:?} {:?} ({:?} B)", + id, + allocation.tag, + allocation.descriptor.width, + allocation.descriptor.height, + allocation.descriptor.format, + allocation.descriptor.byte_size()); + } + } +} + +impl TextureDescriptor { + fn byte_size(&self) -> u64 { + self.width as u64 * self.height as u64 * self.format.bytes_per_pixel() as u64 + } +} diff --git a/gpu/src/lib.rs b/gpu/src/lib.rs index 34f7e3ec..440194ba 100644 --- a/gpu/src/lib.rs +++ b/gpu/src/lib.rs @@ -12,6 +12,10 @@ #[macro_use] extern crate bitflags; +#[macro_use] +extern crate log; + +pub mod allocator; use half::f16; use image::ImageFormat; @@ -21,11 +25,13 @@ use pathfinder_geometry::transform3d::Transform4F; use pathfinder_geometry::vector::{Vector2I, vec2i}; use pathfinder_resources::ResourceLoader; use pathfinder_simd::default::{F32x2, F32x4, I32x2}; +use std::ops::Range; use std::os::raw::c_void; use std::time::Duration; pub trait Device: Sized { type Buffer; + type BufferDataReceiver; type Fence; type Framebuffer; type ImageParameter; @@ -40,6 +46,8 @@ pub trait Device: Sized { type VertexArray; type VertexAttr; + fn backend_name(&self) -> &'static str; + fn device_name(&self) -> String; fn feature_level(&self) -> FeatureLevel; fn create_texture(&self, format: TextureFormat, size: Vector2I) -> 
Self::Texture; fn create_texture_from_data(&self, format: TextureFormat, size: Vector2I, data: TextureDataRef) @@ -90,6 +98,8 @@ pub trait Device: Sized { fn upload_to_texture(&self, texture: &Self::Texture, rect: RectI, data: TextureDataRef); fn read_pixels(&self, target: &RenderTarget, viewport: RectI) -> Self::TextureDataReceiver; + fn read_buffer(&self, buffer: &Self::Buffer, target: BufferTarget, range: Range) + -> Self::BufferDataReceiver; fn begin_commands(&self); fn end_commands(&self); fn draw_arrays(&self, index_count: u32, render_state: &RenderState); @@ -108,6 +118,8 @@ pub trait Device: Sized { fn recv_timer_query(&self, query: &Self::TimerQuery) -> Duration; fn try_recv_texture_data(&self, receiver: &Self::TextureDataReceiver) -> Option; fn recv_texture_data(&self, receiver: &Self::TextureDataReceiver) -> TextureData; + fn try_recv_buffer(&self, receiver: &Self::BufferDataReceiver) -> Option>; + fn recv_buffer(&self, receiver: &Self::BufferDataReceiver) -> Vec; fn create_texture_from_png(&self, resources: &dyn ResourceLoader, @@ -131,6 +143,30 @@ pub trait Device: Sized { } } + fn upload_png_to_texture(&self, + resources: &dyn ResourceLoader, + name: &str, + texture: &Self::Texture, + format: TextureFormat) { + let data = resources.slurp(&format!("textures/{}.png", name)).unwrap(); + let image = image::load_from_memory_with_format(&data, ImageFormat::Png).unwrap(); + match format { + TextureFormat::R8 => { + let image = image.to_luma(); + let size = vec2i(image.width() as i32, image.height() as i32); + let rect = RectI::new(Vector2I::default(), size); + self.upload_to_texture(&texture, rect, TextureDataRef::U8(&image)) + } + TextureFormat::RGBA8 => { + let image = image.to_rgba(); + let size = vec2i(image.width() as i32, image.height() as i32); + let rect = RectI::new(Vector2I::default(), size); + self.upload_to_texture(&texture, rect, TextureDataRef::U8(&image)) + } + _ => unimplemented!(), + } + } + fn create_program_from_shader_names( &self, 
resources: &dyn ResourceLoader, @@ -170,7 +206,7 @@ pub enum FeatureLevel { D3D11, } -#[derive(Clone, Copy, Debug, PartialEq)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub enum TextureFormat { R8, R16F, @@ -182,10 +218,11 @@ pub enum TextureFormat { #[derive(Clone, Copy, Debug)] pub enum VertexAttrType { F32, - I16, I8, - U16, + I16, + I32, U8, + U16, } #[derive(Clone, Copy, Debug)] @@ -258,6 +295,7 @@ pub struct RenderState<'a, D> where D: Device { pub uniforms: &'a [UniformBinding<'a, D::Uniform>], pub textures: &'a [TextureBinding<'a, D::TextureParameter, D::Texture>], pub images: &'a [ImageBinding<'a, D::ImageParameter, D::Texture>], + pub storage_buffers: &'a [(&'a D::StorageBuffer, &'a D::Buffer)], pub viewport: RectI, pub options: RenderOptions, } diff --git a/metal/Cargo.toml b/metal/Cargo.toml index 0bd817ee..2ec9c903 100644 --- a/metal/Cargo.toml +++ b/metal/Cargo.toml @@ -14,6 +14,7 @@ byteorder = "1.3" block = "0.1" cocoa = "0.19" core-foundation = "0.6" +dispatch = "0.2" foreign-types = "0.3" half = "1.5" io-surface = "0.12" diff --git a/metal/src/lib.rs b/metal/src/lib.rs index e7706049..52e1d09d 100644 --- a/metal/src/lib.rs +++ b/metal/src/lib.rs @@ -19,19 +19,21 @@ extern crate objc; use block::{Block, ConcreteBlock, RcBlock}; use byteorder::{NativeEndian, WriteBytesExt}; -use cocoa::foundation::{NSRange, NSUInteger}; +use cocoa::foundation::NSUInteger; use core_foundation::base::TCFType; use core_foundation::string::{CFString, CFStringRef}; +use dispatch::ffi::dispatch_queue_t; +use dispatch::{Queue, QueueAttribute}; use foreign_types::{ForeignType, ForeignTypeRef}; use half::f16; use io_surface::IOSurfaceRef; use libc::size_t; -use metal::{self, Argument, ArgumentEncoder, Buffer, CommandBuffer, CommandBufferRef}; -use metal::{CommandQueue, CompileOptions, ComputeCommandEncoderRef, ComputePipelineDescriptor}; +use metal::{self, Argument, ArgumentEncoder, BlitCommandEncoder, Buffer, CommandBuffer}; +use 
metal::{CommandQueue, CompileOptions, ComputeCommandEncoder, ComputePipelineDescriptor}; use metal::{ComputePipelineState, CoreAnimationDrawable, CoreAnimationDrawableRef}; use metal::{CoreAnimationLayer, CoreAnimationLayerRef, DepthStencilDescriptor, Function, Library}; use metal::{MTLArgument, MTLArgumentEncoder, MTLArgumentType, MTLBlendFactor, MTLBlendOperation}; -use metal::{MTLClearColor, MTLColorWriteMask, MTLCompareFunction, MTLComputePipelineState}; +use metal::{MTLBlitOption, MTLClearColor, MTLColorWriteMask, MTLCompareFunction, MTLComputePipelineState}; use metal::{MTLDataType, MTLDevice, MTLIndexType, MTLLoadAction, MTLOrigin, MTLPixelFormat}; use metal::{MTLPrimitiveType, MTLRegion, MTLRenderPipelineReflection, MTLRenderPipelineState}; use metal::{MTLResourceOptions, MTLResourceUsage, MTLSamplerAddressMode, MTLSamplerMinMagFilter}; @@ -55,6 +57,7 @@ use pathfinder_gpu::{VertexAttrDescriptor, VertexAttrType}; use pathfinder_resources::ResourceLoader; use pathfinder_simd::default::{F32x2, F32x4, I32x2}; use std::cell::{Cell, RefCell}; +use std::convert::TryInto; use std::mem; use std::ops::Range; use std::ptr; @@ -72,9 +75,15 @@ pub struct MetalDevice { command_queue: CommandQueue, command_buffers: RefCell>, samplers: Vec, - shared_event: SharedEvent, + #[allow(dead_code)] + dispatch_queue: Queue, + timer_query_shared_event: SharedEvent, + buffer_upload_shared_event: SharedEvent, shared_event_listener: SharedEventListener, + compute_fence: RefCell>, next_timer_query_event_value: Cell, + next_buffer_upload_event_value: Cell, + buffer_upload_event_data: Arc, } pub enum MetalProgram { @@ -94,10 +103,21 @@ pub struct MetalComputeProgram { #[derive(Clone)] pub struct MetalBuffer { - buffer: Rc>>, + allocations: Rc>, mode: BufferUploadMode, } +struct BufferAllocations { + private: Option, + shared: Option, + byte_size: u64, +} + +struct StagingBuffer { + buffer: Buffer, + event_value: u64, +} + impl MetalDevice { #[inline] pub unsafe fn new(device: 
metal::Device, texture: T) -> MetalDevice where T: IntoTexture { @@ -139,7 +159,17 @@ impl MetalDevice { let framebuffer_size = vec2i(texture.width() as i32, texture.height() as i32); let main_depth_stencil_texture = device.create_depth_stencil_texture(framebuffer_size); - let shared_event = device.new_shared_event(); + let timer_query_shared_event = device.new_shared_event(); + let buffer_upload_shared_event = device.new_shared_event(); + + let dispatch_queue = Queue::create("graphics.pathfinder.queue", + QueueAttribute::Concurrent); + let shared_event_listener = SharedEventListener::new_from_dispatch_queue(&dispatch_queue); + + let buffer_upload_event_data = Arc::new(BufferUploadEventData { + mutex: Mutex::new(0), + cond: Condvar::new(), + }); MetalDevice { device, @@ -148,9 +178,14 @@ impl MetalDevice { command_queue, command_buffers: RefCell::new(vec![]), samplers, - shared_event, - shared_event_listener: SharedEventListener::new(), + dispatch_queue, + timer_query_shared_event, + buffer_upload_shared_event, + shared_event_listener, + compute_fence: RefCell::new(None), next_timer_query_event_value: Cell::new(1), + next_buffer_upload_event_value: Cell::new(1), + buffer_upload_event_data, } } @@ -180,24 +215,33 @@ pub struct MetalShader { } pub struct MetalTexture { - texture: Texture, + private_texture: Texture, + shared_buffer: RefCell>, sampling_flags: Cell, - dirty: Cell, } #[derive(Clone)] pub struct MetalTextureDataReceiver(Arc); struct MetalTextureDataReceiverInfo { - mutex: Mutex, + mutex: Mutex>, cond: Condvar, texture: Texture, viewport: RectI, } -enum MetalTextureDataReceiverState { +#[derive(Clone)] +pub struct MetalBufferDataReceiver(Arc); + +struct MetalBufferDataReceiverInfo { + mutex: Mutex>>, + cond: Condvar, + staging_buffer: Buffer, +} + +enum MetalDataReceiverState { Pending, - Downloaded(TextureData), + Downloaded(T), Finished, } @@ -207,12 +251,14 @@ pub struct MetalTimerQuery(Arc); struct MetalTimerQueryInfo { mutex: Mutex, cond: Condvar, - 
event_value: u64, } struct MetalTimerQueryData { start_time: Option, end_time: Option, + start_block: Option>, + end_block: Option>, + start_event_value: u64, } #[derive(Clone)] @@ -288,6 +334,7 @@ pub struct MetalVertexArray { impl Device for MetalDevice { type Buffer = MetalBuffer; + type BufferDataReceiver = MetalBufferDataReceiver; type Fence = MetalFence; type Framebuffer = MetalFramebuffer; type ImageParameter = MetalImageParameter; @@ -302,6 +349,16 @@ impl Device for MetalDevice { type VertexArray = MetalVertexArray; type VertexAttr = VertexAttribute; + #[inline] + fn backend_name(&self) -> &'static str { + "Metal" + } + + #[inline] + fn device_name(&self) -> String { + self.device.name().to_owned() + } + #[inline] fn feature_level(&self) -> FeatureLevel { FeatureLevel::D3D11 @@ -309,23 +366,12 @@ impl Device for MetalDevice { // TODO: Add texture usage hint. fn create_texture(&self, format: TextureFormat, size: Vector2I) -> MetalTexture { - let descriptor = TextureDescriptor::new(); - descriptor.set_texture_type(MTLTextureType::D2); - match format { - TextureFormat::R8 => descriptor.set_pixel_format(MTLPixelFormat::R8Unorm), - TextureFormat::R16F => descriptor.set_pixel_format(MTLPixelFormat::R16Float), - TextureFormat::RGBA8 => descriptor.set_pixel_format(MTLPixelFormat::RGBA8Unorm), - TextureFormat::RGBA16F => descriptor.set_pixel_format(MTLPixelFormat::RGBA16Float), - TextureFormat::RGBA32F => descriptor.set_pixel_format(MTLPixelFormat::RGBA32Float), - } - descriptor.set_width(size.x() as u64); - descriptor.set_height(size.y() as u64); - descriptor.set_storage_mode(MTLStorageMode::Managed); - descriptor.set_usage(MTLTextureUsage::Unknown); + let descriptor = create_texture_descriptor(format, size); + descriptor.set_storage_mode(MTLStorageMode::Private); MetalTexture { - texture: self.device.new_texture(&descriptor), + private_texture: self.device.new_texture(&descriptor), + shared_buffer: RefCell::new(None), sampling_flags: 
Cell::new(TextureSamplingFlags::empty()), - dirty: Cell::new(false), } } @@ -415,7 +461,6 @@ impl Device for MetalDevice { let attribute = attributes.object_at(attribute_index); let this_name = attribute.name().as_bytes(); if this_name[0] == b'a' && this_name[1..] == *name.as_bytes() { - //println!("found attribute: \"{}\"", name); return Some(attribute.retain()) } } @@ -510,6 +555,7 @@ impl Device for MetalDevice { MTLVertexFormat::UCharNormalized } (VertexAttrClass::Int, VertexAttrType::I16, 1) => MTLVertexFormat::Short, + (VertexAttrClass::Int, VertexAttrType::I32, 1) => MTLVertexFormat::Int, (VertexAttrClass::Int, VertexAttrType::U16, 1) => MTLVertexFormat::UShort, (VertexAttrClass::FloatNorm, VertexAttrType::U16, 1) => { MTLVertexFormat::UShortNormalized @@ -547,38 +593,112 @@ impl Device for MetalDevice { } fn create_buffer(&self, mode: BufferUploadMode) -> MetalBuffer { - MetalBuffer { buffer: Rc::new(RefCell::new(None)), mode } + MetalBuffer { + allocations: Rc::new(RefCell::new(BufferAllocations { + private: None, + shared: None, + byte_size: 0, + })), + mode, + } } fn allocate_buffer(&self, buffer: &MetalBuffer, data: BufferData, - _: BufferTarget) { + target: BufferTarget) { let options = buffer.mode.to_metal_resource_options(); + let length = match data { + BufferData::Uninitialized(size) => size, + BufferData::Memory(slice) => slice.len(), + }; + let byte_size = (length * mem::size_of::()) as u64; + let new_buffer = self.device.new_buffer(byte_size, options); + + *buffer.allocations.borrow_mut() = BufferAllocations { + private: Some(new_buffer), + shared: None, + byte_size, + }; + match data { - BufferData::Uninitialized(size) => { - let size = (size * mem::size_of::()) as u64; - let new_buffer = self.device.new_buffer(size, options); - *buffer.buffer.borrow_mut() = Some(new_buffer); - } - BufferData::Memory(slice) => { - let size = (slice.len() * mem::size_of::()) as u64; - let new_buffer = self.device.new_buffer_with_data(slice.as_ptr() as *const _, 
- size, - options); - *buffer.buffer.borrow_mut() = Some(new_buffer); - } + BufferData::Uninitialized(_) => {} + BufferData::Memory(slice) => self.upload_to_buffer(buffer, 0, slice, target), } } fn upload_to_buffer(&self, - buffer: &MetalBuffer, + dest_buffer: &MetalBuffer, start: usize, data: &[T], _: BufferTarget) { - let mut buffer = buffer.buffer.borrow_mut(); - let buffer = buffer.as_mut().unwrap(); - self.upload_to_metal_buffer(buffer, start, data) + if data.is_empty() { + return; + } + + let mut dest_allocations = dest_buffer.allocations.borrow_mut(); + let dest_allocations = &mut *dest_allocations; + let dest_private_buffer = dest_allocations.private.as_mut().unwrap(); + + let byte_start = (start * mem::size_of::()) as u64; + let byte_size = (data.len() * mem::size_of::()) as u64; + + if dest_allocations.shared.is_none() { + let resource_options = MTLResourceOptions::CPUCacheModeWriteCombined | + MTLResourceOptions::StorageModeShared; + dest_allocations.shared = Some(StagingBuffer { + buffer: self.device.new_buffer(dest_allocations.byte_size, resource_options), + event_value: 0, + }); + } + + let staging_buffer = dest_allocations.shared.as_mut().unwrap(); + if staging_buffer.event_value != 0 { + let mut mutex = self.buffer_upload_event_data.mutex.lock().unwrap(); + while *mutex < staging_buffer.event_value { + mutex = self.buffer_upload_event_data.cond.wait(mutex).unwrap(); + } + } + + unsafe { + ptr::copy_nonoverlapping( + data.as_ptr() as *const u8, + (staging_buffer.buffer.contents() as *mut u8).offset(byte_start as isize), + byte_size as usize) + } + + staging_buffer.event_value = self.next_buffer_upload_event_value.get(); + self.next_buffer_upload_event_value.set(staging_buffer.event_value + 1); + + { + let command_buffers = self.command_buffers.borrow(); + let command_buffer = command_buffers.last().unwrap(); + let blit_command_encoder = command_buffer.real_new_blit_command_encoder(); + blit_command_encoder.copy_from_buffer(&staging_buffer.buffer, + 
byte_start, + &dest_private_buffer, + byte_start, + byte_size); + blit_command_encoder.end_encoding(); + + command_buffer.encode_signal_event(&self.buffer_upload_shared_event, + staging_buffer.event_value); + + let buffer_upload_event_data = self.buffer_upload_event_data.clone(); + let event_value = staging_buffer.event_value; + let listener_block = ConcreteBlock::new(move |_, _| { + let mut mutex = buffer_upload_event_data.mutex.lock().unwrap(); + *mutex = (*mutex).max(event_value); + buffer_upload_event_data.cond.notify_all(); + }); + self.buffer_upload_shared_event.notify_listener_at_value(&self.shared_event_listener, + staging_buffer.event_value, + listener_block.copy()); + } + + // Flush to avoid deadlock. + self.end_commands(); + self.begin_commands(); } #[inline] @@ -592,7 +712,7 @@ impl Device for MetalDevice { } fn texture_format(&self, texture: &MetalTexture) -> TextureFormat { - match texture.texture.pixel_format() { + match texture.private_texture.pixel_format() { MTLPixelFormat::R8Unorm => TextureFormat::R8, MTLPixelFormat::R16Float => TextureFormat::R16F, MTLPixelFormat::RGBA8Unorm => TextureFormat::RGBA8, @@ -603,34 +723,70 @@ impl Device for MetalDevice { } fn texture_size(&self, texture: &MetalTexture) -> Vector2I { - vec2i(texture.texture.width() as i32, texture.texture.height() as i32) + vec2i(texture.private_texture.width() as i32, texture.private_texture.height() as i32) } fn set_texture_sampling_mode(&self, texture: &MetalTexture, flags: TextureSamplingFlags) { texture.sampling_flags.set(flags) } - fn upload_to_texture(&self, texture: &MetalTexture, rect: RectI, data: TextureDataRef) { - let texture_size = self.texture_size(texture); - assert!(rect.size().x() >= 0); - assert!(rect.size().y() >= 0); - assert!(rect.max_x() <= texture_size.x()); - assert!(rect.max_y() <= texture_size.y()); + fn upload_to_texture(&self, dest_texture: &MetalTexture, rect: RectI, data: TextureDataRef) { + let command_buffers = self.command_buffers.borrow(); + let 
command_buffer = command_buffers.last().expect("Must call `begin_commands()` first!"); - let format = self.texture_format(&texture.texture).expect("Unexpected texture format!"); - let data_ptr = data.check_and_extract_data_ptr(rect.size(), format); + let texture_size = self.texture_size(dest_texture); + let texture_format = self.texture_format(&dest_texture.private_texture) + .expect("Unexpected texture format!"); + let bytes_per_pixel = texture_format.bytes_per_pixel() as u64; + let texture_byte_size = texture_size.area() as u64 * bytes_per_pixel; - let origin = MTLOrigin { x: rect.origin().x() as u64, y: rect.origin().y() as u64, z: 0 }; - let size = MTLSize { - width: rect.size().x() as u64, - height: rect.size().y() as u64, + let mut src_shared_buffer = dest_texture.shared_buffer.borrow_mut(); + if src_shared_buffer.is_none() { + let resource_options = MTLResourceOptions::CPUCacheModeWriteCombined | + MTLResourceOptions::StorageModeShared; + *src_shared_buffer = Some(self.device.new_buffer(texture_byte_size, resource_options)); + } + + // TODO(pcwalton): Wait if necessary... 
+ let src_shared_buffer = src_shared_buffer.as_ref().unwrap(); + let texture_data_ptr = + data.check_and_extract_data_ptr(rect.size(), texture_format) as *const u8; + let src_stride = rect.width() as u64 * bytes_per_pixel; + let dest_stride = texture_size.x() as u64 * bytes_per_pixel; + unsafe { + let dest_contents = src_shared_buffer.contents() as *mut u8; + for src_y in 0..rect.height() { + let dest_y = src_y + rect.origin_y(); + let src_offset = src_y as isize * src_stride as isize; + let dest_offset = dest_y as isize * dest_stride as isize + + rect.origin_x() as isize * bytes_per_pixel as isize; + ptr::copy_nonoverlapping(texture_data_ptr.offset(src_offset), + dest_contents.offset(dest_offset), + src_stride as usize); + } + } + + let src_size = MTLSize { + width: rect.width() as u64, + height: rect.height() as u64, depth: 1, }; - let region = MTLRegion { origin, size }; - let stride = format.bytes_per_pixel() as u64 * size.width; - texture.texture.replace_region(region, 0, stride, data_ptr); + let dest_origin = MTLOrigin { x: rect.origin_x() as u64, y: rect.origin_y() as u64, z: 0 }; + let dest_byte_offset = rect.origin_y() as u64 * src_stride as u64 + + rect.origin_x() as u64 * bytes_per_pixel as u64; - texture.dirty.set(true); + let blit_command_encoder = command_buffer.real_new_blit_command_encoder(); + blit_command_encoder.copy_from_buffer_to_texture(&src_shared_buffer, + dest_byte_offset, + dest_stride, + 0, + src_size, + &dest_texture.private_texture, + 0, + 0, + dest_origin, + MTLBlitOption::empty()); + blit_command_encoder.end_encoding(); } fn read_pixels(&self, target: &RenderTarget, viewport: RectI) @@ -638,7 +794,7 @@ impl Device for MetalDevice { let texture = self.render_target_color_texture(target); let texture_data_receiver = MetalTextureDataReceiver(Arc::new(MetalTextureDataReceiverInfo { - mutex: Mutex::new(MetalTextureDataReceiverState::Pending), + mutex: Mutex::new(MetalDataReceiverState::Pending), cond: Condvar::new(), texture, viewport, @@ 
-650,11 +806,81 @@ impl Device for MetalDevice { }); self.synchronize_texture(&texture_data_receiver.0.texture, block.copy()); + + self.end_commands(); + self.begin_commands(); + texture_data_receiver } + fn read_buffer(&self, src_buffer: &MetalBuffer, _: BufferTarget, range: Range) + -> MetalBufferDataReceiver { + let buffer_data_receiver; + { + let command_buffers = self.command_buffers.borrow(); + let command_buffer = command_buffers.last().unwrap(); + + let mut src_allocations = src_buffer.allocations.borrow_mut(); + let src_allocations = &mut *src_allocations; + let src_private_buffer = src_allocations.private + .as_ref() + .expect("Private buffer not allocated!"); + + if src_allocations.shared.is_none() { + let resource_options = MTLResourceOptions::CPUCacheModeWriteCombined | + MTLResourceOptions::StorageModeShared; + src_allocations.shared = Some(StagingBuffer { + buffer: self.device.new_buffer(src_allocations.byte_size, resource_options), + event_value: 0, + }); + } + + let staging_buffer = src_allocations.shared.as_ref().unwrap(); + let byte_size = (range.end - range.start) as u64; + let blit_command_encoder = command_buffer.real_new_blit_command_encoder(); + blit_command_encoder.copy_from_buffer(src_private_buffer, + 0, + &staging_buffer.buffer, + range.start as u64, + byte_size); + + buffer_data_receiver = MetalBufferDataReceiver(Arc::new(MetalBufferDataReceiverInfo { + mutex: Mutex::new(MetalDataReceiverState::Pending), + cond: Condvar::new(), + staging_buffer: staging_buffer.buffer.clone(), + })); + + blit_command_encoder.end_encoding(); + + let buffer_data_receiver_for_block = buffer_data_receiver.clone(); + let block = ConcreteBlock::new(move |_| buffer_data_receiver_for_block.download()); + command_buffer.add_completed_handler(block.copy()); + } + + self.end_commands(); + self.begin_commands(); + + buffer_data_receiver + } + + fn try_recv_buffer(&self, buffer_data_receiver: &MetalBufferDataReceiver) -> Option> { + try_recv_data_with_guard(&mut 
buffer_data_receiver.0.mutex.lock().unwrap()) + } + + fn recv_buffer(&self, buffer_data_receiver: &MetalBufferDataReceiver) -> Vec { + let mut guard = buffer_data_receiver.0.mutex.lock().unwrap(); + + loop { + let buffer_data = try_recv_data_with_guard(&mut guard); + if let Some(buffer_data) = buffer_data { + return buffer_data + } + guard = buffer_data_receiver.0.cond.wait(guard).unwrap(); + } + } + fn begin_commands(&self) { - self.command_buffers.borrow_mut().push(self.command_queue.new_command_buffer().retain()); + self.command_buffers.borrow_mut().push(self.command_queue.new_command_buffer_retained()) } fn end_commands(&self) { @@ -678,8 +904,8 @@ impl Device for MetalDevice { .index_buffer .borrow(); let index_buffer = index_buffer.as_ref().expect("No index buffer bound to VAO!"); - let index_buffer = index_buffer.buffer.borrow(); - let index_buffer = index_buffer.as_ref().expect("Index buffer not allocated!"); + let index_buffer = index_buffer.allocations.borrow(); + let index_buffer = index_buffer.private.as_ref().expect("Index buffer not allocated!"); encoder.draw_indexed_primitives(primitive, index_count, index_type, index_buffer, 0); encoder.end_encoding(); } @@ -690,13 +916,15 @@ impl Device for MetalDevice { render_state: &RenderState) { let encoder = self.prepare_to_draw(render_state); let primitive = render_state.primitive.to_metal_primitive(); + let index_type = MTLIndexType::UInt32; let index_buffer = render_state.vertex_array .index_buffer .borrow(); let index_buffer = index_buffer.as_ref().expect("No index buffer bound to VAO!"); - let index_buffer = index_buffer.buffer.borrow(); - let index_buffer = index_buffer.as_ref().expect("Index buffer not allocated!"); + let index_buffer = index_buffer.allocations.borrow(); + let index_buffer = index_buffer.private.as_ref().expect("Index buffer not allocated!"); + encoder.draw_indexed_primitives_instanced(primitive, index_count as u64, index_type, @@ -712,7 +940,7 @@ impl Device for MetalDevice { let 
command_buffers = self.command_buffers.borrow(); let command_buffer = command_buffers.last().unwrap(); - let encoder = command_buffer.new_compute_command_encoder(); + let encoder = command_buffer.real_new_compute_command_encoder(); let program = match compute_state.program { MetalProgram::Compute(ref compute_program) => compute_program, @@ -724,7 +952,7 @@ impl Device for MetalDevice { let compute_pipeline_state = unsafe { if program.shader.arguments.borrow().is_none() { - // FIXME(pcwalton): Factor these raw Objective-C method calls out into a trait. + // FIXME(pcwalton): Factor these raw Objective-C method calls out into a trait. let mut reflection: *mut Object = ptr::null_mut(); let reflection_options = MTLPipelineOption::ArgumentInfo | MTLPipelineOption::BufferTypeInfo; @@ -756,56 +984,74 @@ impl Device for MetalDevice { }; encoder.dispatch_thread_groups(size.to_metal_size(), local_size); + + let fence = self.device.new_fence(); + encoder.update_fence(&fence); + *self.compute_fence.borrow_mut() = Some(fence); + encoder.end_encoding(); } fn create_timer_query(&self) -> MetalTimerQuery { - let event_value = self.next_timer_query_event_value.get(); - self.next_timer_query_event_value.set(event_value + 2); - let query = MetalTimerQuery(Arc::new(MetalTimerQueryInfo { - event_value, - mutex: Mutex::new(MetalTimerQueryData { start_time: None, end_time: None }), + mutex: Mutex::new(MetalTimerQueryData { + start_time: None, + end_time: None, + start_block: None, + end_block: None, + start_event_value: 0, + }), cond: Condvar::new(), })); - let captured_query = query.clone(); - let start_block = ConcreteBlock::new(move |_: *mut Object, _: u64| { + let captured_query = Arc::downgrade(&query.0); + query.0.mutex.lock().unwrap().start_block = Some(ConcreteBlock::new(move |_: *mut Object, + _: u64| { let start_time = Instant::now(); - let mut guard = captured_query.0.mutex.lock().unwrap(); + let query = captured_query.upgrade().unwrap(); + let mut guard = 
query.mutex.lock().unwrap(); guard.start_time = Some(start_time); - }); - let captured_query = query.clone(); - let end_block = ConcreteBlock::new(move |_: *mut Object, _: u64| { + }).copy()); + let captured_query = Arc::downgrade(&query.0); + query.0.mutex.lock().unwrap().end_block = Some(ConcreteBlock::new(move |_: *mut Object, + _: u64| { let end_time = Instant::now(); - let mut guard = captured_query.0.mutex.lock().unwrap(); + let query = captured_query.upgrade().unwrap(); + let mut guard = query.mutex.lock().unwrap(); guard.end_time = Some(end_time); - captured_query.0.cond.notify_all(); - }); - self.shared_event.notify_listener_at_value(&self.shared_event_listener, - event_value, - start_block.copy()); - self.shared_event.notify_listener_at_value(&self.shared_event_listener, - event_value + 1, - end_block.copy()); + query.cond.notify_all(); + }).copy()); query } fn begin_timer_query(&self, query: &MetalTimerQuery) { + let start_event_value = self.next_timer_query_event_value.get(); + self.next_timer_query_event_value.set(start_event_value + 2); + let mut guard = query.0.mutex.lock().unwrap(); + guard.start_event_value = start_event_value; + self.timer_query_shared_event + .notify_listener_at_value(&self.shared_event_listener, + start_event_value, + (*guard.start_block.as_ref().unwrap()).clone()); self.command_buffers .borrow_mut() .last() .unwrap() - .encode_signal_event(&self.shared_event, query.0.event_value); + .encode_signal_event(&self.timer_query_shared_event, start_event_value); } fn end_timer_query(&self, query: &MetalTimerQuery) { + let guard = query.0.mutex.lock().unwrap(); + self.timer_query_shared_event + .notify_listener_at_value(&self.shared_event_listener, + guard.start_event_value + 1, + (*guard.end_block.as_ref().unwrap()).clone()); self.command_buffers .borrow_mut() .last() .unwrap() - .encode_signal_event(&self.shared_event, query.0.event_value + 1); + .encode_signal_event(&self.timer_query_shared_event, guard.start_event_value + 1); } fn 
try_recv_timer_query(&self, query: &MetalTimerQuery) -> Option { @@ -824,13 +1070,13 @@ impl Device for MetalDevice { } fn try_recv_texture_data(&self, receiver: &MetalTextureDataReceiver) -> Option { - try_recv_texture_data_with_guard(&mut receiver.0.mutex.lock().unwrap()) + try_recv_data_with_guard(&mut receiver.0.mutex.lock().unwrap()) } fn recv_texture_data(&self, receiver: &MetalTextureDataReceiver) -> TextureData { let mut guard = receiver.0.mutex.lock().unwrap(); loop { - let texture_data = try_recv_texture_data_with_guard(&mut guard); + let texture_data = try_recv_data_with_guard(&mut guard); if let Some(texture_data) = texture_data { return texture_data } @@ -1068,7 +1314,7 @@ impl MetalDevice { -> Texture { match *render_target { RenderTarget::Default {..} => self.main_color_texture.retain(), - RenderTarget::Framebuffer(framebuffer) => framebuffer.0.texture.retain(), + RenderTarget::Framebuffer(framebuffer) => framebuffer.0.private_texture.retain(), } } @@ -1091,27 +1337,16 @@ impl MetalDevice { let command_buffers = self.command_buffers.borrow(); let command_buffer = command_buffers.last().unwrap(); - // FIXME(pcwalton): Is this necessary? 
- let mut blit_command_encoder = None; - for &(_, texture) in render_state.textures { - if !texture.dirty.get() { - continue; - } - if blit_command_encoder.is_none() { - blit_command_encoder = Some(command_buffer.new_blit_command_encoder()); - } - let blit_command_encoder = - blit_command_encoder.as_ref().expect("Where's the blit command encoder?"); - blit_command_encoder.synchronize_resource(&texture.texture); - texture.dirty.set(false); - } - if let Some(blit_command_encoder) = blit_command_encoder { - blit_command_encoder.end_encoding(); - } - let render_pass_descriptor = self.create_render_pass_descriptor(render_state); - let encoder = command_buffer.new_render_command_encoder(&render_pass_descriptor).retain(); + let encoder = command_buffer.new_render_command_encoder_retained(&render_pass_descriptor); + + // Wait on the previous compute command, if any. + let compute_fence = self.compute_fence.borrow(); + if let Some(ref compute_fence) = *compute_fence { + encoder.wait_for_fence_before_stages(compute_fence, MTLRenderStage::Vertex); + } + self.set_viewport(&encoder, &render_state.viewport); let program = match render_state.program { @@ -1168,16 +1403,18 @@ impl MetalDevice { .iter() .enumerate() { let real_index = vertex_buffer_index as u64 + FIRST_VERTEX_BUFFER_INDEX; - let buffer = vertex_buffer.buffer.borrow(); - let buffer = buffer.as_ref() + let buffer = vertex_buffer.allocations.borrow(); + let buffer = buffer.private + .as_ref() .map(|buffer| buffer.as_ref()) - .expect("Where's the vertex buffer?"); + .expect("Where's the private vertex buffer?"); encoder.set_vertex_buffer(real_index, Some(buffer), 0); } self.set_raster_uniforms(&encoder, render_state); encoder.set_render_pipeline_state(&render_pipeline_state); self.set_depth_stencil_state(&encoder, render_state); + encoder } @@ -1257,17 +1494,42 @@ impl MetalDevice { }; if let Some(vertex_index) = *vertex_indices { - render_command_encoder.set_vertex_texture(vertex_index.0, Some(&image.texture)); + 
render_command_encoder.set_vertex_texture(vertex_index.0, + Some(&image.private_texture)); } if let Some(fragment_index) = *fragment_indices { render_command_encoder.set_fragment_texture(fragment_index.0, - Some(&image.texture)); + Some(&image.private_texture)); + } + } + + // Set storage buffers. + for &(storage_buffer_id, storage_buffer_binding) in render_state.storage_buffers { + self.populate_storage_buffer_indices_if_necessary(storage_buffer_id, + &render_state.program); + + let indices = storage_buffer_id.indices.borrow_mut(); + let indices = indices.as_ref().unwrap(); + let (vertex_indices, fragment_indices) = match indices.0 { + ProgramKind::Raster { ref vertex, ref fragment } => (vertex, fragment), + _ => unreachable!(), + }; + + if let Some(vertex_index) = *vertex_indices { + if let Some(ref buffer) = storage_buffer_binding.allocations.borrow().private { + render_command_encoder.set_vertex_buffer(vertex_index.0, Some(buffer), 0); + } + } + if let Some(fragment_index) = *fragment_indices { + if let Some(ref buffer) = storage_buffer_binding.allocations.borrow().private { + render_command_encoder.set_fragment_buffer(fragment_index.0, Some(buffer), 0); + } } } } fn set_compute_uniforms(&self, - compute_command_encoder: &ComputeCommandEncoderRef, + compute_command_encoder: &ComputeCommandEncoder, compute_state: &ComputeState) { // Set uniforms. 
let uniform_buffer = self.create_uniform_buffer(&compute_state.uniforms); @@ -1318,7 +1580,7 @@ impl MetalDevice { }; if let Some(indices) = *indices { - compute_command_encoder.set_texture(indices.0, Some(&image.texture)); + compute_command_encoder.set_texture(indices.0, Some(&image.private_texture)); } } @@ -1335,11 +1597,10 @@ impl MetalDevice { }; if let Some(index) = *indices { - if let Some(ref buffer) = *storage_buffer_binding.buffer.borrow() { + if let Some(ref buffer) = storage_buffer_binding.allocations.borrow().private { compute_command_encoder.set_buffer(index.0, Some(buffer), 0); } } - } } @@ -1432,7 +1693,7 @@ impl MetalDevice { argument_index: MetalUniformIndex, buffer: &[u8], buffer_range: &Range, - compute_command_encoder: &ComputeCommandEncoderRef) { + compute_command_encoder: &ComputeCommandEncoder) { compute_command_encoder.set_bytes( argument_index.0, (buffer_range.end - buffer_range.start) as u64, @@ -1443,7 +1704,8 @@ impl MetalDevice { argument_index: MetalTextureIndex, render_command_encoder: &RenderCommandEncoderRef, texture: &MetalTexture) { - render_command_encoder.set_vertex_texture(argument_index.main, Some(&texture.texture)); + render_command_encoder.set_vertex_texture(argument_index.main, + Some(&texture.private_texture)); let sampler = &self.samplers[texture.sampling_flags.get().bits() as usize]; render_command_encoder.set_vertex_sampler_state(argument_index.sampler, Some(sampler)); } @@ -1452,16 +1714,17 @@ impl MetalDevice { argument_index: MetalTextureIndex, render_command_encoder: &RenderCommandEncoderRef, texture: &MetalTexture) { - render_command_encoder.set_fragment_texture(argument_index.main, Some(&texture.texture)); + render_command_encoder.set_fragment_texture(argument_index.main, + Some(&texture.private_texture)); let sampler = &self.samplers[texture.sampling_flags.get().bits() as usize]; render_command_encoder.set_fragment_sampler_state(argument_index.sampler, Some(sampler)); } fn encode_compute_texture_parameter(&self, 
argument_index: MetalTextureIndex, - compute_command_encoder: &ComputeCommandEncoderRef, + compute_command_encoder: &ComputeCommandEncoder, texture: &MetalTexture) { - compute_command_encoder.set_texture(argument_index.main, Some(&texture.texture)); + compute_command_encoder.set_texture(argument_index.main, Some(&texture.private_texture)); let sampler = &self.samplers[texture.sampling_flags.get().bits() as usize]; compute_command_encoder.set_sampler_state(argument_index.sampler, Some(sampler)); } @@ -1502,7 +1765,7 @@ impl MetalDevice { fn create_render_pass_descriptor(&self, render_state: &RenderState) -> RenderPassDescriptor { - let render_pass_descriptor = RenderPassDescriptor::new().retain(); + let render_pass_descriptor = RenderPassDescriptor::new_retained(); let color_attachment = render_pass_descriptor.color_attachments().object_at(0).unwrap(); color_attachment.set_texture(Some(&self.render_target_color_texture(render_state.target))); @@ -1609,28 +1872,18 @@ impl MetalDevice { } fn synchronize_texture(&self, texture: &Texture, block: RcBlock<(*mut Object,), ()>) { - let command_buffers = self.command_buffers.borrow(); - let command_buffer = command_buffers.last().unwrap(); - let encoder = command_buffer.new_blit_command_encoder(); - encoder.synchronize_resource(&texture); - command_buffer.add_completed_handler(block); - encoder.end_encoding(); + { + let command_buffers = self.command_buffers.borrow(); + let command_buffer = command_buffers.last().unwrap(); + let encoder = command_buffer.real_new_blit_command_encoder(); + encoder.synchronize_resource(&texture); + command_buffer.add_completed_handler(block); + encoder.end_encoding(); + } self.end_commands(); self.begin_commands(); } - - fn upload_to_metal_buffer(&self, buffer: &Buffer, start: usize, data: &[T]) { - unsafe { - let start = (start * mem::size_of::()) as u64; - let size = (data.len() * mem::size_of::()) as u64; - assert!(start + size <= buffer.length()); - ptr::copy_nonoverlapping(data.as_ptr() as 
*const u8, - (buffer.contents() as *mut u8).offset(start as isize), - size as usize); - buffer.did_modify_range(NSRange::new(start, size)); - } - } } trait DeviceExtra { @@ -1662,7 +1915,7 @@ struct UniformBuffer { impl MetalTexture { #[inline] pub fn metal_texture(&self) -> Texture { - self.texture.clone() + self.private_texture.clone() } } @@ -1688,10 +1941,12 @@ impl IntoTexture for IOSurfaceRef { descriptor.set_pixel_format(MTLPixelFormat::BGRA8Unorm); descriptor.set_width(width as u64); descriptor.set_height(height as u64); - descriptor.set_storage_mode(MTLStorageMode::Managed); + descriptor.set_storage_mode(MTLStorageMode::Private); descriptor.set_usage(MTLTextureUsage::Unknown); - msg_send![*metal_device, newTextureWithDescriptor:descriptor iosurface:self plane:0] + msg_send![*metal_device, newTextureWithDescriptor:descriptor.as_ptr() + iosurface:self + plane:0] } } @@ -1751,7 +2006,7 @@ impl BufferUploadModeExt for BufferUploadMode { BufferUploadMode::Static => MTLResourceOptions::CPUCacheModeWriteCombined, BufferUploadMode::Dynamic => MTLResourceOptions::CPUCacheModeDefaultCache, }; - options |= MTLResourceOptions::StorageModeManaged; + options |= MTLResourceOptions::StorageModePrivate; options } } @@ -1935,21 +2190,34 @@ impl MetalTextureDataReceiver { }; let mut guard = self.0.mutex.lock().unwrap(); - *guard = MetalTextureDataReceiverState::Downloaded(texture_data); + *guard = MetalDataReceiverState::Downloaded(texture_data); self.0.cond.notify_all(); } } -fn try_recv_texture_data_with_guard(guard: &mut MutexGuard) - -> Option { +impl MetalBufferDataReceiver { + fn download(&self) { + let staging_buffer_contents = self.0.staging_buffer.contents() as *const u8; + let staging_buffer_length = self.0.staging_buffer.length(); + unsafe { + let contents = slice::from_raw_parts(staging_buffer_contents, + staging_buffer_length.try_into().unwrap()); + let mut guard = self.0.mutex.lock().unwrap(); + *guard = MetalDataReceiverState::Downloaded(contents.to_vec()); + 
self.0.cond.notify_all(); + } + } +} + +fn try_recv_data_with_guard(guard: &mut MutexGuard>) -> Option { match **guard { - MetalTextureDataReceiverState::Pending | MetalTextureDataReceiverState::Finished => { + MetalDataReceiverState::Pending | MetalDataReceiverState::Finished => { return None } - MetalTextureDataReceiverState::Downloaded(_) => {} + MetalDataReceiverState::Downloaded(_) => {} } - match mem::replace(&mut **guard, MetalTextureDataReceiverState::Finished) { - MetalTextureDataReceiverState::Downloaded(texture_data) => Some(texture_data), + match mem::replace(&mut **guard, MetalDataReceiverState::Finished) { + MetalDataReceiverState::Downloaded(texture_data) => Some(texture_data), _ => unreachable!(), } } @@ -2038,14 +2306,23 @@ impl Drop for SharedEventListener { } impl SharedEventListener { - fn new() -> SharedEventListener { + fn new_from_dispatch_queue(queue: &Queue) -> SharedEventListener { unsafe { let listener: *mut Object = msg_send![class!(MTLSharedEventListener), alloc]; - SharedEventListener(msg_send![listener, init]) + let raw_queue: *const *mut dispatch_queue_t = mem::transmute(queue); + SharedEventListener(msg_send![listener, initWithDispatchQueue:*raw_queue]) } } } +struct Fence(*mut Object); + +impl Drop for Fence { + fn drop(&mut self) { + unsafe { msg_send![self.0, release] } + } +} + struct VertexAttributeArray(*mut Object); impl Drop for VertexAttributeArray { @@ -2086,6 +2363,13 @@ impl CoreAnimationLayerExt for CoreAnimationLayer { trait CommandBufferExt { fn encode_signal_event(&self, event: &SharedEvent, value: u64); fn add_completed_handler(&self, block: RcBlock<(*mut Object,), ()>); + // Just like `new_render_command_encoder`, but returns an owned version. + fn new_render_command_encoder_retained(&self, render_pass_descriptor: &RenderPassDescriptorRef) + -> RenderCommandEncoder; + // Just like `new_blit_command_encoder`, but doesn't leak. 
+ fn real_new_blit_command_encoder(&self) -> BlitCommandEncoder; + // Just like `new_compute_command_encoder`, but doesn't leak. + fn real_new_compute_command_encoder(&self) -> ComputeCommandEncoder; } impl CommandBufferExt for CommandBuffer { @@ -2100,6 +2384,40 @@ impl CommandBufferExt for CommandBuffer { msg_send![self.as_ptr(), addCompletedHandler:&*block] } } + + fn new_render_command_encoder_retained(&self, render_pass_descriptor: &RenderPassDescriptorRef) + -> RenderCommandEncoder { + unsafe { + RenderCommandEncoder::from_ptr( + msg_send![self.as_ptr(), + renderCommandEncoderWithDescriptor:render_pass_descriptor.as_ptr()]) + } + } + + fn real_new_blit_command_encoder(&self) -> BlitCommandEncoder { + unsafe { + BlitCommandEncoder::from_ptr(msg_send![self.as_ptr(), blitCommandEncoder]) + } + } + + fn real_new_compute_command_encoder(&self) -> ComputeCommandEncoder { + unsafe { + ComputeCommandEncoder::from_ptr(msg_send![self.as_ptr(), computeCommandEncoder]) + } + } +} + +trait CommandQueueExt { + // Just like `new_command_buffer()`, but returns an owned version. 
+ fn new_command_buffer_retained(&self) -> CommandBuffer; +} + +impl CommandQueueExt for CommandQueue { + fn new_command_buffer_retained(&self) -> CommandBuffer { + unsafe { + CommandBuffer::from_ptr(msg_send![self.as_ptr(), commandBuffer]) + } + } } trait DeviceExt { @@ -2111,6 +2429,7 @@ trait DeviceExt { -> (RenderPipelineState, RenderPipelineReflection); fn new_shared_event(&self) -> SharedEvent; + fn new_fence(&self) -> Fence; } impl DeviceExt for metal::Device { @@ -2143,6 +2462,10 @@ impl DeviceExt for metal::Device { fn new_shared_event(&self) -> SharedEvent { unsafe { SharedEvent(msg_send![self.as_ptr(), newSharedEvent]) } } + + fn new_fence(&self) -> Fence { + unsafe { Fence(msg_send![self.as_ptr(), newFence]) } + } } trait FunctionExt { @@ -2204,6 +2527,59 @@ impl StructMemberExt for StructMemberRef { } } +trait ComputeCommandEncoderExt { + fn update_fence(&self, fence: &Fence); + fn wait_for_fence(&self, fence: &Fence); +} + +impl ComputeCommandEncoderExt for ComputeCommandEncoder { + fn update_fence(&self, fence: &Fence) { + unsafe { msg_send![self.as_ptr(), updateFence:fence.0] } + } + + fn wait_for_fence(&self, fence: &Fence) { + unsafe { msg_send![self.as_ptr(), waitForFence:fence.0] } + } +} + +trait RenderCommandEncoderExt { + fn update_fence_before_stages(&self, fence: &Fence, stages: MTLRenderStage); + fn wait_for_fence_before_stages(&self, fence: &Fence, stages: MTLRenderStage); +} + +impl RenderCommandEncoderExt for RenderCommandEncoderRef { + fn update_fence_before_stages(&self, fence: &Fence, stages: MTLRenderStage) { + unsafe { msg_send![self.as_ptr(), updateFence:fence.0 beforeStages:stages] } + } + + fn wait_for_fence_before_stages(&self, fence: &Fence, stages: MTLRenderStage) { + unsafe { + msg_send![self.as_ptr(), waitForFence:fence.0 beforeStages:stages] + } + } +} + +trait RenderPassDescriptorExt { + // Returns a new owned version. 
+ fn new_retained() -> Self; +} + +impl RenderPassDescriptorExt for RenderPassDescriptor { + fn new_retained() -> RenderPassDescriptor { + unsafe { + RenderPassDescriptor::from_ptr(msg_send![class!(MTLRenderPassDescriptor), + renderPassDescriptor]) + } + } +} + +#[repr(u32)] +enum MTLRenderStage { + Vertex = 0, + #[allow(dead_code)] + Fragment = 1, +} + // Memory management helpers trait Retain { @@ -2211,13 +2587,6 @@ trait Retain { fn retain(&self) -> Self::Owned; } -impl Retain for CommandBufferRef { - type Owned = CommandBuffer; - fn retain(&self) -> CommandBuffer { - unsafe { CommandBuffer::from_ptr(msg_send![self.as_ptr(), retain]) } - } -} - impl Retain for CoreAnimationDrawableRef { type Owned = CoreAnimationDrawable; fn retain(&self) -> CoreAnimationDrawable { @@ -2232,20 +2601,6 @@ impl Retain for CoreAnimationLayerRef { } } -impl Retain for RenderCommandEncoderRef { - type Owned = RenderCommandEncoder; - fn retain(&self) -> RenderCommandEncoder { - unsafe { RenderCommandEncoder::from_ptr(msg_send![self.as_ptr(), retain]) } - } -} - -impl Retain for RenderPassDescriptorRef { - type Owned = RenderPassDescriptor; - fn retain(&self) -> RenderPassDescriptor { - unsafe { RenderPassDescriptor::from_ptr(msg_send![self.as_ptr(), retain]) } - } -} - impl Retain for StructTypeRef { type Owned = StructType; fn retain(&self) -> StructType { @@ -2305,3 +2660,26 @@ extern { fn IOSurfaceGetWidth(buffer: IOSurfaceRef) -> size_t; fn IOSurfaceGetHeight(buffer: IOSurfaceRef) -> size_t; } + +// Helper functions + +fn create_texture_descriptor(format: TextureFormat, size: Vector2I) -> TextureDescriptor { + let descriptor = TextureDescriptor::new(); + descriptor.set_texture_type(MTLTextureType::D2); + match format { + TextureFormat::R8 => descriptor.set_pixel_format(MTLPixelFormat::R8Unorm), + TextureFormat::R16F => descriptor.set_pixel_format(MTLPixelFormat::R16Float), + TextureFormat::RGBA8 => descriptor.set_pixel_format(MTLPixelFormat::RGBA8Unorm), + TextureFormat::RGBA16F 
=> descriptor.set_pixel_format(MTLPixelFormat::RGBA16Float), + TextureFormat::RGBA32F => descriptor.set_pixel_format(MTLPixelFormat::RGBA32Float), + } + descriptor.set_width(size.x() as u64); + descriptor.set_height(size.y() as u64); + descriptor.set_usage(MTLTextureUsage::Unknown); + descriptor +} + +struct BufferUploadEventData { + mutex: Mutex, + cond: Condvar, +} diff --git a/renderer/Cargo.toml b/renderer/Cargo.toml index 0bac653e..8d84d49e 100644 --- a/renderer/Cargo.toml +++ b/renderer/Cargo.toml @@ -10,11 +10,13 @@ homepage = "https://github.com/servo/pathfinder" [dependencies] bitflags = "1.0" +byte-slice-cast = "0.3" byteorder = "1.2" crossbeam-channel = "0.4" fxhash = "0.2" half = "1.5" hashbrown = "0.7" +log = "0.4" rayon = "1.0" serde = "1.0" serde_json = "1.0" @@ -22,9 +24,6 @@ smallvec = "1.2" vec_map = "0.8" instant = { version = "0.1.2", features = ["wasm-bindgen"] } -[dependencies.log] -version = "0.4" - [dependencies.pathfinder_color] path = "../color" version = "0.5" diff --git a/renderer/src/builder.rs b/renderer/src/builder.rs index 4758cc72..07ae83fe 100644 --- a/renderer/src/builder.rs +++ b/renderer/src/builder.rs @@ -11,86 +11,119 @@ //! Packs data onto the GPU. 
use crate::concurrent::executor::Executor; -use crate::gpu::renderer::{BlendModeExt, MASK_TILES_ACROSS, MASK_TILES_DOWN}; -use crate::gpu_data::{AlphaTileId, Clip, ClipBatch, ClipBatchKey, ClipBatchKind, Fill}; -use crate::gpu_data::{FillBatchEntry, RenderCommand, TILE_CTRL_MASK_0_SHIFT}; -use crate::gpu_data::{TILE_CTRL_MASK_EVEN_ODD, TILE_CTRL_MASK_WINDING, Tile, TileBatch}; -use crate::gpu_data::{TileBatchTexture, TileObjectPrimitive}; -use crate::options::{PreparedBuildOptions, PreparedRenderTransform, RenderCommandListener}; -use crate::paint::{PaintInfo, PaintMetadata}; -use crate::scene::{DisplayItem, Scene}; +use crate::gpu::blend::BlendModeExt; +use crate::gpu::options::RendererLevel; +use crate::gpu_data::{AlphaTileId, BackdropInfoD3D11, Clip, ClippedPathInfo, DiceMetadataD3D11}; +use crate::gpu_data::{DrawTileBatch, DrawTileBatchD3D9, DrawTileBatchD3D11, Fill, PathBatchIndex}; +use crate::gpu_data::{PathSource, PrepareTilesInfoD3D11}; +use crate::gpu_data::{PropagateMetadataD3D11, RenderCommand, SegmentIndicesD3D11, SegmentsD3D11}; +use crate::gpu_data::{TileBatchDataD3D11, TileBatchId, TileBatchTexture}; +use crate::gpu_data::{TileObjectPrimitive, TilePathInfoD3D11}; +use crate::options::{PrepareMode, PreparedBuildOptions, PreparedRenderTransform}; +use crate::paint::{PaintId, PaintInfo, PaintMetadata}; +use crate::scene::{ClipPathId, DisplayItem, DrawPath, DrawPathId, LastSceneInfo, PathId}; +use crate::scene::{Scene, SceneSink}; use crate::tile_map::DenseTileMap; use crate::tiler::Tiler; -use crate::tiles::{self, DrawTilingPathInfo, PackedTile, TILE_HEIGHT, TILE_WIDTH, TilingPathInfo}; -use crate::z_buffer::{DepthMetadata, ZBuffer}; +use crate::tiles::{self, DrawTilingPathInfo, TILE_HEIGHT, TILE_WIDTH, TilingPathInfo}; +use fxhash::FxHashMap; +use instant::Instant; use pathfinder_content::effects::{BlendMode, Filter}; use pathfinder_content::fill::FillRule; -use pathfinder_content::render_target::RenderTargetId; -use 
pathfinder_geometry::line_segment::{LineSegment2F, LineSegmentU4, LineSegmentU8}; -use pathfinder_geometry::rect::RectF; +use pathfinder_content::outline::{Outline, PointFlags}; +use pathfinder_geometry::line_segment::{LineSegment2F, LineSegmentU16}; +use pathfinder_geometry::rect::{RectF, RectI}; use pathfinder_geometry::transform2d::Transform2F; use pathfinder_geometry::vector::{Vector2I, vec2i}; use pathfinder_gpu::TextureSamplingFlags; -use pathfinder_simd::default::{F32x4, I32x4}; +use pathfinder_simd::default::F32x4; +use std::borrow::Cow; +use std::ops::Range; use std::sync::atomic::AtomicUsize; -use instant::Instant; use std::u32; pub(crate) const ALPHA_TILE_LEVEL_COUNT: usize = 2; pub(crate) const ALPHA_TILES_PER_LEVEL: usize = 1 << (32 - ALPHA_TILE_LEVEL_COUNT + 1); -pub(crate) struct SceneBuilder<'a, 'b> { +const CURVE_IS_QUADRATIC: u32 = 0x80000000; +const CURVE_IS_CUBIC: u32 = 0x40000000; + +pub(crate) struct SceneBuilder<'a, 'b, 'c, 'd> { scene: &'a mut Scene, built_options: &'b PreparedBuildOptions, next_alpha_tile_indices: [AtomicUsize; ALPHA_TILE_LEVEL_COUNT], - pub(crate) listener: Box, + pub(crate) sink: &'c mut SceneSink<'d>, } #[derive(Debug)] pub(crate) struct ObjectBuilder { pub built_path: BuiltPath, - /// During tiling, this stores the sum of backdrops for tile columns above the viewport. - pub current_backdrops: Vec, - pub fills: Vec, + pub fills: Vec, pub bounds: RectF, } -#[derive(Debug)] +// Derives `Clone` just so we can use `Cow`, not because we actually want to clone it. 
+#[derive(Clone, Debug)] struct BuiltDrawPath { path: BuiltPath, + clip_path_id: Option, blend_mode: BlendMode, filter: Filter, color_texture: Option, sampling_flags_1: TextureSamplingFlags, mask_0_fill_rule: FillRule, + occludes: bool, } -#[derive(Debug)] +impl BuiltDrawPath { + fn new(built_path: BuiltPath, path_object: &DrawPath, paint_metadata: &PaintMetadata) + -> BuiltDrawPath { + let blend_mode = path_object.blend_mode(); + let occludes = paint_metadata.is_opaque && blend_mode.occludes_backdrop(); + BuiltDrawPath { + path: built_path, + clip_path_id: path_object.clip_path(), + filter: paint_metadata.filter(), + color_texture: paint_metadata.tile_batch_texture(), + sampling_flags_1: TextureSamplingFlags::empty(), + mask_0_fill_rule: path_object.fill_rule(), + blend_mode, + occludes, + } + } +} + +// Derives `Clone` just so we can use `Cow`, not because we actually want to clone it. +#[derive(Clone, Debug)] pub(crate) struct BuiltPath { - pub solid_tiles: SolidTiles, - pub empty_tiles: Vec, - pub single_mask_tiles: Vec, - pub clip_tiles: Vec, - pub tiles: DenseTileMap, + pub data: BuiltPathData, + pub tile_bounds: RectI, pub fill_rule: FillRule, + pub clip_path_id: Option, + pub ctrl_byte: u8, + pub paint_id: PaintId, } #[derive(Clone, Debug)] -pub struct BuiltTile { - pub page: u16, - pub tile: Tile, -} - -#[derive(Clone, Copy, Debug)] -pub struct BuiltClip { - pub clip: Clip, - pub key: ClipBatchKey, +pub(crate) enum BuiltPathData { + CPU(BuiltPathBinCPUData), + TransformCPUBinGPU(BuiltPathTransformCPUBinGPUData), + GPU, } #[derive(Clone, Debug)] -pub(crate) enum SolidTiles { - Occluders(Vec), - Regular(Vec), +pub(crate) struct BuiltPathBinCPUData { + /// During tiling, or if backdrop computation is done on GPU, this stores the sum of backdrops + /// for tile columns above the viewport. 
+ pub backdrops: Vec, + pub tiles: DenseTileMap, + pub clip_tiles: Option>, +} + +#[derive(Clone, Debug)] +pub(crate) struct BuiltPathTransformCPUBinGPUData { + /// The transformed outline. + pub outline: Outline, } #[derive(Clone, Copy, Debug)] @@ -98,18 +131,16 @@ pub(crate) struct Occluder { pub(crate) coords: Vector2I, } -impl<'a, 'b> SceneBuilder<'a, 'b> { - pub(crate) fn new( - scene: &'a mut Scene, - built_options: &'b PreparedBuildOptions, - listener: Box, - ) -> SceneBuilder<'a, 'b> { - let effective_view_box = scene.effective_view_box(built_options); +impl<'a, 'b, 'c, 'd> SceneBuilder<'a, 'b, 'c, 'd> { + pub(crate) fn new(scene: &'a mut Scene, + built_options: &'b PreparedBuildOptions, + sink: &'c mut SceneSink<'d>) + -> SceneBuilder<'a, 'b, 'c, 'd> { SceneBuilder { scene, built_options, next_alpha_tile_indices: [AtomicUsize::new(0), AtomicUsize::new(0)], - listener, + sink, } } @@ -119,18 +150,20 @@ impl<'a, 'b> SceneBuilder<'a, 'b> { // Send the start rendering command. let bounding_quad = self.built_options.bounding_quad(); - let clip_path_count = self.scene.clip_paths.len(); - let draw_path_count = self.scene.paths.len(); + let clip_path_count = self.scene.clip_paths().len(); + let draw_path_count = self.scene.draw_paths().len(); let total_path_count = clip_path_count + draw_path_count; let needs_readable_framebuffer = self.needs_readable_framebuffer(); - self.listener.send(RenderCommand::Start { + self.sink.listener.send(RenderCommand::Start { bounding_quad, path_count: total_path_count, needs_readable_framebuffer, }); + let prepare_mode = self.built_options.to_prepare_mode(self.sink.renderer_level); + let render_transform = match self.built_options.transform { PreparedRenderTransform::Transform2D(transform) => transform.inverse(), _ => Transform2F::default() @@ -143,25 +176,73 @@ impl<'a, 'b> SceneBuilder<'a, 'b> { render_target_metadata: _, } = self.scene.build_paint_info(render_transform); for render_command in render_commands { - 
self.listener.send(render_command); + self.sink.listener.send(render_command); } + let built_paths = match prepare_mode { + PrepareMode::CPU | PrepareMode::TransformCPUBinGPU => { + Some(self.build_paths_on_cpu(executor, &paint_metadata, &prepare_mode)) + } + PrepareMode::GPU { .. } => None, + }; + + // TODO(pcwalton): Do this earlier? + let scene_is_dirty = match (&prepare_mode, &self.sink.last_scene) { + (&PrepareMode::GPU { .. }, &None) => true, + (&PrepareMode::GPU { .. }, &Some(LastSceneInfo { + scene_id: ref last_scene_id, + scene_epoch: ref last_scene_epoch, + .. + })) => *last_scene_id != self.scene.id() || *last_scene_epoch != self.scene.epoch(), + _ => false, + }; + + if scene_is_dirty { + let built_segments = BuiltSegments::from_scene(&self.scene); + self.sink.listener.send(RenderCommand::UploadSceneD3D11 { + draw_segments: built_segments.draw_segments, + clip_segments: built_segments.clip_segments, + }); + self.sink.last_scene = Some(LastSceneInfo { + scene_id: self.scene.id(), + scene_epoch: self.scene.epoch(), + draw_segment_ranges: built_segments.draw_segment_ranges, + clip_segment_ranges: built_segments.clip_segment_ranges, + }); + } + + self.finish_building(&paint_metadata, built_paths, &prepare_mode); + + let cpu_build_time = Instant::now() - start_time; + self.sink.listener.send(RenderCommand::Finish { cpu_build_time }); + } + + fn build_paths_on_cpu(&mut self, + executor: &E, + paint_metadata: &[PaintMetadata], + prepare_mode: &PrepareMode) + -> BuiltPaths + where E: Executor { + let clip_path_count = self.scene.clip_paths().len(); + let draw_path_count = self.scene.draw_paths().len(); let effective_view_box = self.scene.effective_view_box(self.built_options); let built_clip_paths = executor.build_vector(clip_path_count, |path_index| { - self.build_clip_path(PathBuildParams { - path_index, + self.build_clip_path_on_cpu(PathBuildParams { + path_id: PathId(path_index as u32), view_box: effective_view_box, + prepare_mode: *prepare_mode, 
built_options: &self.built_options, scene: &self.scene, }) }); let built_draw_paths = executor.build_vector(draw_path_count, |path_index| { - self.build_draw_path(DrawPathBuildParams { + self.build_draw_path_on_cpu(DrawPathBuildParams { path_build_params: PathBuildParams { - path_index, + path_id: PathId(path_index as u32), view_box: effective_view_box, + prepare_mode: *prepare_mode, built_options: &self.built_options, scene: &self.scene, }, @@ -170,21 +251,21 @@ impl<'a, 'b> SceneBuilder<'a, 'b> { }) }); - self.finish_building(&paint_metadata, built_draw_paths); - - let cpu_build_time = Instant::now() - start_time; - self.listener.send(RenderCommand::Finish { cpu_build_time }); + BuiltPaths { draw: built_draw_paths } } - fn build_clip_path(&self, params: PathBuildParams) -> BuiltPath { - let PathBuildParams { path_index, view_box, built_options, scene } = params; - let path_object = &scene.clip_paths[path_index]; + fn build_clip_path_on_cpu(&self, params: PathBuildParams) -> BuiltPath { + let PathBuildParams { path_id, view_box, built_options, scene, prepare_mode } = params; + let path_object = &scene.get_clip_path(path_id.to_clip_path_id()); let outline = scene.apply_render_options(path_object.outline(), built_options); let mut tiler = Tiler::new(self, + path_id, &outline, path_object.fill_rule(), view_box, + &prepare_mode, + &[], TilingPathInfo::Clip); tiler.generate_tiles(); @@ -192,320 +273,108 @@ impl<'a, 'b> SceneBuilder<'a, 'b> { tiler.object_builder.built_path } - fn build_draw_path(&self, params: DrawPathBuildParams) -> BuiltDrawPath { + fn build_draw_path_on_cpu(&self, params: DrawPathBuildParams) -> BuiltDrawPath { let DrawPathBuildParams { - path_build_params: PathBuildParams { path_index, view_box, built_options, scene }, + path_build_params: PathBuildParams { + path_id, + view_box, + built_options, + prepare_mode, + scene, + }, paint_metadata, built_clip_paths, } = params; - let path_object = &scene.paths[path_index]; + let path_object = 
scene.get_draw_path(path_id.to_draw_path_id()); let outline = scene.apply_render_options(path_object.outline(), built_options); let paint_id = path_object.paint(); let paint_metadata = &paint_metadata[paint_id.0 as usize]; - let built_clip_path = path_object.clip_path().map(|clip_path_id| { - &built_clip_paths[clip_path_id.0 as usize] - }); let mut tiler = Tiler::new(self, + path_id, &outline, path_object.fill_rule(), view_box, + &prepare_mode, + &built_clip_paths, TilingPathInfo::Draw(DrawTilingPathInfo { paint_id, - paint_metadata, blend_mode: path_object.blend_mode(), - built_clip_path, + clip_path_id: path_object.clip_path(), fill_rule: path_object.fill_rule(), })); tiler.generate_tiles(); self.send_fills(tiler.object_builder.fills); - BuiltDrawPath { - path: tiler.object_builder.built_path, - blend_mode: path_object.blend_mode(), - filter: paint_metadata.filter(), - color_texture: paint_metadata.tile_batch_texture(), - sampling_flags_1: TextureSamplingFlags::empty(), - mask_0_fill_rule: path_object.fill_rule(), - } + + BuiltDrawPath::new(tiler.object_builder.built_path, path_object, paint_metadata) } - fn send_fills(&self, fills: Vec) { + fn send_fills(&self, fills: Vec) { if !fills.is_empty() { - self.listener.send(RenderCommand::AddFills(fills)); + self.sink.listener.send(RenderCommand::AddFillsD3D9(fills)); } } - fn build_clips(&self, built_draw_paths: &[BuiltDrawPath]) { - let mut built_clip_tiles = vec![]; - for built_draw_path in built_draw_paths { - for built_clip_tile in &built_draw_path.path.clip_tiles { - built_clip_tiles.push(*built_clip_tile); - } - } + fn build_tile_batches(&mut self, + paint_metadata: &[PaintMetadata], + prepare_mode: &PrepareMode, + built_paths: Option) { + let mut tile_batch_builder = TileBatchBuilder::new(&prepare_mode, built_paths); - built_clip_tiles.sort_by_key(|built_clip_tile| built_clip_tile.key); - - let mut batches: Vec = vec![]; - for built_clip_tile in built_clip_tiles { - if batches.is_empty() || 
batches.last_mut().unwrap().key != built_clip_tile.key { - batches.push(ClipBatch { key: built_clip_tile.key, clips: vec![] }); - } - batches.last_mut().unwrap().clips.push(built_clip_tile.clip); - } - - if !batches.is_empty() { - self.listener.send(RenderCommand::ClipTiles(batches)); - } - } - - fn cull_tiles(&self, paint_metadata: &[PaintMetadata], built_draw_paths: Vec) - -> CulledTiles { - let mut culled_tiles = CulledTiles { display_list: vec![] }; - - let mut remaining_layer_z_buffers = self.build_solid_tiles(&built_draw_paths); - remaining_layer_z_buffers.reverse(); - - // Process first Z-buffer. - let first_z_buffer = remaining_layer_z_buffers.pop().unwrap(); - let first_solid_tiles = first_z_buffer.build_solid_tiles(paint_metadata); - for batch in first_solid_tiles.batches { - culled_tiles.display_list.push(CulledDisplayItem::DrawTiles(batch)); - } - - let mut layer_z_buffers_stack = vec![first_z_buffer]; - let mut current_depth = 1; - - for display_item in &self.scene.display_list { + // Prepare display items. 
+ for display_item in self.scene.display_list() { match *display_item { DisplayItem::PushRenderTarget(render_target_id) => { - culled_tiles.display_list - .push(CulledDisplayItem::PushRenderTarget(render_target_id)); - - let z_buffer = remaining_layer_z_buffers.pop().unwrap(); - let solid_tiles = z_buffer.build_solid_tiles(paint_metadata); - for batch in solid_tiles.batches { - culled_tiles.display_list.push(CulledDisplayItem::DrawTiles(batch)); - } - layer_z_buffers_stack.push(z_buffer); - } - - DisplayItem::PopRenderTarget => { - culled_tiles.display_list.push(CulledDisplayItem::PopRenderTarget); - layer_z_buffers_stack.pop(); - } - - DisplayItem::DrawPaths { - start_index: start_draw_path_index, - end_index: end_draw_path_index, - } => { - for draw_path_index in start_draw_path_index..end_draw_path_index { - let built_draw_path = &built_draw_paths[draw_path_index as usize]; - let layer_z_buffer = layer_z_buffers_stack.last().unwrap(); - let color_texture = built_draw_path.color_texture; - - debug_assert!(built_draw_path.path.empty_tiles.is_empty() || - built_draw_path.blend_mode.is_destructive()); - self.add_alpha_tiles(&mut culled_tiles, - layer_z_buffer, - &built_draw_path.path.empty_tiles, - current_depth, - None, - built_draw_path.blend_mode, - built_draw_path.filter); - - self.add_alpha_tiles(&mut culled_tiles, - layer_z_buffer, - &built_draw_path.path.single_mask_tiles, - current_depth, - color_texture, - built_draw_path.blend_mode, - built_draw_path.filter); - - match built_draw_path.path.solid_tiles { - SolidTiles::Regular(ref tiles) => { - self.add_alpha_tiles(&mut culled_tiles, - layer_z_buffer, - tiles, - current_depth, - color_texture, - built_draw_path.blend_mode, - built_draw_path.filter); - } - SolidTiles::Occluders(_) => {} - } - - current_depth += 1; - } - } - } - } - - culled_tiles - } - - fn build_solid_tiles(&self, built_draw_paths: &[BuiltDrawPath]) -> Vec { - let effective_view_box = self.scene.effective_view_box(self.built_options); - let 
mut z_buffers = vec![ZBuffer::new(effective_view_box)]; - let mut z_buffer_index_stack = vec![0]; - let mut current_depth = 1; - - // Create Z-buffers. - for display_item in &self.scene.display_list { - match *display_item { - DisplayItem::PushRenderTarget { .. } => { - z_buffer_index_stack.push(z_buffers.len()); - z_buffers.push(ZBuffer::new(effective_view_box)); + tile_batch_builder.draw_commands + .push(RenderCommand::PushRenderTarget(render_target_id)) } DisplayItem::PopRenderTarget => { - z_buffer_index_stack.pop(); + tile_batch_builder.draw_commands.push(RenderCommand::PopRenderTarget) } - DisplayItem::DrawPaths { start_index, end_index } => { - let (start_index, end_index) = (start_index as usize, end_index as usize); - let z_buffer = &mut z_buffers[*z_buffer_index_stack.last().unwrap()]; - for (path_subindex, built_draw_path) in - built_draw_paths[start_index..end_index].iter().enumerate() { - let path_index = (path_subindex + start_index) as u32; - let path = &self.scene.paths[path_index as usize]; - let metadata = DepthMetadata { paint_id: path.paint() }; - match built_draw_path.path.solid_tiles { - SolidTiles::Regular(_) => { - z_buffer.update(&[], current_depth, metadata); - } - SolidTiles::Occluders(ref occluders) => { - z_buffer.update(occluders, current_depth, metadata); - } - } - current_depth += 1; - } + DisplayItem::DrawPaths(ref path_id_range) => { + tile_batch_builder.build_tile_batches_for_draw_path_display_item( + &self.scene, + &self.sink, + self.built_options, + path_id_range.start..path_id_range.end, + paint_metadata, + prepare_mode); } } } - debug_assert_eq!(z_buffer_index_stack.len(), 1); - z_buffers - } - - fn add_alpha_tiles(&self, - culled_tiles: &mut CulledTiles, - layer_z_buffer: &ZBuffer, - built_alpha_tiles: &[BuiltTile], - current_depth: u32, - color_texture: Option, - blend_mode: BlendMode, - filter: Filter) { - let mut batch_indices: Vec = vec![]; - for built_alpha_tile in built_alpha_tiles { - // Early cull if possible. 
- let alpha_tile_coords = built_alpha_tile.tile.tile_position(); - if !layer_z_buffer.test(alpha_tile_coords, current_depth) { - continue; - } - - // Find an appropriate batch if we can. - let mut dest_batch_index = batch_indices.iter().filter(|&batch_index| { - batch_index.tile_page == built_alpha_tile.page - }).next().cloned(); - - // If no batch was found, try to reuse the last batch in the display list. - // - // TODO(pcwalton): We could try harder to find a batch by taking tile positions into - // account... - if dest_batch_index.is_none() { - match culled_tiles.display_list.last() { - Some(&CulledDisplayItem::DrawTiles(TileBatch { - tiles: _, - color_texture: ref batch_color_texture, - blend_mode: batch_blend_mode, - filter: batch_filter, - tile_page: batch_tile_page - })) if *batch_color_texture == color_texture && - batch_blend_mode == blend_mode && - batch_filter == filter && - !batch_blend_mode.needs_readable_framebuffer() && - batch_tile_page == built_alpha_tile.page => { - dest_batch_index = Some(BatchIndex { - display_item_index: culled_tiles.display_list.len() - 1, - tile_page: batch_tile_page, - }); - batch_indices.push(dest_batch_index.unwrap()); - } - _ => {} - } - } - - // If it's still the case that no suitable batch was found, then make a new one. - if dest_batch_index.is_none() { - dest_batch_index = Some(BatchIndex { - display_item_index: culled_tiles.display_list.len(), - tile_page: built_alpha_tile.page, - }); - batch_indices.push(dest_batch_index.unwrap()); - culled_tiles.display_list.push(CulledDisplayItem::DrawTiles(TileBatch { - tiles: vec![], - color_texture, - blend_mode, - filter, - tile_page: built_alpha_tile.page, - })); - } - - // Add to the appropriate batch. 
- match culled_tiles.display_list[dest_batch_index.unwrap().display_item_index] { - CulledDisplayItem::DrawTiles(ref mut tiles) => { - tiles.tiles.push(built_alpha_tile.tile); - } - _ => unreachable!(), - } - } - - #[derive(Clone, Copy)] - struct BatchIndex { - display_item_index: usize, - tile_page: u16, - } - } - - fn pack_tiles(&mut self, culled_tiles: CulledTiles) { - self.listener.send(RenderCommand::BeginTileDrawing); - for display_item in culled_tiles.display_list { - match display_item { - CulledDisplayItem::DrawTiles(batch) => { - self.listener.send(RenderCommand::DrawTiles(batch)) - } - CulledDisplayItem::PushRenderTarget(render_target_id) => { - self.listener.send(RenderCommand::PushRenderTarget(render_target_id)) - } - CulledDisplayItem::PopRenderTarget => { - self.listener.send(RenderCommand::PopRenderTarget) - } - } - } + // Send commands. + tile_batch_builder.send_to(&self.sink); } fn finish_building(&mut self, paint_metadata: &[PaintMetadata], - built_draw_paths: Vec) { - self.listener.send(RenderCommand::FlushFills); - self.build_clips(&built_draw_paths); - let culled_tiles = self.cull_tiles(paint_metadata, built_draw_paths); - self.pack_tiles(culled_tiles); + built_paths: Option, + prepare_mode: &PrepareMode) { + match self.sink.renderer_level { + RendererLevel::D3D9 => self.sink.listener.send(RenderCommand::FlushFillsD3D9), + RendererLevel::D3D11 => {} + } + + self.build_tile_batches(paint_metadata, prepare_mode, built_paths); } fn needs_readable_framebuffer(&self) -> bool { let mut framebuffer_nesting = 0; - for display_item in &self.scene.display_list { + for display_item in self.scene.display_list() { match *display_item { DisplayItem::PushRenderTarget(_) => framebuffer_nesting += 1, DisplayItem::PopRenderTarget => framebuffer_nesting -= 1, - DisplayItem::DrawPaths { start_index, end_index } => { + DisplayItem::DrawPaths(ref draw_path_id_range) => { if framebuffer_nesting > 0 { continue; } - for path_index in start_index..end_index { - let 
blend_mode = self.scene.paths[path_index as usize].blend_mode(); + for draw_path_id in draw_path_id_range.start.0..draw_path_id_range.end.0 { + let draw_path_id = DrawPathId(draw_path_id); + let blend_mode = self.scene.get_draw_path(draw_path_id).blend_mode(); if blend_mode.needs_readable_framebuffer() { return true; } @@ -517,10 +386,15 @@ impl<'a, 'b> SceneBuilder<'a, 'b> { } } +struct BuiltPaths { + draw: Vec, +} + struct PathBuildParams<'a> { - path_index: usize, + path_id: PathId, view_box: RectF, built_options: &'a PreparedBuildOptions, + prepare_mode: PrepareMode, scene: &'a Scene, } @@ -531,74 +405,102 @@ struct DrawPathBuildParams<'a> { } impl BuiltPath { - fn new(path_bounds: RectF, + fn new(path_id: PathId, + path_bounds: RectF, view_box_bounds: RectF, fill_rule: FillRule, + prepare_mode: &PrepareMode, tiling_path_info: &TilingPathInfo) -> BuiltPath { - let occludes = match *tiling_path_info { - TilingPathInfo::Draw(ref draw_tiling_path_info) => { - draw_tiling_path_info.paint_metadata.is_opaque && - draw_tiling_path_info.blend_mode.occludes_backdrop() - } - TilingPathInfo::Clip => true, + let paint_id = match *tiling_path_info { + TilingPathInfo::Draw(ref draw_tiling_path_info) => draw_tiling_path_info.paint_id, + TilingPathInfo::Clip => PaintId(0), }; + let ctrl_byte = tiling_path_info.to_ctrl(); + let tile_map_bounds = if tiling_path_info.has_destructive_blend_mode() { view_box_bounds } else { path_bounds }; + let tile_bounds = tiles::round_rect_out_to_tile_bounds(tile_map_bounds); + + let clip_path_id = match *tiling_path_info { + TilingPathInfo::Draw(ref draw_tiling_path_info) => { + draw_tiling_path_info.clip_path_id + } + _ => None, + }; + + let data = match *prepare_mode { + PrepareMode::CPU => { + BuiltPathData::CPU(BuiltPathBinCPUData { + backdrops: vec![0; tile_bounds.width() as usize], + tiles: DenseTileMap::from_builder(|tile_coord| { + TileObjectPrimitive { + tile_x: tile_coord.x() as i16, + tile_y: tile_coord.y() as i16, + alpha_tile_id: 
AlphaTileId(!0), + path_id, + color: paint_id.0, + backdrop: 0, + ctrl: ctrl_byte, + } + }, tile_bounds), + clip_tiles: match *tiling_path_info { + TilingPathInfo::Draw(_) if clip_path_id.is_some() => { + Some(DenseTileMap::from_builder(|_| { + Clip { + dest_tile_id: AlphaTileId(!0), + dest_backdrop: 0, + src_tile_id: AlphaTileId(!0), + src_backdrop: 0, + } + }, tile_bounds)) + } + _ => None, + }, + }) + } + PrepareMode::TransformCPUBinGPU => { + BuiltPathData::TransformCPUBinGPU(BuiltPathTransformCPUBinGPUData { + outline: Outline::new(), + }) + } + PrepareMode::GPU { .. } => BuiltPathData::GPU, + }; + BuiltPath { - empty_tiles: vec![], - single_mask_tiles: vec![], - clip_tiles: vec![], - solid_tiles: if occludes { - SolidTiles::Occluders(vec![]) - } else { - SolidTiles::Regular(vec![]) - }, - tiles: DenseTileMap::new(tiles::round_rect_out_to_tile_bounds(tile_map_bounds)), + data, + tile_bounds, + clip_path_id, fill_rule, + ctrl_byte, + paint_id, } } } -impl Occluder { - #[inline] - pub(crate) fn new(coords: Vector2I) -> Occluder { - Occluder { coords } - } -} - -struct CulledTiles { - display_list: Vec, -} - -enum CulledDisplayItem { - DrawTiles(TileBatch), - PushRenderTarget(RenderTargetId), - PopRenderTarget, -} - -#[derive(Clone, Copy, Debug, Default)] -pub struct TileStats { - pub solid_tile_count: u32, - pub alpha_tile_count: u32, -} - // Utilities for built objects impl ObjectBuilder { - pub(crate) fn new(path_bounds: RectF, + // If `outline` is `None`, then tiling is being done on CPU. Otherwise, it's done on GPU. 
+ pub(crate) fn new(path_id: PathId, + path_bounds: RectF, view_box_bounds: RectF, fill_rule: FillRule, + prepare_mode: &PrepareMode, tiling_path_info: &TilingPathInfo) -> ObjectBuilder { - let built_path = BuiltPath::new(path_bounds, view_box_bounds, fill_rule, tiling_path_info); - let current_backdrops = vec![0; built_path.tiles.rect.width() as usize]; - ObjectBuilder { built_path, bounds: path_bounds, current_backdrops, fills: vec![] } + let built_path = BuiltPath::new(path_id, + path_bounds, + view_box_bounds, + fill_rule, + prepare_mode, + tiling_path_info); + ObjectBuilder { built_path, bounds: path_bounds, fills: vec![] } } pub(crate) fn add_fill(&mut self, @@ -618,7 +520,7 @@ impl ObjectBuilder { let tile_size = F32x4::splat(TILE_WIDTH as f32); let tile_upper_left = tile_coords.to_f32().0.to_f32x4().xyxy() * tile_size; - // Convert to 4.8 fixed point. + // Convert to 8.8 fixed point. let segment = (segment.0 - tile_upper_left) * F32x4::splat(256.0); let (min, max) = (F32x4::default(), F32x4::splat((TILE_WIDTH * 256 - 1) as f32)); let segment = segment.clamp(min, max).to_i32x4(); @@ -633,181 +535,604 @@ impl ObjectBuilder { // Allocate a global tile if necessary. let alpha_tile_id = self.get_or_allocate_alpha_tile_index(scene_builder, tile_coords); - // Pack whole pixels. - let px = (segment & I32x4::splat(0xf00)).to_u32x4(); - let px = (px >> 8).to_i32x4() | (px >> 4).to_i32x4().yxwz(); - // Pack instance data. debug!("... 
OK, pushing"); - self.fills.push(FillBatchEntry { - page: alpha_tile_id.page(), - fill: Fill { - px: LineSegmentU4 { from: px[0] as u8, to: px[2] as u8 }, - subpx: LineSegmentU8 { - from_x: from_x as u8, - from_y: from_y as u8, - to_x: to_x as u8, - to_y: to_y as u8, - }, - alpha_tile_index: alpha_tile_id.tile(), + self.fills.push(Fill { + line_segment: LineSegmentU16 { + from_x: from_x as u16, + from_y: from_y as u16, + to_x: to_x as u16, + to_y: to_y as u16, }, + // If fills are being done with compute, then this value will be overwritten later. + link: alpha_tile_id.0, }); } - fn get_or_allocate_alpha_tile_index( - &mut self, - scene_builder: &SceneBuilder, - tile_coords: Vector2I, - ) -> AlphaTileId { - let local_tile_index = self.built_path.tiles.coords_to_index_unchecked(tile_coords); - let alpha_tile_id = self.built_path.tiles.data[local_tile_index].alpha_tile_id; + fn get_or_allocate_alpha_tile_index(&mut self, + scene_builder: &SceneBuilder, + tile_coords: Vector2I) + -> AlphaTileId { + let local_tile_index = self.tile_coords_to_local_index_unchecked(tile_coords) as usize; + + let tiles = match self.built_path.data { + BuiltPathData::CPU(ref mut cpu_data) => &mut cpu_data.tiles, + BuiltPathData::GPU | BuiltPathData::TransformCPUBinGPU(_) => { + panic!("Can't allocate alpha tile index on CPU if not doing building on CPU!") + } + }; + + let alpha_tile_id = tiles.data[local_tile_index].alpha_tile_id; if alpha_tile_id.is_valid() { return alpha_tile_id; } let alpha_tile_id = AlphaTileId::new(&scene_builder.next_alpha_tile_indices, 0); - self.built_path.tiles.data[local_tile_index].alpha_tile_id = alpha_tile_id; + tiles.data[local_tile_index].alpha_tile_id = alpha_tile_id; alpha_tile_id } #[inline] - pub(crate) fn tile_coords_to_local_index(&self, coords: Vector2I) -> Option { - self.built_path.tiles.coords_to_index(coords).map(|index| index as u32) + pub(crate) fn tile_coords_to_local_index_unchecked(&self, coords: Vector2I) -> u32 { + let tile_rect = 
self.built_path.tile_bounds; + let offset = coords - tile_rect.origin(); + (offset.x() + tile_rect.width() * offset.y()) as u32 } #[inline] - pub(crate) fn local_tile_index_to_coords(&self, tile_index: u32) -> Vector2I { - self.built_path.tiles.index_to_coords(tile_index as usize) + pub(crate) fn tile_coords_to_local_index(&self, coords: Vector2I) -> Option { + if self.built_path.tile_bounds.contains_point(coords) { + Some(self.tile_coords_to_local_index_unchecked(coords)) + } else { + None + } } #[inline] pub(crate) fn adjust_alpha_tile_backdrop(&mut self, tile_coords: Vector2I, delta: i8) { - let tile_offset = tile_coords - self.built_path.tiles.rect.origin(); - if tile_offset.x() < 0 || tile_offset.x() >= self.built_path.tiles.rect.width() || - tile_offset.y() >= self.built_path.tiles.rect.height() { + let (tiles, backdrops) = match self.built_path.data { + BuiltPathData::CPU(ref mut tiled_data) => { + (&mut tiled_data.tiles, &mut tiled_data.backdrops) + } + BuiltPathData::TransformCPUBinGPU(_) | BuiltPathData::GPU => unreachable!(), + }; + + let tile_offset = tile_coords - tiles.rect.origin(); + if tile_offset.x() < 0 || tile_offset.x() >= tiles.rect.width() || + tile_offset.y() >= tiles.rect.height() { return; } if tile_offset.y() < 0 { - self.current_backdrops[tile_offset.x() as usize] += delta; + backdrops[tile_offset.x() as usize] += delta as i32; return; } - let local_tile_index = self.built_path.tiles.coords_to_index_unchecked(tile_coords); - self.built_path.tiles.data[local_tile_index].backdrop += delta; + let local_tile_index = tiles.coords_to_index_unchecked(tile_coords); + tiles.data[local_tile_index].backdrop += delta; } } -impl<'a> PackedTile<'a> { - pub(crate) fn add_to(&self, - tiles: &mut Vec, - clips: &mut Vec, - draw_tiling_path_info: &DrawTilingPathInfo, - scene_builder: &SceneBuilder) { - let draw_tile_page = self.draw_tile.alpha_tile_id.page() as u16; - let draw_tile_index = self.draw_tile.alpha_tile_id.tile() as u16; - let 
draw_tile_backdrop = self.draw_tile.backdrop as i8; - - match self.clip_tile { - None => { - tiles.push(BuiltTile { - page: draw_tile_page, - tile: Tile::new_alpha(self.tile_coords, - draw_tile_index, - draw_tile_backdrop, - draw_tiling_path_info), - }); - } - Some(clip_tile) => { - let clip_tile_page = clip_tile.alpha_tile_id.page() as u16; - let clip_tile_index = clip_tile.alpha_tile_id.tile() as u16; - let clip_tile_backdrop = clip_tile.backdrop; - - let dest_tile_id = AlphaTileId::new(&scene_builder.next_alpha_tile_indices, 1); - let dest_tile_page = dest_tile_id.page() as u16; - let dest_tile_index = dest_tile_id.tile() as u16; - - clips.push(BuiltClip { - clip: Clip::new(dest_tile_index, draw_tile_index, draw_tile_backdrop), - key: ClipBatchKey { - src_page: draw_tile_page, - dest_page: dest_tile_page, - kind: ClipBatchKind::Draw, - }, - }); - clips.push(BuiltClip { - clip: Clip::new(dest_tile_index, clip_tile_index, clip_tile_backdrop), - key: ClipBatchKey { - src_page: clip_tile_page, - dest_page: dest_tile_page, - kind: ClipBatchKind::Clip, - }, - }); - tiles.push(BuiltTile { - page: dest_tile_page, - tile: Tile::new_alpha(self.tile_coords, - dest_tile_index, - 0, - draw_tiling_path_info), - }); - } +impl TileBatchDataD3D11 { + fn new(batch_id: TileBatchId, mode: &PrepareMode, path_source: PathSource) + -> TileBatchDataD3D11 { + TileBatchDataD3D11 { + batch_id, + path_count: 0, + tile_count: 0, + segment_count: 0, + path_source, + prepare_info: match *mode { + PrepareMode::CPU => unimplemented!(), + PrepareMode::TransformCPUBinGPU => { + PrepareTilesInfoD3D11 { + backdrops: vec![], + propagate_metadata: vec![], + dice_metadata: vec![], + tile_path_info: vec![], + transform: Transform2F::default(), + } + } + PrepareMode::GPU { ref transform } => { + PrepareTilesInfoD3D11 { + backdrops: vec![], + propagate_metadata: vec![], + dice_metadata: vec![], + tile_path_info: vec![], + transform: *transform, + } + } + }, + clipped_path_info: None, } } -} -impl Tile { 
- #[inline] - fn new_alpha(tile_origin: Vector2I, - draw_tile_index: u16, - draw_tile_backdrop: i8, - draw_tiling_path_info: &DrawTilingPathInfo) - -> Tile { - let mask_0_uv = calculate_mask_uv(draw_tile_index); + fn push(&mut self, + path: &BuiltPath, + global_path_id: PathId, + batch_clip_path_index: Option, + z_write: bool, + sink: &SceneSink) + -> PathBatchIndex { + let batch_path_index = PathBatchIndex(self.path_count); + self.path_count += 1; - let mut ctrl = 0; - match draw_tiling_path_info.fill_rule { - FillRule::EvenOdd => ctrl |= TILE_CTRL_MASK_EVEN_ODD << TILE_CTRL_MASK_0_SHIFT, - FillRule::Winding => ctrl |= TILE_CTRL_MASK_WINDING << TILE_CTRL_MASK_0_SHIFT, + self.prepare_info.propagate_metadata.push(PropagateMetadataD3D11 { + tile_rect: path.tile_bounds, + tile_offset: self.tile_count, + path_index: batch_path_index, + z_write: z_write as u32, + clip_path_index: batch_clip_path_index.unwrap_or(PathBatchIndex::none()), + backdrop_offset: self.prepare_info.backdrops.len() as u32, + pad0: 0, + pad1: 0, + pad2: 0, + }); + + match path.data { + BuiltPathData::CPU(ref data) => { + self.prepare_info.backdrops.reserve(data.backdrops.len()); + for (tile_x_offset, backdrop) in data.backdrops.iter().enumerate() { + self.prepare_info.backdrops.push(BackdropInfoD3D11 { + initial_backdrop: *backdrop as i32, + tile_x_offset: tile_x_offset as i32, + path_index: batch_path_index, + }); + } + } + BuiltPathData::TransformCPUBinGPU(_) | BuiltPathData::GPU => { + init_backdrops(&mut self.prepare_info.backdrops, + batch_path_index, + path.tile_bounds); + } } - Tile { - tile_x: tile_origin.x() as i16, - tile_y: tile_origin.y() as i16, - mask_0_u: mask_0_uv.x() as u8, - mask_0_v: mask_0_uv.y() as u8, - mask_0_backdrop: draw_tile_backdrop, - ctrl: ctrl as u16, + // Add tiles. 
+ let last_scene = sink.last_scene.as_ref().unwrap(); + let segment_ranges = match self.path_source { + PathSource::Draw => &last_scene.draw_segment_ranges, + PathSource::Clip => &last_scene.clip_segment_ranges, + }; + let segment_range = &segment_ranges[global_path_id.0 as usize]; + self.prepare_info.dice_metadata.push(DiceMetadataD3D11 { + first_batch_segment_index: self.segment_count, + first_global_segment_index: segment_range.start, + global_path_id, pad: 0, - color: draw_tiling_path_info.paint_id.0, - } - } + }); + self.prepare_info.tile_path_info.push(TilePathInfoD3D11 { + tile_min_x: path.tile_bounds.min_x() as i16, + tile_min_y: path.tile_bounds.min_y() as i16, + tile_max_x: path.tile_bounds.max_x() as i16, + tile_max_y: path.tile_bounds.max_y() as i16, + first_tile_index: self.tile_count, + color: path.paint_id.0, + ctrl: path.ctrl_byte, + backdrop: 0, + }); - #[inline] - pub fn tile_position(&self) -> Vector2I { - vec2i(self.tile_x as i32, self.tile_y as i32) + self.tile_count += path.tile_bounds.area() as u32; + self.segment_count += segment_range.end - segment_range.start; + + // Handle clip. + + if batch_clip_path_index.is_none() { + return batch_path_index; + } + + if self.clipped_path_info.is_none() { + self.clipped_path_info = Some(ClippedPathInfo { + clip_batch_id: TileBatchId(0), + clipped_path_count: 0, + max_clipped_tile_count: 0, + clips: match sink.renderer_level { + RendererLevel::D3D9 => Some(vec![]), + RendererLevel::D3D11 => None, + }, + }); + } + + let clipped_path_info = self.clipped_path_info.as_mut().unwrap(); + clipped_path_info.clipped_path_count += 1; + clipped_path_info.max_clipped_tile_count += path.tile_bounds.area() as u32; + + // If clips are computed on CPU, add them to this batch. + if let Some(ref mut dest_clips) = clipped_path_info.clips { + let src_tiles = match path.data { + BuiltPathData::CPU(BuiltPathBinCPUData { + clip_tiles: Some(ref src_tiles), + .. 
+ }) => src_tiles, + _ => panic!("Clip tiles weren't computed on CPU!"), + }; + dest_clips.extend_from_slice(&src_tiles.data); + } + + batch_path_index } } -impl Clip { - #[inline] - fn new(dest_tile_index: u16, src_tile_index: u16, src_backdrop: i8) -> Clip { - let dest_uv = calculate_mask_uv(dest_tile_index); - let src_uv = calculate_mask_uv(src_tile_index); - Clip { - dest_u: dest_uv.x() as u8, - dest_v: dest_uv.y() as u8, - src_u: src_uv.x() as u8, - src_v: src_uv.y() as u8, - backdrop: src_backdrop, - pad_0: 0, - pad_1: 0, +fn init_backdrops(backdrops: &mut Vec, + path_index: PathBatchIndex, + tile_rect: RectI) { + for tile_x_offset in 0..tile_rect.width() { + backdrops.push(BackdropInfoD3D11 { initial_backdrop: 0, path_index, tile_x_offset }); + } +} + +struct BuiltSegments { + draw_segments: SegmentsD3D11, + clip_segments: SegmentsD3D11, + draw_segment_ranges: Vec>, + clip_segment_ranges: Vec>, +} + +impl BuiltSegments { + fn from_scene(scene: &Scene) -> BuiltSegments { + let mut built_segments = BuiltSegments { + draw_segments: SegmentsD3D11::new(), + clip_segments: SegmentsD3D11::new(), + draw_segment_ranges: Vec::with_capacity(scene.draw_paths().len()), + clip_segment_ranges: Vec::with_capacity(scene.clip_paths().len()), + }; + + for clip_path in scene.clip_paths() { + let range = built_segments.clip_segments.add_path(clip_path.outline()); + built_segments.clip_segment_ranges.push(range); + } + for draw_path in scene.draw_paths() { + let range = built_segments.draw_segments.add_path(draw_path.outline()); + built_segments.draw_segment_ranges.push(range); + } + + built_segments + } +} + +impl SegmentsD3D11 { + fn new() -> SegmentsD3D11 { + SegmentsD3D11 { points: vec![], indices: vec![] } + } + + fn add_path(&mut self, outline: &Outline) -> Range { + let first_segment_index = self.indices.len() as u32; + for contour in outline.contours() { + let point_count = contour.len() as u32; + self.points.reserve(point_count as usize); + + for point_index in 
0..point_count { + if !contour.flags_of(point_index).intersects(PointFlags::CONTROL_POINT_0 | + PointFlags::CONTROL_POINT_1) { + let mut flags = 0; + if point_index + 1 < point_count && + contour.flags_of(point_index + 1) + .contains(PointFlags::CONTROL_POINT_0) { + if point_index + 2 < point_count && + contour.flags_of(point_index + 2) + .contains(PointFlags::CONTROL_POINT_1) { + flags = CURVE_IS_CUBIC + } else { + flags = CURVE_IS_QUADRATIC + } + } + + self.indices.push(SegmentIndicesD3D11 { + first_point_index: self.points.len() as u32, + flags, + }); + } + + self.points.push(contour.position_of(point_index)); + } + + self.points.push(contour.position_of(0)); + } + + let last_segment_index = self.indices.len() as u32; + first_segment_index..last_segment_index + } +} + +struct TileBatchBuilder { + prepare_commands: Vec, + draw_commands: Vec, + clip_id_to_path_batch_index: FxHashMap, + next_batch_id: TileBatchId, + level: TileBatchBuilderLevel, +} + +enum TileBatchBuilderLevel { + D3D9 { built_paths: BuiltPaths }, + D3D11 { clip_prepare_batch: TileBatchDataD3D11 }, +} + +impl TileBatchBuilder { + fn new(prepare_mode: &PrepareMode, built_paths: Option) -> TileBatchBuilder { + TileBatchBuilder { + prepare_commands: vec![], + draw_commands: vec![], + next_batch_id: TileBatchId(1), + clip_id_to_path_batch_index: FxHashMap::default(), + level: match built_paths { + None => { + TileBatchBuilderLevel::D3D11 { + clip_prepare_batch: TileBatchDataD3D11::new(TileBatchId(0), + &prepare_mode, + PathSource::Clip), + } + } + Some(built_paths) => TileBatchBuilderLevel::D3D9 { built_paths }, + }, + } + } + + fn build_tile_batches_for_draw_path_display_item(&mut self, + scene: &Scene, + sink: &SceneSink, + built_options: &PreparedBuildOptions, + draw_path_id_range: Range, + paint_metadata: &[PaintMetadata], + prepare_mode: &PrepareMode) { + let mut draw_tile_batch = None; + for draw_path_id in draw_path_id_range.start.0..draw_path_id_range.end.0 { + let draw_path_id = 
DrawPathId(draw_path_id); + let draw_path = match self.level { + TileBatchBuilderLevel::D3D11 { .. } => { + match self.prepare_draw_path_for_gpu_binning(scene, + built_options, + draw_path_id, + prepare_mode, + paint_metadata) { + None => continue, + Some(built_draw_path) => Cow::Owned(built_draw_path), + } + } + TileBatchBuilderLevel::D3D9 { ref built_paths } => { + Cow::Borrowed(&built_paths.draw[draw_path_id.0 as usize]) + } + }; + + // Try to reuse the current batch if we can. + let flush_needed = match draw_tile_batch { + Some(DrawTileBatch::D3D11(ref mut existing_batch)) => { + !fixup_batch_for_new_path_if_possible(&mut existing_batch.color_texture, + &draw_path) + } + Some(DrawTileBatch::D3D9(ref mut existing_batch)) => { + !fixup_batch_for_new_path_if_possible(&mut existing_batch.color_texture, + &draw_path) + } + None => false, + }; + + // If we couldn't reuse the batch, flush it. + if flush_needed { + match draw_tile_batch.take() { + Some(DrawTileBatch::D3D11(batch_to_flush)) => { + self.draw_commands.push(RenderCommand::DrawTilesD3D11(batch_to_flush)); + } + Some(DrawTileBatch::D3D9(batch_to_flush)) => { + self.draw_commands.push(RenderCommand::DrawTilesD3D9(batch_to_flush)); + } + _ => {} + } + } + + // Create a new batch if necessary. + if draw_tile_batch.is_none() { + draw_tile_batch = match self.level { + TileBatchBuilderLevel::D3D9 { .. } => { + let tile_bounds = tiles::round_rect_out_to_tile_bounds(scene.view_box()); + Some(DrawTileBatch::D3D9(DrawTileBatchD3D9 { + tiles: vec![], + clips: vec![], + z_buffer_data: DenseTileMap::from_builder(|_| 0, tile_bounds), + color_texture: draw_path.color_texture, + filter: draw_path.filter, + blend_mode: draw_path.blend_mode, + })) + } + TileBatchBuilderLevel::D3D11 { .. 
} => { + Some(DrawTileBatch::D3D11(DrawTileBatchD3D11 { + tile_batch_data: TileBatchDataD3D11::new(self.next_batch_id, + &prepare_mode, + PathSource::Draw), + color_texture: draw_path.color_texture, + })) + } + }; + self.next_batch_id.0 += 1; + } + + // Add clip path if necessary. + let clip_path = match draw_path.clip_path_id { + None => None, + Some(clip_path_id) => { + match self.clip_id_to_path_batch_index.get(&clip_path_id) { + Some(&clip_path_batch_index) => Some(clip_path_batch_index), + None => { + match self.level { + TileBatchBuilderLevel::D3D9 { .. } => None, + TileBatchBuilderLevel::D3D11 { ref mut clip_prepare_batch } => { + let clip_path = + prepare_clip_path_for_gpu_binning(scene, + built_options, + clip_path_id, + prepare_mode); + let clip_path_index = + clip_prepare_batch.push(&clip_path, + clip_path_id.to_path_id(), + None, + true, + sink); + self.clip_id_to_path_batch_index.insert(clip_path_id, + clip_path_index); + Some(clip_path_index) + } + } + + } + } + } + }; + + let draw_tile_batch = draw_tile_batch.as_mut().unwrap(); + match *draw_tile_batch { + DrawTileBatch::D3D11(ref mut draw_tile_batch) => { + draw_tile_batch.tile_batch_data.push(&draw_path.path, + draw_path_id.to_path_id(), + clip_path, + draw_path.occludes, + sink); + } + DrawTileBatch::D3D9(ref mut draw_tile_batch) => { + let built_paths = match self.level { + TileBatchBuilderLevel::D3D9 { ref built_paths } => built_paths, + TileBatchBuilderLevel::D3D11 { .. 
} => unreachable!(), + }; + + let cpu_data = match built_paths.draw[draw_path_id.0 as usize].path.data { + BuiltPathData::CPU(ref cpu_data) => cpu_data, + BuiltPathData::GPU | BuiltPathData::TransformCPUBinGPU(_) => { + unreachable!() + } + }; + + for tile in &cpu_data.tiles.data { + if tile.alpha_tile_id == AlphaTileId(!0) && tile.backdrop == 0 { + continue; + } + + draw_tile_batch.tiles.push(*tile); + + if !draw_path.occludes || tile.alpha_tile_id != AlphaTileId(!0) { + continue; + } + + let tile_coords = vec2i(tile.tile_x as i32, tile.tile_y as i32); + let z_value = draw_tile_batch.z_buffer_data + .get_mut(tile_coords) + .expect("Z value out of bounds!"); + *z_value = (*z_value).max(draw_path_id.0 as i32); + } + + let clip_tiles = match cpu_data.clip_tiles { + None => continue, + Some(ref clip_tiles) => clip_tiles, + }; + for clip_tile in &clip_tiles.data { + if clip_tile.dest_tile_id != AlphaTileId(!0) && + clip_tile.src_tile_id != AlphaTileId(!0) { + draw_tile_batch.clips.push(*clip_tile); + } + } + } + } + } + + match draw_tile_batch { + Some(DrawTileBatch::D3D11(draw_tile_batch)) => { + self.draw_commands.push(RenderCommand::DrawTilesD3D11(draw_tile_batch)); + } + Some(DrawTileBatch::D3D9(draw_tile_batch)) => { + self.draw_commands.push(RenderCommand::DrawTilesD3D9(draw_tile_batch)); + } + None => {} + } + } + + fn prepare_draw_path_for_gpu_binning(&self, + scene: &Scene, + built_options: &PreparedBuildOptions, + draw_path_id: DrawPathId, + prepare_mode: &PrepareMode, + paint_metadata: &[PaintMetadata]) + -> Option { + let transform = match *prepare_mode { + PrepareMode::GPU { transform } => transform, + PrepareMode::CPU | PrepareMode::TransformCPUBinGPU => { + panic!("`prepare_draw_path_for_gpu_binning()` requires a GPU prepare mode!") + } + }; + + let effective_view_box = scene.effective_view_box(built_options); + let draw_path = scene.get_draw_path(draw_path_id); + + let mut path_bounds = transform * draw_path.outline().bounds(); + match 
path_bounds.intersection(effective_view_box) { + Some(intersection) => path_bounds = intersection, + None => return None, + } + + let paint_id = draw_path.paint(); + let paint_metadata = &paint_metadata[paint_id.0 as usize]; + let built_path = BuiltPath::new(draw_path_id.to_path_id(), + path_bounds, + effective_view_box, + draw_path.fill_rule(), + &prepare_mode, + &TilingPathInfo::Draw(DrawTilingPathInfo { + paint_id, + blend_mode: draw_path.blend_mode(), + clip_path_id: draw_path.clip_path(), + fill_rule: draw_path.fill_rule(), + })); + Some(BuiltDrawPath::new(built_path, draw_path, paint_metadata)) + } + + fn send_to(self, sink: &SceneSink) { + if let TileBatchBuilderLevel::D3D11 { clip_prepare_batch } = self.level { + if clip_prepare_batch.path_count > 0 { + sink.listener.send(RenderCommand::PrepareClipTilesD3D11(clip_prepare_batch)); + } + } + + for command in self.prepare_commands { + sink.listener.send(command); + } + for command in self.draw_commands { + sink.listener.send(command); } } } -fn calculate_mask_uv(tile_index: u16) -> Vector2I { - debug_assert_eq!(MASK_TILES_ACROSS, MASK_TILES_DOWN); - let mask_u = tile_index as i32 % MASK_TILES_ACROSS as i32; - let mask_v = tile_index as i32 / MASK_TILES_ACROSS as i32; - vec2i(mask_u, mask_v) +fn prepare_clip_path_for_gpu_binning(scene: &Scene, + built_options: &PreparedBuildOptions, + clip_path_id: ClipPathId, + prepare_mode: &PrepareMode) + -> BuiltPath { + let transform = match *prepare_mode { + PrepareMode::GPU { transform } => transform, + PrepareMode::CPU | PrepareMode::TransformCPUBinGPU => { + panic!("`prepare_clip_path_for_gpu_binning()` requires a GPU prepare mode!") + } + }; + let effective_view_box = scene.effective_view_box(built_options); + let clip_path = scene.get_clip_path(clip_path_id); + let path_bounds = transform * clip_path.outline().bounds(); + // TODO(pcwalton): Clip to view box! 
+ BuiltPath::new(clip_path_id.to_path_id(), + path_bounds, + effective_view_box, + clip_path.fill_rule(), + &prepare_mode, + &TilingPathInfo::Clip) +} + +fn fixup_batch_for_new_path_if_possible(batch_color_texture: &mut Option, + draw_path: &BuiltDrawPath) + -> bool { + if draw_path.color_texture.is_some() { + if batch_color_texture.is_none() { + *batch_color_texture = draw_path.color_texture; + return true; + } + if draw_path.color_texture != *batch_color_texture { + debug!("batch break: path color texture {:?} batch color texture {:?}", + draw_path.color_texture, + batch_color_texture); + return false; + } + } + true } diff --git a/renderer/src/concurrent/scene_proxy.rs b/renderer/src/concurrent/scene_proxy.rs index 69cdf131..e5a88db1 100644 --- a/renderer/src/concurrent/scene_proxy.rs +++ b/renderer/src/concurrent/scene_proxy.rs @@ -20,10 +20,11 @@ //! You don't need to use this API to use Pathfinder; it's only a convenience. use crate::concurrent::executor::Executor; +use crate::gpu::options::RendererLevel; use crate::gpu::renderer::Renderer; use crate::gpu_data::RenderCommand; use crate::options::{BuildOptions, RenderCommandListener}; -use crate::scene::Scene; +use crate::scene::{Scene, SceneSink}; use crossbeam_channel::{self, Receiver, Sender}; use pathfinder_geometry::rect::RectF; use pathfinder_gpu::Device; @@ -33,19 +34,28 @@ const MAX_MESSAGES_IN_FLIGHT: usize = 1024; pub struct SceneProxy { sender: Sender, + receiver: Receiver, } impl SceneProxy { - pub fn new(executor: E) -> SceneProxy where E: Executor + Send + 'static { - SceneProxy::from_scene(Scene::new(), executor) + pub fn new(renderer_level: RendererLevel, executor: E) -> SceneProxy + where E: Executor + Send + 'static { + SceneProxy::from_scene(Scene::new(), renderer_level, executor) } - pub fn from_scene(scene: Scene, executor: E) -> SceneProxy + pub fn from_scene(scene: Scene, renderer_level: RendererLevel, executor: E) + -> SceneProxy where E: Executor + Send + 'static { let 
(main_to_worker_sender, main_to_worker_receiver) = crossbeam_channel::bounded(MAX_MESSAGES_IN_FLIGHT); - thread::spawn(move || scene_thread(scene, executor, main_to_worker_receiver)); - SceneProxy { sender: main_to_worker_sender } + let (worker_to_main_sender, worker_to_main_receiver) = + crossbeam_channel::bounded(MAX_MESSAGES_IN_FLIGHT); + let listener = RenderCommandListener::new(Box::new(move |command| { + drop(worker_to_main_sender.send(command)) + })); + let sink = SceneSink::new(listener, renderer_level); + thread::spawn(move || scene_thread(scene, executor, sink, main_to_worker_receiver)); + SceneProxy { sender: main_to_worker_sender, receiver: worker_to_main_receiver } } #[inline] @@ -59,18 +69,22 @@ impl SceneProxy { } #[inline] - pub fn build_with_listener(&self, - options: BuildOptions, - listener: Box) { - self.sender.send(MainToWorkerMsg::Build(options, listener)).unwrap(); + pub fn build(&self, options: BuildOptions) { + self.sender.send(MainToWorkerMsg::Build(options)).unwrap(); } + /// Sends all queued commands to the given renderer. #[inline] - pub fn build_with_stream(&self, options: BuildOptions) -> RenderCommandStream { - let (sender, receiver) = crossbeam_channel::bounded(MAX_MESSAGES_IN_FLIGHT); - let listener = Box::new(move |command| drop(sender.send(command))); - self.build_with_listener(options, listener); - RenderCommandStream::new(receiver) + pub fn render(&mut self, renderer: &mut Renderer) where D: Device { + renderer.begin_scene(); + while let Ok(command) = self.receiver.recv() { + renderer.render_command(&command); + match command { + RenderCommand::Finish { .. 
} => break, + _ => {} + } + } + renderer.end_scene(); } /// A convenience method to build a scene and send the resulting commands @@ -79,18 +93,15 @@ impl SceneProxy { /// Exactly equivalent to: /// /// ```norun - /// for command in scene_proxy.build_with_stream(options) { - /// renderer.render_command(&command) + /// scene_proxy.build(build_options); + /// scene_proxy.render(renderer); /// } /// ``` #[inline] - pub fn build_and_render(&self, renderer: &mut Renderer, build_options: BuildOptions) + pub fn build_and_render(&mut self, renderer: &mut Renderer, build_options: BuildOptions) where D: Device { - renderer.begin_scene(); - for command in self.build_with_stream(build_options) { - renderer.render_command(&command); - } - renderer.end_scene(); + self.build(build_options); + self.render(renderer); } #[inline] @@ -103,6 +114,7 @@ impl SceneProxy { fn scene_thread(mut scene: Scene, executor: E, + mut sink: SceneSink<'static>, main_to_worker_receiver: Receiver) where E: Executor { while let Ok(msg) = main_to_worker_receiver.recv() { @@ -110,7 +122,7 @@ fn scene_thread(mut scene: Scene, MainToWorkerMsg::ReplaceScene(new_scene) => scene = new_scene, MainToWorkerMsg::CopyScene(sender) => sender.send(scene.clone()).unwrap(), MainToWorkerMsg::SetViewBox(new_view_box) => scene.set_view_box(new_view_box), - MainToWorkerMsg::Build(options, listener) => scene.build(options, listener, &executor) + MainToWorkerMsg::Build(options) => scene.build(options, &mut sink, &executor), } } } @@ -119,33 +131,5 @@ enum MainToWorkerMsg { ReplaceScene(Scene), CopyScene(Sender), SetViewBox(RectF), - Build(BuildOptions, Box), -} - -pub struct RenderCommandStream { - receiver: Receiver, - done: bool, -} - -impl RenderCommandStream { - fn new(receiver: Receiver) -> RenderCommandStream { - RenderCommandStream { receiver, done: false } - } -} - -impl Iterator for RenderCommandStream { - type Item = RenderCommand; - - #[inline] - fn next(&mut self) -> Option { - if self.done { - None - } else { - 
let command = self.receiver.recv().unwrap(); - if let RenderCommand::Finish { .. } = command { - self.done = true; - } - Some(command) - } - } + Build(BuildOptions), } diff --git a/renderer/src/gpu/blend.rs b/renderer/src/gpu/blend.rs new file mode 100644 index 00000000..02ae3924 --- /dev/null +++ b/renderer/src/gpu/blend.rs @@ -0,0 +1,264 @@ +// pathfinder/renderer/src/gpu/blend.rs +// +// Copyright © 2020 The Pathfinder Project Developers. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Helpers for blending. + +use crate::gpu_data::ColorCombineMode; +use crate::paint::PaintCompositeOp; +use pathfinder_content::effects::BlendMode; +use pathfinder_gpu::{BlendFactor, BlendState}; + +const COMBINER_CTRL_COLOR_COMBINE_SRC_IN: i32 = 0x1; +const COMBINER_CTRL_COLOR_COMBINE_DEST_IN: i32 = 0x2; + +const COMBINER_CTRL_COMPOSITE_NORMAL: i32 = 0x0; +const COMBINER_CTRL_COMPOSITE_MULTIPLY: i32 = 0x1; +const COMBINER_CTRL_COMPOSITE_SCREEN: i32 = 0x2; +const COMBINER_CTRL_COMPOSITE_OVERLAY: i32 = 0x3; +const COMBINER_CTRL_COMPOSITE_DARKEN: i32 = 0x4; +const COMBINER_CTRL_COMPOSITE_LIGHTEN: i32 = 0x5; +const COMBINER_CTRL_COMPOSITE_COLOR_DODGE: i32 = 0x6; +const COMBINER_CTRL_COMPOSITE_COLOR_BURN: i32 = 0x7; +const COMBINER_CTRL_COMPOSITE_HARD_LIGHT: i32 = 0x8; +const COMBINER_CTRL_COMPOSITE_SOFT_LIGHT: i32 = 0x9; +const COMBINER_CTRL_COMPOSITE_DIFFERENCE: i32 = 0xa; +const COMBINER_CTRL_COMPOSITE_EXCLUSION: i32 = 0xb; +const COMBINER_CTRL_COMPOSITE_HUE: i32 = 0xc; +const COMBINER_CTRL_COMPOSITE_SATURATION: i32 = 0xd; +const COMBINER_CTRL_COMPOSITE_COLOR: i32 = 0xe; +const COMBINER_CTRL_COMPOSITE_LUMINOSITY: i32 = 0xf; + +pub(crate) trait ToBlendState { + fn to_blend_state(self) -> Option<BlendState>; +} + +impl ToBlendState for BlendMode { + fn to_blend_state(self) -> Option<BlendState> { + match self { + BlendMode::Clear => { + Some(BlendState { 
src_rgb_factor: BlendFactor::Zero, + dest_rgb_factor: BlendFactor::Zero, + src_alpha_factor: BlendFactor::Zero, + dest_alpha_factor: BlendFactor::Zero, + ..BlendState::default() + }) + } + BlendMode::SrcOver => { + Some(BlendState { + src_rgb_factor: BlendFactor::One, + dest_rgb_factor: BlendFactor::OneMinusSrcAlpha, + src_alpha_factor: BlendFactor::One, + dest_alpha_factor: BlendFactor::OneMinusSrcAlpha, + ..BlendState::default() + }) + } + BlendMode::DestOver => { + Some(BlendState { + src_rgb_factor: BlendFactor::OneMinusDestAlpha, + dest_rgb_factor: BlendFactor::One, + src_alpha_factor: BlendFactor::OneMinusDestAlpha, + dest_alpha_factor: BlendFactor::One, + ..BlendState::default() + }) + } + BlendMode::SrcIn => { + Some(BlendState { + src_rgb_factor: BlendFactor::DestAlpha, + dest_rgb_factor: BlendFactor::Zero, + src_alpha_factor: BlendFactor::DestAlpha, + dest_alpha_factor: BlendFactor::Zero, + ..BlendState::default() + }) + } + BlendMode::DestIn => { + Some(BlendState { + src_rgb_factor: BlendFactor::Zero, + dest_rgb_factor: BlendFactor::SrcAlpha, + src_alpha_factor: BlendFactor::Zero, + dest_alpha_factor: BlendFactor::SrcAlpha, + ..BlendState::default() + }) + } + BlendMode::SrcOut => { + Some(BlendState { + src_rgb_factor: BlendFactor::OneMinusDestAlpha, + dest_rgb_factor: BlendFactor::Zero, + src_alpha_factor: BlendFactor::OneMinusDestAlpha, + dest_alpha_factor: BlendFactor::Zero, + ..BlendState::default() + }) + } + BlendMode::DestOut => { + Some(BlendState { + src_rgb_factor: BlendFactor::Zero, + dest_rgb_factor: BlendFactor::OneMinusSrcAlpha, + src_alpha_factor: BlendFactor::Zero, + dest_alpha_factor: BlendFactor::OneMinusSrcAlpha, + ..BlendState::default() + }) + } + BlendMode::SrcAtop => { + Some(BlendState { + src_rgb_factor: BlendFactor::DestAlpha, + dest_rgb_factor: BlendFactor::OneMinusSrcAlpha, + src_alpha_factor: BlendFactor::DestAlpha, + dest_alpha_factor: BlendFactor::OneMinusSrcAlpha, + ..BlendState::default() + }) + } + BlendMode::DestAtop 
=> { + Some(BlendState { + src_rgb_factor: BlendFactor::OneMinusDestAlpha, + dest_rgb_factor: BlendFactor::SrcAlpha, + src_alpha_factor: BlendFactor::OneMinusDestAlpha, + dest_alpha_factor: BlendFactor::SrcAlpha, + ..BlendState::default() + }) + } + BlendMode::Xor => { + Some(BlendState { + src_rgb_factor: BlendFactor::OneMinusDestAlpha, + dest_rgb_factor: BlendFactor::OneMinusSrcAlpha, + src_alpha_factor: BlendFactor::OneMinusDestAlpha, + dest_alpha_factor: BlendFactor::OneMinusSrcAlpha, + ..BlendState::default() + }) + } + BlendMode::Lighter => { + Some(BlendState { + src_rgb_factor: BlendFactor::One, + dest_rgb_factor: BlendFactor::One, + src_alpha_factor: BlendFactor::One, + dest_alpha_factor: BlendFactor::One, + ..BlendState::default() + }) + } + BlendMode::Copy | + BlendMode::Darken | + BlendMode::Lighten | + BlendMode::Multiply | + BlendMode::Screen | + BlendMode::HardLight | + BlendMode::Overlay | + BlendMode::ColorDodge | + BlendMode::ColorBurn | + BlendMode::SoftLight | + BlendMode::Difference | + BlendMode::Exclusion | + BlendMode::Hue | + BlendMode::Saturation | + BlendMode::Color | + BlendMode::Luminosity => { + // Blending is done manually in the shader. 
+ None + } + } + } +} + +pub(crate) trait ToCompositeCtrl { + fn to_composite_ctrl(&self) -> i32; +} + +impl ToCompositeCtrl for BlendMode { + fn to_composite_ctrl(&self) -> i32 { + match *self { + BlendMode::SrcOver | + BlendMode::SrcAtop | + BlendMode::DestOver | + BlendMode::DestOut | + BlendMode::Xor | + BlendMode::Lighter | + BlendMode::Clear | + BlendMode::Copy | + BlendMode::SrcIn | + BlendMode::SrcOut | + BlendMode::DestIn | + BlendMode::DestAtop => COMBINER_CTRL_COMPOSITE_NORMAL, + BlendMode::Multiply => COMBINER_CTRL_COMPOSITE_MULTIPLY, + BlendMode::Darken => COMBINER_CTRL_COMPOSITE_DARKEN, + BlendMode::Lighten => COMBINER_CTRL_COMPOSITE_LIGHTEN, + BlendMode::Screen => COMBINER_CTRL_COMPOSITE_SCREEN, + BlendMode::Overlay => COMBINER_CTRL_COMPOSITE_OVERLAY, + BlendMode::ColorDodge => COMBINER_CTRL_COMPOSITE_COLOR_DODGE, + BlendMode::ColorBurn => COMBINER_CTRL_COMPOSITE_COLOR_BURN, + BlendMode::HardLight => COMBINER_CTRL_COMPOSITE_HARD_LIGHT, + BlendMode::SoftLight => COMBINER_CTRL_COMPOSITE_SOFT_LIGHT, + BlendMode::Difference => COMBINER_CTRL_COMPOSITE_DIFFERENCE, + BlendMode::Exclusion => COMBINER_CTRL_COMPOSITE_EXCLUSION, + BlendMode::Hue => COMBINER_CTRL_COMPOSITE_HUE, + BlendMode::Saturation => COMBINER_CTRL_COMPOSITE_SATURATION, + BlendMode::Color => COMBINER_CTRL_COMPOSITE_COLOR, + BlendMode::Luminosity => COMBINER_CTRL_COMPOSITE_LUMINOSITY, + } + } +} + +impl ToCompositeCtrl for ColorCombineMode { + fn to_composite_ctrl(&self) -> i32 { + match *self { + ColorCombineMode::None => 0, + ColorCombineMode::SrcIn => COMBINER_CTRL_COLOR_COMBINE_SRC_IN, + ColorCombineMode::DestIn => COMBINER_CTRL_COLOR_COMBINE_DEST_IN, + } + } +} + +pub trait BlendModeExt { + fn needs_readable_framebuffer(self) -> bool; +} + +impl BlendModeExt for BlendMode { + fn needs_readable_framebuffer(self) -> bool { + match self { + BlendMode::Clear | + BlendMode::SrcOver | + BlendMode::DestOver | + BlendMode::SrcIn | + BlendMode::DestIn | + BlendMode::SrcOut | + BlendMode::DestOut | 
+ BlendMode::SrcAtop | + BlendMode::DestAtop | + BlendMode::Xor | + BlendMode::Lighter | + BlendMode::Copy => false, + BlendMode::Lighten | + BlendMode::Darken | + BlendMode::Multiply | + BlendMode::Screen | + BlendMode::HardLight | + BlendMode::Overlay | + BlendMode::ColorDodge | + BlendMode::ColorBurn | + BlendMode::SoftLight | + BlendMode::Difference | + BlendMode::Exclusion | + BlendMode::Hue | + BlendMode::Saturation | + BlendMode::Color | + BlendMode::Luminosity => true, + } + } +} + +pub(crate) trait ToCombineMode { + fn to_combine_mode(self) -> i32; +} + +impl ToCombineMode for PaintCompositeOp { + fn to_combine_mode(self) -> i32 { + match self { + PaintCompositeOp::DestIn => COMBINER_CTRL_COLOR_COMBINE_DEST_IN, + PaintCompositeOp::SrcIn => COMBINER_CTRL_COLOR_COMBINE_SRC_IN, + } + } +} diff --git a/renderer/src/gpu/d3d11/mod.rs b/renderer/src/gpu/d3d11/mod.rs new file mode 100644 index 00000000..eebeb578 --- /dev/null +++ b/renderer/src/gpu/d3d11/mod.rs @@ -0,0 +1,12 @@ +// pathfinder/renderer/src/gpu/d3d11/mod.rs +// +// Copyright © 2020 The Pathfinder Project Developers. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +pub mod renderer; +pub mod shaders; diff --git a/renderer/src/gpu/d3d11/renderer.rs b/renderer/src/gpu/d3d11/renderer.rs new file mode 100644 index 00000000..3668183a --- /dev/null +++ b/renderer/src/gpu/d3d11/renderer.rs @@ -0,0 +1,897 @@ +// pathfinder/renderer/src/gpu/d3d11/renderer.rs +// +// Copyright © 2020 The Pathfinder Project Developers. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +use crate::gpu::d3d11::shaders::{BOUND_WORKGROUP_SIZE, DICE_WORKGROUP_SIZE}; +use crate::gpu::d3d11::shaders::{PROPAGATE_WORKGROUP_SIZE, ProgramsD3D11, SORT_WORKGROUP_SIZE}; +use crate::gpu::perf::TimerFuture; +use crate::gpu::renderer::{FramebufferFlags, RendererCore}; +use crate::gpu_data::{AlphaTileD3D11, BackdropInfoD3D11, DiceMetadataD3D11, DrawTileBatchD3D11}; +use crate::gpu_data::{Fill, FirstTileD3D11, MicrolineD3D11, PathSource, PropagateMetadataD3D11}; +use crate::gpu_data::{SegmentIndicesD3D11, SegmentsD3D11, TileD3D11, TileBatchDataD3D11}; +use crate::gpu_data::{TileBatchTexture, TilePathInfoD3D11}; +use byte_slice_cast::AsSliceOf; +use pathfinder_geometry::transform2d::Transform2F; +use pathfinder_geometry::vector::Vector2F; +use pathfinder_gpu::allocator::{BufferID, BufferTag, GPUMemoryAllocator}; +use pathfinder_gpu::{BufferTarget, ComputeDimensions, ComputeState, Device, ImageAccess}; +use pathfinder_gpu::{RenderTarget, UniformData}; +use pathfinder_resources::ResourceLoader; +use pathfinder_simd::default::{F32x4, I32x2}; +use std::ops::Range; +use vec_map::VecMap; + +const FILL_INDIRECT_DRAW_PARAMS_INSTANCE_COUNT_INDEX: usize = 1; +const FILL_INDIRECT_DRAW_PARAMS_ALPHA_TILE_COUNT_INDEX: usize = 4; + +const BIN_INDIRECT_DRAW_PARAMS_MICROLINE_COUNT_INDEX: usize = 3; + +const LOAD_ACTION_CLEAR: i32 = 0; +const LOAD_ACTION_LOAD: i32 = 1; + +const INITIAL_ALLOCATED_MICROLINE_COUNT: u32 = 1024 * 16; +const INITIAL_ALLOCATED_FILL_COUNT: u32 = 1024 * 16; + +pub(crate) struct RendererD3D11<D> where D: Device { + programs: ProgramsD3D11<D>, + allocated_microline_count: u32, + allocated_fill_count: u32, + scene_buffers: SceneBuffers, + tile_batch_info: VecMap<TileBatchInfoD3D11>, +} + +impl<D> RendererD3D11<D> where D: Device { + pub(crate) fn new(core: &mut RendererCore<D>, resources: &dyn ResourceLoader) + -> RendererD3D11<D> { + let programs = ProgramsD3D11::new(&core.device, resources); + RendererD3D11 { + programs, + allocated_fill_count: INITIAL_ALLOCATED_FILL_COUNT, + 
allocated_microline_count: INITIAL_ALLOCATED_MICROLINE_COUNT, + scene_buffers: SceneBuffers::new(), + tile_batch_info: VecMap::::new(), + } + } + + fn bound(&mut self, + core: &mut RendererCore, + tiles_d3d11_buffer_id: BufferID, + tile_count: u32, + tile_path_info: &[TilePathInfoD3D11]) { + let bound_program = &self.programs.bound_program; + + let path_info_buffer_id = + core.allocator.allocate_buffer::(&core.device, + tile_path_info.len() as u64, + BufferTag("TilePathInfoD3D11")); + let tile_path_info_buffer = core.allocator.get_buffer(path_info_buffer_id); + core.device.upload_to_buffer(tile_path_info_buffer, + 0, + tile_path_info, + BufferTarget::Storage); + + let tiles_buffer = core.allocator.get_buffer(tiles_d3d11_buffer_id); + + let timer_query = core.timer_query_cache.alloc(&core.device); + core.device.begin_timer_query(&timer_query); + + let compute_dimensions = ComputeDimensions { + x: (tile_count + BOUND_WORKGROUP_SIZE - 1) / BOUND_WORKGROUP_SIZE, + y: 1, + z: 1, + }; + core.device.dispatch_compute(compute_dimensions, &ComputeState { + program: &bound_program.program, + textures: &[], + uniforms: &[ + (&bound_program.path_count_uniform, UniformData::Int(tile_path_info.len() as i32)), + (&bound_program.tile_count_uniform, UniformData::Int(tile_count as i32)), + ], + images: &[], + storage_buffers: &[ + (&bound_program.tile_path_info_storage_buffer, tile_path_info_buffer), + (&bound_program.tiles_storage_buffer, tiles_buffer), + ], + }); + + core.device.end_timer_query(&timer_query); + core.current_timer.as_mut().unwrap().other_times.push(TimerFuture::new(timer_query)); + core.stats.drawcall_count += 1; + + core.allocator.free_buffer(path_info_buffer_id); + } + + fn upload_propagate_metadata(&mut self, + core: &mut RendererCore, + propagate_metadata: &[PropagateMetadataD3D11], + backdrops: &[BackdropInfoD3D11]) + -> PropagateMetadataBufferIDsD3D11 { + let propagate_metadata_storage_id = + core.allocator + .allocate_buffer::(&core.device, + 
propagate_metadata.len() as u64, + BufferTag("PropagateMetadataD3D11")); + let propagate_metadata_buffer = core.allocator.get_buffer(propagate_metadata_storage_id); + core.device.upload_to_buffer(propagate_metadata_buffer, + 0, + propagate_metadata, + BufferTarget::Storage); + + let backdrops_storage_id = + core.allocator.allocate_buffer::(&core.device, + backdrops.len() as u64, + BufferTag("BackdropInfoD3D11")); + + PropagateMetadataBufferIDsD3D11 { + propagate_metadata: propagate_metadata_storage_id, + backdrops: backdrops_storage_id, + } + } + + fn upload_initial_backdrops(&self, + core: &RendererCore, + backdrops_buffer_id: BufferID, + backdrops: &[BackdropInfoD3D11]) { + let backdrops_buffer = core.allocator.get_buffer(backdrops_buffer_id); + core.device.upload_to_buffer(backdrops_buffer, 0, backdrops, BufferTarget::Storage); + } + + fn bin_segments(&mut self, + core: &mut RendererCore, + microlines_storage: &MicrolinesBufferIDsD3D11, + propagate_metadata_buffer_ids: &PropagateMetadataBufferIDsD3D11, + tiles_d3d11_buffer_id: BufferID) + -> Option { + let bin_program = &self.programs.bin_program; + + let fill_vertex_buffer_id = + core.allocator.allocate_buffer::(&core.device, + self.allocated_fill_count as u64, + BufferTag("Fill")); + let fill_indirect_draw_params_buffer_id = + core.allocator.allocate_buffer::(&core.device, + 8, + BufferTag("FillIndirectDrawParamsD3D11")); + + let fill_vertex_buffer = core.allocator.get_buffer(fill_vertex_buffer_id); + let microlines_buffer = core.allocator.get_buffer(microlines_storage.buffer_id); + let tiles_buffer = core.allocator.get_buffer(tiles_d3d11_buffer_id); + let propagate_metadata_buffer = + core.allocator.get_buffer(propagate_metadata_buffer_ids.propagate_metadata); + let backdrops_buffer = core.allocator.get_buffer(propagate_metadata_buffer_ids.backdrops); + + let fill_indirect_draw_params_buffer = + core.allocator.get_buffer(fill_indirect_draw_params_buffer_id); + let indirect_draw_params = [6, 0, 0, 0, 0, 
microlines_storage.count, 0, 0]; + core.device.upload_to_buffer::(&fill_indirect_draw_params_buffer, + 0, + &indirect_draw_params, + BufferTarget::Storage); + + let timer_query = core.timer_query_cache.alloc(&core.device); + core.device.begin_timer_query(&timer_query); + + let compute_dimensions = ComputeDimensions { + x: (microlines_storage.count + 63) / 64, + y: 1, + z: 1, + }; + + core.device.dispatch_compute(compute_dimensions, &ComputeState { + program: &bin_program.program, + textures: &[], + uniforms: &[ + (&bin_program.microline_count_uniform, + UniformData::Int(microlines_storage.count as i32)), + (&bin_program.max_fill_count_uniform, + UniformData::Int(self.allocated_fill_count as i32)), + ], + images: &[], + storage_buffers: &[ + (&bin_program.microlines_storage_buffer, microlines_buffer), + (&bin_program.metadata_storage_buffer, propagate_metadata_buffer), + (&bin_program.indirect_draw_params_storage_buffer, + fill_indirect_draw_params_buffer), + (&bin_program.fills_storage_buffer, fill_vertex_buffer), + (&bin_program.tiles_storage_buffer, tiles_buffer), + (&bin_program.backdrops_storage_buffer, backdrops_buffer), + ], + }); + + core.device.end_timer_query(&timer_query); + core.current_timer.as_mut().unwrap().bin_times.push(TimerFuture::new(timer_query)); + core.stats.drawcall_count += 1; + + let indirect_draw_params_receiver = + core.device.read_buffer(fill_indirect_draw_params_buffer, + BufferTarget::Storage, + 0..32); + let indirect_draw_params = core.device.recv_buffer(&indirect_draw_params_receiver); + let indirect_draw_params: &[u32] = indirect_draw_params.as_slice_of().unwrap(); + + let needed_fill_count = + indirect_draw_params[FILL_INDIRECT_DRAW_PARAMS_INSTANCE_COUNT_INDEX]; + if needed_fill_count > self.allocated_fill_count { + self.allocated_fill_count = needed_fill_count.next_power_of_two(); + return None; + } + + core.stats.fill_count += needed_fill_count as usize; + + Some(FillBufferInfoD3D11 { fill_vertex_buffer_id, 
fill_indirect_draw_params_buffer_id }) + } + + pub(crate) fn upload_scene(&mut self, + core: &mut RendererCore, + draw_segments: &SegmentsD3D11, + clip_segments: &SegmentsD3D11) { + self.scene_buffers.upload(&mut core.allocator, &core.device, draw_segments, clip_segments); + } + + fn allocate_tiles(&mut self, core: &mut RendererCore, tile_count: u32) -> BufferID { + core.allocator.allocate_buffer::(&core.device, + tile_count as u64, + BufferTag("TileD3D11")) + } + + fn dice_segments(&mut self, + core: &mut RendererCore, + dice_metadata: &[DiceMetadataD3D11], + batch_segment_count: u32, + path_source: PathSource, + transform: Transform2F) + -> Option { + let dice_program = &self.programs.dice_program; + + let microlines_buffer_id = + core.allocator.allocate_buffer::(&core.device, + self.allocated_microline_count as u64, + BufferTag("MicrolineD3D11")); + let dice_metadata_buffer_id = + core.allocator.allocate_buffer::(&core.device, + dice_metadata.len() as u64, + BufferTag("DiceMetadataD3D11")); + let dice_indirect_draw_params_buffer_id = + core.allocator.allocate_buffer::(&core.device, + 8, + BufferTag("DiceIndirectDrawParamsD3D11")); + + let microlines_buffer = core.allocator.get_buffer(microlines_buffer_id); + let dice_metadata_storage_buffer = core.allocator.get_buffer(dice_metadata_buffer_id); + let dice_indirect_draw_params_buffer = + core.allocator.get_buffer(dice_indirect_draw_params_buffer_id); + + let scene_buffers = &self.scene_buffers; + let scene_source_buffers = match path_source { + PathSource::Draw => &scene_buffers.draw, + PathSource::Clip => &scene_buffers.clip, + }; + let SceneSourceBuffers { + points_buffer: points_buffer_id, + point_indices_buffer: point_indices_buffer_id, + point_indices_count, + .. 
+ } = *scene_source_buffers; + + let points_buffer = + core.allocator.get_buffer(points_buffer_id.expect("Where's the points buffer?")); + let point_indices_buffer = + core.allocator + .get_buffer(point_indices_buffer_id.expect("Where's the point indices buffer?")); + + core.device.upload_to_buffer(dice_indirect_draw_params_buffer, + 0, + &[0, 0, 0, 0, point_indices_count, 0, 0, 0], + BufferTarget::Storage); + core.device.upload_to_buffer(dice_metadata_storage_buffer, + 0, + dice_metadata, + BufferTarget::Storage); + + let timer_query = core.timer_query_cache.alloc(&core.device); + core.device.begin_timer_query(&timer_query); + + let workgroup_count = (batch_segment_count + DICE_WORKGROUP_SIZE - 1) / + DICE_WORKGROUP_SIZE; + let compute_dimensions = ComputeDimensions { x: workgroup_count, y: 1, z: 1 }; + + core.device.dispatch_compute(compute_dimensions, &ComputeState { + program: &dice_program.program, + textures: &[], + uniforms: &[ + (&dice_program.transform_uniform, UniformData::Mat2(transform.matrix.0)), + (&dice_program.translation_uniform, UniformData::Vec2(transform.vector.0)), + (&dice_program.path_count_uniform, + UniformData::Int(dice_metadata.len() as i32)), + (&dice_program.last_batch_segment_index_uniform, + UniformData::Int(batch_segment_count as i32)), + (&dice_program.max_microline_count_uniform, + UniformData::Int(self.allocated_microline_count as i32)), + ], + images: &[], + storage_buffers: &[ + (&dice_program.compute_indirect_params_storage_buffer, + dice_indirect_draw_params_buffer), + (&dice_program.points_storage_buffer, points_buffer), + (&dice_program.input_indices_storage_buffer, point_indices_buffer), + (&dice_program.microlines_storage_buffer, microlines_buffer), + (&dice_program.dice_metadata_storage_buffer, &dice_metadata_storage_buffer), + ], + }); + + core.device.end_timer_query(&timer_query); + core.current_timer.as_mut().unwrap().dice_times.push(TimerFuture::new(timer_query)); + core.stats.drawcall_count += 1; + + let 
indirect_compute_params_receiver = + core.device.read_buffer(&dice_indirect_draw_params_buffer, + BufferTarget::Storage, + 0..32); + let indirect_compute_params = core.device.recv_buffer(&indirect_compute_params_receiver); + let indirect_compute_params: &[u32] = indirect_compute_params.as_slice_of().unwrap(); + + core.allocator.free_buffer(dice_metadata_buffer_id); + core.allocator.free_buffer(dice_indirect_draw_params_buffer_id); + + let microline_count = + indirect_compute_params[BIN_INDIRECT_DRAW_PARAMS_MICROLINE_COUNT_INDEX]; + if microline_count > self.allocated_microline_count { + self.allocated_microline_count = microline_count.next_power_of_two(); + return None; + } + + Some(MicrolinesBufferIDsD3D11 { buffer_id: microlines_buffer_id, count: microline_count }) + } + + fn draw_fills(&mut self, + core: &mut RendererCore, + fill_storage_info: &FillBufferInfoD3D11, + tiles_d3d11_buffer_id: BufferID, + alpha_tiles_buffer_id: BufferID, + propagate_tiles_info: &PropagateTilesInfoD3D11) { + let &FillBufferInfoD3D11 { + fill_vertex_buffer_id, + fill_indirect_draw_params_buffer_id: _, + } = fill_storage_info; + let &PropagateTilesInfoD3D11 { ref alpha_tile_range } = propagate_tiles_info; + + let fill_program = &self.programs.fill_program; + let fill_vertex_buffer = core.allocator.get_buffer(fill_vertex_buffer_id); + + let mask_storage = core.mask_storage.as_ref().expect("Where's the mask storage?"); + let mask_framebuffer_id = mask_storage.framebuffer_id; + let mask_framebuffer = core.allocator.get_framebuffer(mask_framebuffer_id); + let image_texture = core.device.framebuffer_texture(mask_framebuffer); + + let tiles_d3d11_buffer = core.allocator.get_buffer(tiles_d3d11_buffer_id); + let alpha_tiles_buffer = core.allocator.get_buffer(alpha_tiles_buffer_id); + + let area_lut_texture = core.allocator.get_texture(core.area_lut_texture_id); + + let timer_query = core.timer_query_cache.alloc(&core.device); + core.device.begin_timer_query(&timer_query); + + // This setup is 
an annoying workaround for the 64K limit of compute invocation in OpenGL. + let alpha_tile_count = alpha_tile_range.end - alpha_tile_range.start; + let dimensions = ComputeDimensions { + x: alpha_tile_count.min(1 << 15) as u32, + y: ((alpha_tile_count + (1 << 15) - 1) >> 15) as u32, + z: 1, + }; + + core.device.dispatch_compute(dimensions, &ComputeState { + program: &fill_program.program, + textures: &[(&fill_program.area_lut_texture, area_lut_texture)], + images: &[(&fill_program.dest_image, image_texture, ImageAccess::ReadWrite)], + uniforms: &[ + (&fill_program.alpha_tile_range_uniform, + UniformData::IVec2(I32x2::new(alpha_tile_range.start as i32, + alpha_tile_range.end as i32))), + ], + storage_buffers: &[ + (&fill_program.fills_storage_buffer, fill_vertex_buffer), + (&fill_program.tiles_storage_buffer, tiles_d3d11_buffer), + (&fill_program.alpha_tiles_storage_buffer, &alpha_tiles_buffer), + ], + }); + + core.device.end_timer_query(&timer_query); + core.current_timer.as_mut().unwrap().fill_times.push(TimerFuture::new(timer_query)); + core.stats.drawcall_count += 1; + + core.framebuffer_flags.insert(FramebufferFlags::MASK_FRAMEBUFFER_IS_DIRTY); + } + + pub(crate) fn prepare_and_draw_tiles(&mut self, + core: &mut RendererCore, + batch: &DrawTileBatchD3D11) { + let tile_batch_id = batch.tile_batch_data.batch_id; + self.prepare_tiles(core, &batch.tile_batch_data); + let batch_info = self.tile_batch_info[tile_batch_id.0 as usize].clone(); + self.draw_tiles(core, + batch_info.tiles_d3d11_buffer_id, + batch_info.first_tile_map_buffer_id, + batch.color_texture); + } + + // Computes backdrops, performs clipping, and populates Z buffers on GPU. + pub(crate) fn prepare_tiles(&mut self, + core: &mut RendererCore, + batch: &TileBatchDataD3D11) { + core.stats.total_tile_count += batch.tile_count as usize; + + // Upload tiles to GPU or allocate them as appropriate. 
+ let tiles_d3d11_buffer_id = self.allocate_tiles(core, batch.tile_count); + + // Fetch and/or allocate clip storage as needed. + let clip_buffer_ids = match batch.clipped_path_info { + Some(ref clipped_path_info) => { + let clip_batch_id = clipped_path_info.clip_batch_id; + let clip_tile_batch_info = &self.tile_batch_info[clip_batch_id.0 as usize]; + let metadata = clip_tile_batch_info.propagate_metadata_buffer_id; + let tiles = clip_tile_batch_info.tiles_d3d11_buffer_id; + Some(ClipBufferIDs { metadata: Some(metadata), tiles }) + } + None => None, + }; + + // Allocate a Z-buffer. + let z_buffer_id = self.allocate_z_buffer(core); + + // Propagate backdrops, bin fills, render fills, and/or perform clipping on GPU if + // necessary. + // Allocate space for tile lists. + let first_tile_map_buffer_id = self.allocate_first_tile_map(core); + + let propagate_metadata_buffer_ids = + self.upload_propagate_metadata(core, + &batch.prepare_info.propagate_metadata, + &batch.prepare_info.backdrops); + + // Dice (flatten) segments into microlines. We might have to do this twice if our + // first attempt runs out of space in the storage buffer. + let mut microlines_storage = None; + for _ in 0..2 { + microlines_storage = self.dice_segments(core, + &batch.prepare_info.dice_metadata, + batch.segment_count, + batch.path_source, + batch.prepare_info.transform); + if microlines_storage.is_some() { + break; + } + } + let microlines_storage = + microlines_storage.expect("Ran out of space for microlines when dicing!"); + + // Initialize tiles, and bin segments. We might have to do this twice if our first + // attempt runs out of space in the fill buffer. 
+ let mut fill_buffer_info = None; + for _ in 0..2 { + self.bound(core, + tiles_d3d11_buffer_id, + batch.tile_count, + &batch.prepare_info.tile_path_info); + + self.upload_initial_backdrops(core, + propagate_metadata_buffer_ids.backdrops, + &batch.prepare_info.backdrops); + + fill_buffer_info = self.bin_segments(core, + &microlines_storage, + &propagate_metadata_buffer_ids, + tiles_d3d11_buffer_id); + if fill_buffer_info.is_some() { + break; + } + } + let fill_buffer_info = + fill_buffer_info.expect("Ran out of space for fills when binning!"); + + core.allocator.free_buffer(microlines_storage.buffer_id); + + // TODO(pcwalton): If we run out of space for alpha tile indices, propagate + // multiple times. + + let alpha_tiles_buffer_id = self.allocate_alpha_tile_info(core, batch.tile_count); + + let propagate_tiles_info = + self.propagate_tiles(core, + batch.prepare_info.backdrops.len() as u32, + tiles_d3d11_buffer_id, + fill_buffer_info.fill_indirect_draw_params_buffer_id, + z_buffer_id, + first_tile_map_buffer_id, + alpha_tiles_buffer_id, + &propagate_metadata_buffer_ids, + clip_buffer_ids.as_ref()); + + core.allocator.free_buffer(propagate_metadata_buffer_ids.backdrops); + + // FIXME(pcwalton): Don't unconditionally pass true for copying here. + core.reallocate_alpha_tile_pages_if_necessary(true); + self.draw_fills(core, + &fill_buffer_info, + tiles_d3d11_buffer_id, + alpha_tiles_buffer_id, + &propagate_tiles_info); + + core.allocator.free_buffer(fill_buffer_info.fill_vertex_buffer_id); + core.allocator.free_buffer(fill_buffer_info.fill_indirect_draw_params_buffer_id); + core.allocator.free_buffer(alpha_tiles_buffer_id); + + // FIXME(pcwalton): This seems like the wrong place to do this... + self.sort_tiles(core, tiles_d3d11_buffer_id, first_tile_map_buffer_id, z_buffer_id); + + // Record tile batch info. 
+ self.tile_batch_info.insert(batch.batch_id.0 as usize, TileBatchInfoD3D11 { + tile_count: batch.tile_count, + z_buffer_id, + tiles_d3d11_buffer_id, + propagate_metadata_buffer_id: propagate_metadata_buffer_ids.propagate_metadata, + first_tile_map_buffer_id, + }); + } + + fn propagate_tiles(&mut self, + core: &mut RendererCore, + column_count: u32, + tiles_d3d11_buffer_id: BufferID, + fill_indirect_draw_params_buffer_id: BufferID, + z_buffer_id: BufferID, + first_tile_map_buffer_id: BufferID, + alpha_tiles_buffer_id: BufferID, + propagate_metadata_buffer_ids: &PropagateMetadataBufferIDsD3D11, + clip_buffer_ids: Option<&ClipBufferIDs>) + -> PropagateTilesInfoD3D11 { + let propagate_program = &self.programs.propagate_program; + + let tiles_d3d11_buffer = core.allocator.get_buffer(tiles_d3d11_buffer_id); + let propagate_metadata_storage_buffer = + core.allocator.get_buffer(propagate_metadata_buffer_ids.propagate_metadata); + let backdrops_storage_buffer = + core.allocator.get_buffer(propagate_metadata_buffer_ids.backdrops); + + // TODO(pcwalton): Zero out the Z-buffer on GPU? + let z_buffer = core.allocator.get_buffer(z_buffer_id); + let z_buffer_size = core.tile_size(); + let tile_area = z_buffer_size.area() as usize; + core.device.upload_to_buffer(z_buffer, 0, &vec![0i32; tile_area], BufferTarget::Storage); + + // TODO(pcwalton): Initialize the first tiles buffer on GPU? 
+ let first_tile_map_storage_buffer = core.allocator.get_buffer(first_tile_map_buffer_id); + core.device.upload_to_buffer::(&first_tile_map_storage_buffer, + 0, + &vec![FirstTileD3D11::default(); tile_area], + BufferTarget::Storage); + + let alpha_tiles_storage_buffer = core.allocator.get_buffer(alpha_tiles_buffer_id); + let fill_indirect_draw_params_buffer = + core.allocator.get_buffer(fill_indirect_draw_params_buffer_id); + + let mut storage_buffers = vec![ + (&propagate_program.draw_metadata_storage_buffer, propagate_metadata_storage_buffer), + (&propagate_program.backdrops_storage_buffer, &backdrops_storage_buffer), + (&propagate_program.draw_tiles_storage_buffer, tiles_d3d11_buffer), + (&propagate_program.z_buffer_storage_buffer, z_buffer), + (&propagate_program.first_tile_map_storage_buffer, first_tile_map_storage_buffer), + (&propagate_program.indirect_draw_params_storage_buffer, + fill_indirect_draw_params_buffer), + (&propagate_program.alpha_tiles_storage_buffer, alpha_tiles_storage_buffer), + ]; + + if let Some(clip_buffer_ids) = clip_buffer_ids { + let clip_metadata_buffer_id = + clip_buffer_ids.metadata.expect("Where's the clip metadata storage?"); + let clip_metadata_buffer = core.allocator.get_buffer(clip_metadata_buffer_id); + let clip_tile_buffer = core.allocator.get_buffer(clip_buffer_ids.tiles); + storage_buffers.push((&propagate_program.clip_metadata_storage_buffer, + clip_metadata_buffer)); + storage_buffers.push((&propagate_program.clip_tiles_storage_buffer, clip_tile_buffer)); + } + + let timer_query = core.timer_query_cache.alloc(&core.device); + core.device.begin_timer_query(&timer_query); + + let dimensions = ComputeDimensions { + x: (column_count + PROPAGATE_WORKGROUP_SIZE - 1) / PROPAGATE_WORKGROUP_SIZE, + y: 1, + z: 1, + }; + core.device.dispatch_compute(dimensions, &ComputeState { + program: &propagate_program.program, + textures: &[], + images: &[], + uniforms: &[ + (&propagate_program.framebuffer_tile_size_uniform, + 
UniformData::IVec2(core.framebuffer_tile_size().0)), + (&propagate_program.column_count_uniform, UniformData::Int(column_count as i32)), + (&propagate_program.first_alpha_tile_index_uniform, + UniformData::Int(core.alpha_tile_count as i32)), + ], + storage_buffers: &storage_buffers, + }); + + core.device.end_timer_query(&timer_query); + core.current_timer.as_mut().unwrap().other_times.push(TimerFuture::new(timer_query)); + core.stats.drawcall_count += 1; + + let fill_indirect_draw_params_receiver = + core.device.read_buffer(&fill_indirect_draw_params_buffer, + BufferTarget::Storage, + 0..32); + let fill_indirect_draw_params = core.device + .recv_buffer(&fill_indirect_draw_params_receiver); + let fill_indirect_draw_params: &[u32] = fill_indirect_draw_params.as_slice_of().unwrap(); + + let batch_alpha_tile_count = + fill_indirect_draw_params[FILL_INDIRECT_DRAW_PARAMS_ALPHA_TILE_COUNT_INDEX]; + + let alpha_tile_start = core.alpha_tile_count; + core.alpha_tile_count += batch_alpha_tile_count; + core.stats.alpha_tile_count += batch_alpha_tile_count as usize; + let alpha_tile_end = core.alpha_tile_count; + + PropagateTilesInfoD3D11 { alpha_tile_range: alpha_tile_start..alpha_tile_end } + } + + fn sort_tiles(&mut self, + core: &mut RendererCore, + tiles_d3d11_buffer_id: BufferID, + first_tile_map_buffer_id: BufferID, + z_buffer_id: BufferID) { + let sort_program = &self.programs.sort_program; + + let tiles_d3d11_buffer = core.allocator.get_buffer(tiles_d3d11_buffer_id); + let first_tile_map_buffer = core.allocator.get_buffer(first_tile_map_buffer_id); + let z_buffer = core.allocator.get_buffer(z_buffer_id); + + let tile_count = core.framebuffer_tile_size().area(); + + let timer_query = core.timer_query_cache.alloc(&core.device); + core.device.begin_timer_query(&timer_query); + + let dimensions = ComputeDimensions { + x: (tile_count as u32 + SORT_WORKGROUP_SIZE - 1) / SORT_WORKGROUP_SIZE, + y: 1, + z: 1, + }; + core.device.dispatch_compute(dimensions, &ComputeState { + 
program: &sort_program.program, + textures: &[], + images: &[], + uniforms: &[(&sort_program.tile_count_uniform, UniformData::Int(tile_count))], + storage_buffers: &[ + (&sort_program.tiles_storage_buffer, tiles_d3d11_buffer), + (&sort_program.first_tile_map_storage_buffer, first_tile_map_buffer), + (&sort_program.z_buffer_storage_buffer, z_buffer), + ], + }); + + core.device.end_timer_query(&timer_query); + core.current_timer.as_mut().unwrap().other_times.push(TimerFuture::new(timer_query)); + core.stats.drawcall_count += 1; + } + + fn allocate_first_tile_map(&mut self, core: &mut RendererCore) -> BufferID { + core.allocator.allocate_buffer::(&core.device, + core.tile_size().area() as u64, + BufferTag("FirstTileD3D11")) + } + + fn allocate_alpha_tile_info(&mut self, core: &mut RendererCore, index_count: u32) + -> BufferID { + core.allocator.allocate_buffer::(&core.device, + index_count as u64, + BufferTag("AlphaTileD3D11")) + } + + fn allocate_z_buffer(&mut self, core: &mut RendererCore) -> BufferID { + core.allocator.allocate_buffer::(&core.device, + core.tile_size().area() as u64, + BufferTag("ZBufferD3D11")) + } + + pub(crate) fn draw_tiles(&mut self, + core: &mut RendererCore, + tiles_d3d11_buffer_id: BufferID, + first_tile_map_buffer_id: BufferID, + color_texture_0: Option) { + let timer_query = core.timer_query_cache.alloc(&core.device); + core.device.begin_timer_query(&timer_query); + + let tile_program = &self.programs.tile_program; + + let (mut textures, mut uniforms, mut images) = (vec![], vec![], vec![]); + + core.set_uniforms_for_drawing_tiles(&tile_program.common, + &mut textures, + &mut uniforms, + color_texture_0); + + uniforms.push((&tile_program.framebuffer_tile_size_uniform, + UniformData::IVec2(core.framebuffer_tile_size().0))); + + match core.draw_render_target() { + RenderTarget::Default => panic!("Can't draw to the default framebuffer with compute!"), + RenderTarget::Framebuffer(ref framebuffer) => { + let dest_texture = 
core.device.framebuffer_texture(framebuffer); + images.push((&tile_program.dest_image, dest_texture, ImageAccess::ReadWrite)); + } + } + + let clear_color = core.clear_color_for_draw_operation(); + match clear_color { + None => { + uniforms.push((&tile_program.load_action_uniform, + UniformData::Int(LOAD_ACTION_LOAD))); + uniforms.push((&tile_program.clear_color_uniform, + UniformData::Vec4(F32x4::default()))); + } + Some(clear_color) => { + uniforms.push((&tile_program.load_action_uniform, + UniformData::Int(LOAD_ACTION_CLEAR))); + uniforms.push((&tile_program.clear_color_uniform, + UniformData::Vec4(clear_color.0))); + } + } + + let tiles_d3d11_buffer = core.allocator.get_buffer(tiles_d3d11_buffer_id); + let first_tile_map_storage_buffer = core.allocator.get_buffer(first_tile_map_buffer_id); + + let framebuffer_tile_size = core.framebuffer_tile_size().0; + let compute_dimensions = ComputeDimensions { + x: framebuffer_tile_size.x() as u32, + y: framebuffer_tile_size.y() as u32, + z: 1, + }; + + core.device.dispatch_compute(compute_dimensions, &ComputeState { + program: &tile_program.common.program, + textures: &textures, + images: &images, + storage_buffers: &[ + (&tile_program.tiles_storage_buffer, tiles_d3d11_buffer), + (&tile_program.first_tile_map_storage_buffer, first_tile_map_storage_buffer), + ], + uniforms: &uniforms, + }); + + core.device.end_timer_query(&timer_query); + core.current_timer.as_mut().unwrap().composite_times.push(TimerFuture::new(timer_query)); + core.stats.drawcall_count += 1; + + core.preserve_draw_framebuffer(); + } + + pub(crate) fn end_frame(&mut self, core: &mut RendererCore) { + self.free_tile_batch_buffers(core); + } + + fn free_tile_batch_buffers(&mut self, core: &mut RendererCore) { + for (_, tile_batch_info) in self.tile_batch_info.drain() { + core.allocator.free_buffer(tile_batch_info.z_buffer_id); + core.allocator.free_buffer(tile_batch_info.tiles_d3d11_buffer_id); + 
core.allocator.free_buffer(tile_batch_info.propagate_metadata_buffer_id); + core.allocator.free_buffer(tile_batch_info.first_tile_map_buffer_id); + } + } +} + +// Buffer data + +#[derive(Clone)] +struct TileBatchInfoD3D11 { + tile_count: u32, + z_buffer_id: BufferID, + tiles_d3d11_buffer_id: BufferID, + propagate_metadata_buffer_id: BufferID, + first_tile_map_buffer_id: BufferID, +} + +#[derive(Clone)] +struct FillBufferInfoD3D11 { + fill_vertex_buffer_id: BufferID, + fill_indirect_draw_params_buffer_id: BufferID, +} + +#[derive(Debug)] +struct PropagateMetadataBufferIDsD3D11 { + propagate_metadata: BufferID, + backdrops: BufferID, +} + +struct MicrolinesBufferIDsD3D11 { + buffer_id: BufferID, + count: u32, +} + +#[derive(Clone, Debug)] +struct ClipBufferIDs { + metadata: Option, + tiles: BufferID, +} + +struct SceneBuffers { + draw: SceneSourceBuffers, + clip: SceneSourceBuffers, +} + +struct SceneSourceBuffers { + points_buffer: Option, + points_capacity: u32, + point_indices_buffer: Option, + point_indices_count: u32, + point_indices_capacity: u32, +} + +#[derive(Clone)] +struct PropagateTilesInfoD3D11 { + alpha_tile_range: Range, +} + +impl SceneBuffers { + fn new() -> SceneBuffers { + SceneBuffers { draw: SceneSourceBuffers::new(), clip: SceneSourceBuffers::new() } + } + + fn upload(&mut self, + allocator: &mut GPUMemoryAllocator, + device: &D, + draw_segments: &SegmentsD3D11, + clip_segments: &SegmentsD3D11) + where D: Device { + self.draw.upload(allocator, device, draw_segments); + self.clip.upload(allocator, device, clip_segments); + } +} + +impl SceneSourceBuffers { + fn new() -> SceneSourceBuffers { + SceneSourceBuffers { + points_buffer: None, + points_capacity: 0, + point_indices_buffer: None, + point_indices_count: 0, + point_indices_capacity: 0, + } + } + + fn upload(&mut self, + allocator: &mut GPUMemoryAllocator, + device: &D, + segments: &SegmentsD3D11) + where D: Device { + let needed_points_capacity = (segments.points.len() as 
u32).next_power_of_two(); + let needed_point_indices_capacity = (segments.indices.len() as u32).next_power_of_two(); + if self.points_capacity < needed_points_capacity { + self.points_buffer = + Some(allocator.allocate_buffer::(device, + needed_points_capacity as u64, + BufferTag("PointsD3D11"))); + self.points_capacity = needed_points_capacity; + } + if self.point_indices_capacity < needed_point_indices_capacity { + self.point_indices_buffer = Some(allocator.allocate_buffer::( + device, + needed_point_indices_capacity as u64, + BufferTag("PointIndicesD3D11"))); + self.point_indices_capacity = needed_point_indices_capacity; + } + device.upload_to_buffer(allocator.get_buffer(self.points_buffer.unwrap()), + 0, + &segments.points, + BufferTarget::Storage); + device.upload_to_buffer(allocator.get_buffer(self.point_indices_buffer.unwrap()), + 0, + &segments.indices, + BufferTarget::Storage); + self.point_indices_count = segments.indices.len() as u32; + } +} diff --git a/renderer/src/gpu/d3d11/shaders.rs b/renderer/src/gpu/d3d11/shaders.rs new file mode 100644 index 00000000..cd8249c7 --- /dev/null +++ b/renderer/src/gpu/d3d11/shaders.rs @@ -0,0 +1,321 @@ +// pathfinder/renderer/src/gpu/d3d11/shaders.rs +// +// Copyright © 2020 The Pathfinder Project Developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +use crate::gpu::shaders::TileProgramCommon; +use crate::tiles::{TILE_HEIGHT, TILE_WIDTH}; +use pathfinder_gpu::{ComputeDimensions, Device}; +use pathfinder_resources::ResourceLoader; + +pub const BOUND_WORKGROUP_SIZE: u32 = 64; +pub const DICE_WORKGROUP_SIZE: u32 = 64; +pub const BIN_WORKGROUP_SIZE: u32 = 64; +pub const PROPAGATE_WORKGROUP_SIZE: u32 = 64; +pub const SORT_WORKGROUP_SIZE: u32 = 64; + +pub struct ProgramsD3D11 where D: Device { + pub bound_program: BoundProgramD3D11, + pub dice_program: DiceProgramD3D11, + pub bin_program: BinProgramD3D11, + pub propagate_program: PropagateProgramD3D11, + pub sort_program: SortProgramD3D11, + pub fill_program: FillProgramD3D11, + pub tile_program: TileProgramD3D11, +} + +impl ProgramsD3D11 where D: Device { + pub fn new(device: &D, resources: &dyn ResourceLoader) -> ProgramsD3D11 { + ProgramsD3D11 { + bound_program: BoundProgramD3D11::new(device, resources), + dice_program: DiceProgramD3D11::new(device, resources), + bin_program: BinProgramD3D11::new(device, resources), + propagate_program: PropagateProgramD3D11::new(device, resources), + sort_program: SortProgramD3D11::new(device, resources), + fill_program: FillProgramD3D11::new(device, resources), + tile_program: TileProgramD3D11::new(device, resources), + } + } +} + +pub struct PropagateProgramD3D11 where D: Device { + pub program: D::Program, + pub framebuffer_tile_size_uniform: D::Uniform, + pub column_count_uniform: D::Uniform, + pub first_alpha_tile_index_uniform: D::Uniform, + pub draw_metadata_storage_buffer: D::StorageBuffer, + pub clip_metadata_storage_buffer: D::StorageBuffer, + pub backdrops_storage_buffer: D::StorageBuffer, + pub draw_tiles_storage_buffer: D::StorageBuffer, + pub clip_tiles_storage_buffer: D::StorageBuffer, + pub z_buffer_storage_buffer: D::StorageBuffer, + pub first_tile_map_storage_buffer: D::StorageBuffer, + pub indirect_draw_params_storage_buffer: D::StorageBuffer, + pub alpha_tiles_storage_buffer: D::StorageBuffer, +} + +impl 
PropagateProgramD3D11 where D: Device { + pub fn new(device: &D, resources: &dyn ResourceLoader) -> PropagateProgramD3D11 { + let mut program = device.create_compute_program(resources, "d3d11/propagate"); + let local_size = ComputeDimensions { x: PROPAGATE_WORKGROUP_SIZE, y: 1, z: 1 }; + device.set_compute_program_local_size(&mut program, local_size); + + let framebuffer_tile_size_uniform = device.get_uniform(&program, "FramebufferTileSize"); + let column_count_uniform = device.get_uniform(&program, "ColumnCount"); + let first_alpha_tile_index_uniform = device.get_uniform(&program, "FirstAlphaTileIndex"); + let draw_metadata_storage_buffer = device.get_storage_buffer(&program, "DrawMetadata", 0); + let clip_metadata_storage_buffer = device.get_storage_buffer(&program, "ClipMetadata", 1); + let backdrops_storage_buffer = device.get_storage_buffer(&program, "Backdrops", 2); + let draw_tiles_storage_buffer = device.get_storage_buffer(&program, "DrawTiles", 3); + let clip_tiles_storage_buffer = device.get_storage_buffer(&program, "ClipTiles", 4); + let z_buffer_storage_buffer = device.get_storage_buffer(&program, "ZBuffer", 5); + let first_tile_map_storage_buffer = device.get_storage_buffer(&program, "FirstTileMap", 6); + let indirect_draw_params_storage_buffer = + device.get_storage_buffer(&program, "IndirectDrawParams", 7); + let alpha_tiles_storage_buffer = device.get_storage_buffer(&program, "AlphaTiles", 8); + + PropagateProgramD3D11 { + program, + framebuffer_tile_size_uniform, + column_count_uniform, + first_alpha_tile_index_uniform, + draw_metadata_storage_buffer, + clip_metadata_storage_buffer, + backdrops_storage_buffer, + draw_tiles_storage_buffer, + clip_tiles_storage_buffer, + z_buffer_storage_buffer, + first_tile_map_storage_buffer, + indirect_draw_params_storage_buffer, + alpha_tiles_storage_buffer, + } + } +} + +pub struct FillProgramD3D11 where D: Device { + pub program: D::Program, + pub dest_image: D::ImageParameter, + pub area_lut_texture: 
D::TextureParameter, + pub alpha_tile_range_uniform: D::Uniform, + pub fills_storage_buffer: D::StorageBuffer, + pub tiles_storage_buffer: D::StorageBuffer, + pub alpha_tiles_storage_buffer: D::StorageBuffer, +} + +impl FillProgramD3D11 where D: Device { + pub fn new(device: &D, resources: &dyn ResourceLoader) -> FillProgramD3D11 { + let mut program = device.create_compute_program(resources, "d3d11/fill"); + let local_size = ComputeDimensions { x: TILE_WIDTH, y: TILE_HEIGHT / 4, z: 1 }; + device.set_compute_program_local_size(&mut program, local_size); + + let dest_image = device.get_image_parameter(&program, "Dest"); + let area_lut_texture = device.get_texture_parameter(&program, "AreaLUT"); + let alpha_tile_range_uniform = device.get_uniform(&program, "AlphaTileRange"); + let fills_storage_buffer = device.get_storage_buffer(&program, "Fills", 0); + let tiles_storage_buffer = device.get_storage_buffer(&program, "Tiles", 1); + let alpha_tiles_storage_buffer = device.get_storage_buffer(&program, "AlphaTiles", 2); + + FillProgramD3D11 { + program, + dest_image, + area_lut_texture, + alpha_tile_range_uniform, + fills_storage_buffer, + tiles_storage_buffer, + alpha_tiles_storage_buffer, + } + } +} + +pub struct TileProgramD3D11 where D: Device { + pub common: TileProgramCommon, + pub load_action_uniform: D::Uniform, + pub clear_color_uniform: D::Uniform, + pub framebuffer_tile_size_uniform: D::Uniform, + pub dest_image: D::ImageParameter, + pub tiles_storage_buffer: D::StorageBuffer, + pub first_tile_map_storage_buffer: D::StorageBuffer, +} + +impl TileProgramD3D11 where D: Device { + fn new(device: &D, resources: &dyn ResourceLoader) -> TileProgramD3D11 { + let mut program = device.create_compute_program(resources, "d3d11/tile"); + device.set_compute_program_local_size(&mut program, + ComputeDimensions { x: 16, y: 4, z: 1 }); + + let load_action_uniform = device.get_uniform(&program, "LoadAction"); + let clear_color_uniform = device.get_uniform(&program, 
"ClearColor"); + let framebuffer_tile_size_uniform = device.get_uniform(&program, "FramebufferTileSize"); + let dest_image = device.get_image_parameter(&program, "DestImage"); + let tiles_storage_buffer = device.get_storage_buffer(&program, "Tiles", 0); + let first_tile_map_storage_buffer = device.get_storage_buffer(&program, "FirstTileMap", 1); + + let common = TileProgramCommon::new(device, program); + TileProgramD3D11 { + common, + load_action_uniform, + clear_color_uniform, + framebuffer_tile_size_uniform, + dest_image, + tiles_storage_buffer, + first_tile_map_storage_buffer, + } + } +} + +pub struct BinProgramD3D11 where D: Device { + pub program: D::Program, + pub microline_count_uniform: D::Uniform, + pub max_fill_count_uniform: D::Uniform, + pub microlines_storage_buffer: D::StorageBuffer, + pub metadata_storage_buffer: D::StorageBuffer, + pub indirect_draw_params_storage_buffer: D::StorageBuffer, + pub fills_storage_buffer: D::StorageBuffer, + pub tiles_storage_buffer: D::StorageBuffer, + pub backdrops_storage_buffer: D::StorageBuffer, +} + +impl BinProgramD3D11 where D: Device { + pub fn new(device: &D, resources: &dyn ResourceLoader) -> BinProgramD3D11 { + let mut program = device.create_compute_program(resources, "d3d11/bin"); + let dimensions = ComputeDimensions { x: BIN_WORKGROUP_SIZE, y: 1, z: 1 }; + device.set_compute_program_local_size(&mut program, dimensions); + + let microline_count_uniform = device.get_uniform(&program, "MicrolineCount"); + let max_fill_count_uniform = device.get_uniform(&program, "MaxFillCount"); + + let microlines_storage_buffer = device.get_storage_buffer(&program, "Microlines", 0); + let metadata_storage_buffer = device.get_storage_buffer(&program, "Metadata", 1); + let indirect_draw_params_storage_buffer = + device.get_storage_buffer(&program, "IndirectDrawParams", 2); + let fills_storage_buffer = device.get_storage_buffer(&program, "Fills", 3); + let tiles_storage_buffer = device.get_storage_buffer(&program, "Tiles", 4); 
+ let backdrops_storage_buffer = device.get_storage_buffer(&program, "Backdrops", 5); + + BinProgramD3D11 { + program, + microline_count_uniform, + max_fill_count_uniform, + metadata_storage_buffer, + indirect_draw_params_storage_buffer, + fills_storage_buffer, + tiles_storage_buffer, + microlines_storage_buffer, + backdrops_storage_buffer, + } + } +} + +pub struct DiceProgramD3D11 where D: Device { + pub program: D::Program, + pub transform_uniform: D::Uniform, + pub translation_uniform: D::Uniform, + pub path_count_uniform: D::Uniform, + pub last_batch_segment_index_uniform: D::Uniform, + pub max_microline_count_uniform: D::Uniform, + pub compute_indirect_params_storage_buffer: D::StorageBuffer, + pub dice_metadata_storage_buffer: D::StorageBuffer, + pub points_storage_buffer: D::StorageBuffer, + pub input_indices_storage_buffer: D::StorageBuffer, + pub microlines_storage_buffer: D::StorageBuffer, +} + +impl DiceProgramD3D11 where D: Device { + pub fn new(device: &D, resources: &dyn ResourceLoader) -> DiceProgramD3D11 { + let mut program = device.create_compute_program(resources, "d3d11/dice"); + let dimensions = ComputeDimensions { x: DICE_WORKGROUP_SIZE, y: 1, z: 1 }; + device.set_compute_program_local_size(&mut program, dimensions); + + let transform_uniform = device.get_uniform(&program, "Transform"); + let translation_uniform = device.get_uniform(&program, "Translation"); + let path_count_uniform = device.get_uniform(&program, "PathCount"); + let last_batch_segment_index_uniform = device.get_uniform(&program, + "LastBatchSegmentIndex"); + let max_microline_count_uniform = device.get_uniform(&program, "MaxMicrolineCount"); + + let compute_indirect_params_storage_buffer = + device.get_storage_buffer(&program, "ComputeIndirectParams", 0); + let dice_metadata_storage_buffer = device.get_storage_buffer(&program, "DiceMetadata", 1); + let points_storage_buffer = device.get_storage_buffer(&program, "Points", 2); + let input_indices_storage_buffer = 
device.get_storage_buffer(&program, "InputIndices", 3); + let microlines_storage_buffer = device.get_storage_buffer(&program, "Microlines", 4); + + DiceProgramD3D11 { + program, + transform_uniform, + translation_uniform, + path_count_uniform, + last_batch_segment_index_uniform, + max_microline_count_uniform, + compute_indirect_params_storage_buffer, + dice_metadata_storage_buffer, + points_storage_buffer, + input_indices_storage_buffer, + microlines_storage_buffer, + } + } +} + +pub struct BoundProgramD3D11 where D: Device { + pub program: D::Program, + pub path_count_uniform: D::Uniform, + pub tile_count_uniform: D::Uniform, + pub tile_path_info_storage_buffer: D::StorageBuffer, + pub tiles_storage_buffer: D::StorageBuffer, +} + +impl BoundProgramD3D11 where D: Device { + pub fn new(device: &D, resources: &dyn ResourceLoader) -> BoundProgramD3D11 { + let mut program = device.create_compute_program(resources, "d3d11/bound"); + let dimensions = ComputeDimensions { x: BOUND_WORKGROUP_SIZE, y: 1, z: 1 }; + device.set_compute_program_local_size(&mut program, dimensions); + + let path_count_uniform = device.get_uniform(&program, "PathCount"); + let tile_count_uniform = device.get_uniform(&program, "TileCount"); + + let tile_path_info_storage_buffer = device.get_storage_buffer(&program, "TilePathInfo", 0); + let tiles_storage_buffer = device.get_storage_buffer(&program, "Tiles", 1); + + BoundProgramD3D11 { + program, + path_count_uniform, + tile_count_uniform, + tile_path_info_storage_buffer, + tiles_storage_buffer, + } + } +} + +pub struct SortProgramD3D11 where D: Device { + pub program: D::Program, + pub tile_count_uniform: D::Uniform, + pub tiles_storage_buffer: D::StorageBuffer, + pub first_tile_map_storage_buffer: D::StorageBuffer, + pub z_buffer_storage_buffer: D::StorageBuffer, +} + +impl SortProgramD3D11 where D: Device { + pub fn new(device: &D, resources: &dyn ResourceLoader) -> SortProgramD3D11 { + let mut program = device.create_compute_program(resources, 
"d3d11/sort"); + let dimensions = ComputeDimensions { x: SORT_WORKGROUP_SIZE, y: 1, z: 1 }; + device.set_compute_program_local_size(&mut program, dimensions); + + let tile_count_uniform = device.get_uniform(&program, "TileCount"); + let tiles_storage_buffer = device.get_storage_buffer(&program, "Tiles", 0); + let first_tile_map_storage_buffer = device.get_storage_buffer(&program, "FirstTileMap", 1); + let z_buffer_storage_buffer = device.get_storage_buffer(&program, "ZBuffer", 2); + + SortProgramD3D11 { + program, + tile_count_uniform, + tiles_storage_buffer, + first_tile_map_storage_buffer, + z_buffer_storage_buffer, + } + } +} \ No newline at end of file diff --git a/renderer/src/gpu/d3d9/mod.rs b/renderer/src/gpu/d3d9/mod.rs new file mode 100644 index 00000000..6f67f3bc --- /dev/null +++ b/renderer/src/gpu/d3d9/mod.rs @@ -0,0 +1,12 @@ +// pathfinder/renderer/src/gpu/d3d9/mod.rs +// +// Copyright © 2020 The Pathfinder Project Developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +pub mod renderer; +pub mod shaders; diff --git a/renderer/src/gpu/d3d9/renderer.rs b/renderer/src/gpu/d3d9/renderer.rs new file mode 100644 index 00000000..9f1e2e16 --- /dev/null +++ b/renderer/src/gpu/d3d9/renderer.rs @@ -0,0 +1,573 @@ +// pathfinder/renderer/src/gpu/d3d9/renderer.rs +// +// Copyright © 2020 The Pathfinder Project Developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +use crate::gpu::blend::{BlendModeExt, ToBlendState}; +use crate::gpu::perf::TimerFuture; +use crate::gpu::renderer::{FramebufferFlags, MASK_FRAMEBUFFER_HEIGHT, MASK_FRAMEBUFFER_WIDTH}; +use crate::gpu::renderer::{RendererCore, RendererFlags}; +use crate::gpu::d3d9::shaders::{ClipTileCombineVertexArrayD3D9, ClipTileCopyVertexArrayD3D9}; +use crate::gpu::d3d9::shaders::{CopyTileVertexArray, FillVertexArrayD3D9}; +use crate::gpu::d3d9::shaders::{ProgramsD3D9, TileVertexArrayD3D9}; +use crate::gpu_data::{Clip, DrawTileBatchD3D9, Fill, TileBatchTexture, TileObjectPrimitive}; +use crate::tile_map::DenseTileMap; +use crate::tiles::{TILE_HEIGHT, TILE_WIDTH}; +use byte_slice_cast::AsByteSlice; +use pathfinder_color::ColorF; +use pathfinder_content::effects::BlendMode; +use pathfinder_geometry::rect::RectI; +use pathfinder_geometry::transform3d::Transform4F; +use pathfinder_geometry::vector::{Vector2I, Vector4F, vec2i}; +use pathfinder_gpu::allocator::{BufferID, BufferTag, FramebufferID, FramebufferTag}; +use pathfinder_gpu::allocator::{TextureID, TextureTag}; +use pathfinder_gpu::{BlendFactor, BlendState, BufferTarget, ClearOps, Device, Primitive}; +use pathfinder_gpu::{RenderOptions, RenderState, RenderTarget, StencilFunc, StencilState}; +use pathfinder_gpu::{TextureDataRef, TextureFormat, UniformData}; +use pathfinder_resources::ResourceLoader; +use pathfinder_simd::default::F32x2; +use std::u32; + +const MAX_FILLS_PER_BATCH: usize = 0x10000; + +pub(crate) struct RendererD3D9 where D: Device { + // Basic data + programs: ProgramsD3D9, + quads_vertex_indices_buffer_id: Option, + quads_vertex_indices_length: usize, + + // Fills. 
+ buffered_fills: Vec, + pending_fills: Vec, + + // Temporary framebuffers + dest_blend_framebuffer_id: FramebufferID, +} + +impl RendererD3D9 where D: Device { + pub(crate) fn new(core: &mut RendererCore, resources: &dyn ResourceLoader) + -> RendererD3D9 { + let programs = ProgramsD3D9::new(&core.device, resources); + + let window_size = core.options.dest.window_size(&core.device); + let dest_blend_framebuffer_id = + core.allocator.allocate_framebuffer(&core.device, + window_size, + TextureFormat::RGBA8, + FramebufferTag("DestBlendD3D9")); + + RendererD3D9 { + programs, + quads_vertex_indices_buffer_id: None, + quads_vertex_indices_length: 0, + + buffered_fills: vec![], + pending_fills: vec![], + + dest_blend_framebuffer_id, + } + } + + pub(crate) fn upload_and_draw_tiles(&mut self, + core: &mut RendererCore, + batch: &DrawTileBatchD3D9) { + if !batch.clips.is_empty() { + let clip_buffer_info = self.upload_clip_tiles(core, &batch.clips); + self.clip_tiles(core, &clip_buffer_info); + core.allocator.free_buffer(clip_buffer_info.clip_buffer_id); + } + + let tile_buffer = self.upload_tiles(core, &batch.tiles); + let z_buffer_texture_id = self.upload_z_buffer(core, &batch.z_buffer_data); + + self.draw_tiles(core, + batch.tiles.len() as u32, + tile_buffer.tile_vertex_buffer_id, + batch.color_texture, + batch.blend_mode, + z_buffer_texture_id); + + core.allocator.free_texture(z_buffer_texture_id); + core.allocator.free_buffer(tile_buffer.tile_vertex_buffer_id); + } + + fn upload_tiles(&mut self, core: &mut RendererCore, tiles: &[TileObjectPrimitive]) + -> TileBufferD3D9 { + let tile_vertex_buffer_id = + core.allocator.allocate_buffer::(&core.device, + tiles.len() as u64, + BufferTag("TileD3D9")); + let tile_vertex_buffer = &core.allocator.get_buffer(tile_vertex_buffer_id); + core.device.upload_to_buffer(tile_vertex_buffer, 0, tiles, BufferTarget::Vertex); + self.ensure_index_buffer(core, tiles.len()); + + TileBufferD3D9 { tile_vertex_buffer_id } + } + + + fn 
ensure_index_buffer(&mut self, core: &mut RendererCore, mut length: usize) { + length = length.next_power_of_two(); + if self.quads_vertex_indices_length >= length { + return; + } + + // TODO(pcwalton): Generate these with SIMD. + let mut indices: Vec = Vec::with_capacity(length * 6); + for index in 0..(length as u32) { + indices.extend_from_slice(&[ + index * 4 + 0, index * 4 + 1, index * 4 + 2, + index * 4 + 1, index * 4 + 3, index * 4 + 2, + ]); + } + + if let Some(quads_vertex_indices_buffer_id) = self.quads_vertex_indices_buffer_id.take() { + core.allocator.free_buffer(quads_vertex_indices_buffer_id); + } + let quads_vertex_indices_buffer_id = + core.allocator.allocate_buffer::(&core.device, + indices.len() as u64, + BufferTag("QuadsVertexIndicesD3D9")); + let quads_vertex_indices_buffer = + core.allocator.get_buffer(quads_vertex_indices_buffer_id); + core.device.upload_to_buffer(quads_vertex_indices_buffer, + 0, + &indices, + BufferTarget::Index); + self.quads_vertex_indices_buffer_id = Some(quads_vertex_indices_buffer_id); + self.quads_vertex_indices_length = length; + } + + pub(crate) fn add_fills(&mut self, core: &mut RendererCore, fill_batch: &[Fill]) { + if fill_batch.is_empty() { + return; + } + + core.stats.fill_count += fill_batch.len(); + + let preserve_alpha_mask_contents = core.alpha_tile_count > 0; + + self.pending_fills.reserve(fill_batch.len()); + for fill in fill_batch { + core.alpha_tile_count = core.alpha_tile_count.max(fill.link + 1); + self.pending_fills.push(*fill); + } + + core.stats.alpha_tile_count = core.alpha_tile_count as usize; + + core.reallocate_alpha_tile_pages_if_necessary(preserve_alpha_mask_contents); + + if self.buffered_fills.len() + self.pending_fills.len() > MAX_FILLS_PER_BATCH { + self.draw_buffered_fills(core); + } + + self.buffered_fills.extend(self.pending_fills.drain(..)); + } + + pub(crate) fn draw_buffered_fills(&mut self, core: &mut RendererCore) { + if self.buffered_fills.is_empty() { + return; + } + + let 
fill_storage_info = self.upload_buffered_fills(core); + self.draw_fills(core, fill_storage_info.fill_buffer_id, fill_storage_info.fill_count); + core.allocator.free_buffer(fill_storage_info.fill_buffer_id); + } + + fn upload_buffered_fills(&mut self, core: &mut RendererCore) -> FillBufferInfoD3D9 { + let buffered_fills = &mut self.buffered_fills; + debug_assert!(!buffered_fills.is_empty()); + + let fill_buffer_id = core.allocator.allocate_buffer::(&core.device, + MAX_FILLS_PER_BATCH as u64, + BufferTag("Fill")); + let fill_vertex_buffer = core.allocator.get_buffer(fill_buffer_id); + debug_assert!(buffered_fills.len() <= u32::MAX as usize); + core.device.upload_to_buffer(fill_vertex_buffer, 0, &buffered_fills, BufferTarget::Vertex); + + let fill_count = buffered_fills.len() as u32; + buffered_fills.clear(); + + FillBufferInfoD3D9 { fill_buffer_id, fill_count } + } + + fn draw_fills(&mut self, + core: &mut RendererCore, + fill_buffer_id: BufferID, + fill_count: u32) { + let fill_raster_program = &self.programs.fill_program; + + let fill_vertex_buffer = core.allocator.get_buffer(fill_buffer_id); + let quad_vertex_positions_buffer = core.allocator + .get_buffer(core.quad_vertex_positions_buffer_id); + let quad_vertex_indices_buffer = core.allocator + .get_buffer(core.quad_vertex_indices_buffer_id); + + let area_lut_texture = core.allocator.get_texture(core.area_lut_texture_id); + + let mask_viewport = self.mask_viewport(core); + let mask_storage = core.mask_storage.as_ref().expect("Where's the mask storage?"); + let mask_framebuffer_id = mask_storage.framebuffer_id; + let mask_framebuffer = core.allocator.get_framebuffer(mask_framebuffer_id); + + let fill_vertex_array = FillVertexArrayD3D9::new(&core.device, + fill_raster_program, + fill_vertex_buffer, + quad_vertex_positions_buffer, + quad_vertex_indices_buffer); + + let mut clear_color = None; + if !core.framebuffer_flags.contains(FramebufferFlags::MASK_FRAMEBUFFER_IS_DIRTY) { + clear_color = Some(ColorF::default()); 
+ }; + + let timer_query = core.timer_query_cache.alloc(&core.device); + core.device.begin_timer_query(&timer_query); + + core.device.draw_elements_instanced(6, fill_count, &RenderState { + target: &RenderTarget::Framebuffer(mask_framebuffer), + program: &fill_raster_program.program, + vertex_array: &fill_vertex_array.vertex_array, + primitive: Primitive::Triangles, + textures: &[(&fill_raster_program.area_lut_texture, area_lut_texture)], + uniforms: &[ + (&fill_raster_program.framebuffer_size_uniform, + UniformData::Vec2(mask_viewport.size().to_f32().0)), + (&fill_raster_program.tile_size_uniform, + UniformData::Vec2(F32x2::new(TILE_WIDTH as f32, TILE_HEIGHT as f32))), + ], + images: &[], + storage_buffers: &[], + viewport: mask_viewport, + options: RenderOptions { + blend: Some(BlendState { + src_rgb_factor: BlendFactor::One, + src_alpha_factor: BlendFactor::One, + dest_rgb_factor: BlendFactor::One, + dest_alpha_factor: BlendFactor::One, + ..BlendState::default() + }), + clear_ops: ClearOps { color: clear_color, ..ClearOps::default() }, + ..RenderOptions::default() + }, + }); + + core.device.end_timer_query(&timer_query); + core.current_timer.as_mut().unwrap().fill_times.push(TimerFuture::new(timer_query)); + core.stats.drawcall_count += 1; + + core.framebuffer_flags.insert(FramebufferFlags::MASK_FRAMEBUFFER_IS_DIRTY); + } + + fn clip_tiles(&mut self, core: &mut RendererCore, clip_buffer_info: &ClipBufferInfo) { + // Allocate temp mask framebuffer. 
+ let mask_temp_framebuffer_id = + core.allocator.allocate_framebuffer(&core.device, + self.mask_viewport(core).size(), + core.mask_texture_format(), + FramebufferTag("TempClipMaskD3D9")); + let mask_temp_framebuffer = core.allocator.get_framebuffer(mask_temp_framebuffer_id); + + let mask_storage = core.mask_storage.as_ref().expect("Where's the mask storage?"); + let mask_framebuffer_id = mask_storage.framebuffer_id; + let mask_framebuffer = core.allocator.get_framebuffer(mask_framebuffer_id); + let mask_texture = core.device.framebuffer_texture(mask_framebuffer); + let mask_texture_size = core.device.texture_size(&mask_texture); + + let clip_vertex_buffer = core.allocator.get_buffer(clip_buffer_info.clip_buffer_id); + let quad_vertex_positions_buffer = core.allocator + .get_buffer(core.quad_vertex_positions_buffer_id); + let quad_vertex_indices_buffer = core.allocator + .get_buffer(core.quad_vertex_indices_buffer_id); + + let tile_clip_copy_vertex_array = + ClipTileCopyVertexArrayD3D9::new(&core.device, + &self.programs.tile_clip_copy_program, + clip_vertex_buffer, + quad_vertex_positions_buffer, + quad_vertex_indices_buffer); + let tile_clip_combine_vertex_array = + ClipTileCombineVertexArrayD3D9::new(&core.device, + &self.programs.tile_clip_combine_program, + clip_vertex_buffer, + quad_vertex_positions_buffer, + quad_vertex_indices_buffer); + + let timer_query = core.timer_query_cache.alloc(&core.device); + core.device.begin_timer_query(&timer_query); + + // Copy out tiles. + // + // TODO(pcwalton): Don't do this on GL4. 
+ core.device.draw_elements_instanced(6, clip_buffer_info.clip_count * 2, &RenderState { + target: &RenderTarget::Framebuffer(mask_temp_framebuffer), + program: &self.programs.tile_clip_copy_program.program, + vertex_array: &tile_clip_copy_vertex_array.vertex_array, + primitive: Primitive::Triangles, + textures: &[ + (&self.programs.tile_clip_copy_program.src_texture, + core.device.framebuffer_texture(mask_framebuffer)), + ], + images: &[], + uniforms: &[ + (&self.programs.tile_clip_copy_program.framebuffer_size_uniform, + UniformData::Vec2(mask_texture_size.to_f32().0)), + ], + storage_buffers: &[], + viewport: RectI::new(Vector2I::zero(), mask_texture_size), + options: RenderOptions::default(), + }); + + // Combine clip tiles. + core.device.draw_elements_instanced(6, clip_buffer_info.clip_count, &RenderState { + target: &RenderTarget::Framebuffer(mask_framebuffer), + program: &self.programs.tile_clip_combine_program.program, + vertex_array: &tile_clip_combine_vertex_array.vertex_array, + primitive: Primitive::Triangles, + textures: &[ + (&self.programs.tile_clip_combine_program.src_texture, + core.device.framebuffer_texture(&mask_temp_framebuffer)), + ], + images: &[], + uniforms: &[ + (&self.programs.tile_clip_combine_program.framebuffer_size_uniform, + UniformData::Vec2(mask_texture_size.to_f32().0)), + ], + storage_buffers: &[], + viewport: RectI::new(Vector2I::zero(), mask_texture_size), + options: RenderOptions::default(), + }); + + core.device.end_timer_query(&timer_query); + core.current_timer.as_mut().unwrap().other_times.push(TimerFuture::new(timer_query)); + core.stats.drawcall_count += 2; + + core.allocator.free_framebuffer(mask_temp_framebuffer_id); + } + + fn upload_z_buffer(&mut self, core: &mut RendererCore, z_buffer_map: &DenseTileMap) + -> TextureID { + let z_buffer_texture_id = core.allocator.allocate_texture(&core.device, + z_buffer_map.rect.size(), + TextureFormat::RGBA8, + TextureTag("ZBufferD3D9")); + let z_buffer_texture = 
core.allocator.get_texture(z_buffer_texture_id); + debug_assert_eq!(z_buffer_map.rect.origin(), Vector2I::default()); + let z_data: &[u8] = z_buffer_map.data.as_byte_slice(); + core.device.upload_to_texture(z_buffer_texture, + z_buffer_map.rect, + TextureDataRef::U8(&z_data)); + z_buffer_texture_id + } + + // Uploads clip tiles from CPU to GPU. + fn upload_clip_tiles(&mut self, core: &mut RendererCore, clips: &[Clip]) -> ClipBufferInfo { + let clip_buffer_id = core.allocator.allocate_buffer::(&core.device, + clips.len() as u64, + BufferTag("ClipD3D9")); + let clip_buffer = core.allocator.get_buffer(clip_buffer_id); + core.device.upload_to_buffer(clip_buffer, 0, clips, BufferTarget::Vertex); + ClipBufferInfo { clip_buffer_id, clip_count: clips.len() as u32 } + } + + fn draw_tiles(&mut self, + core: &mut RendererCore, + tile_count: u32, + tile_vertex_buffer_id: BufferID, + color_texture_0: Option, + blend_mode: BlendMode, + z_buffer_texture_id: TextureID) { + // TODO(pcwalton): Disable blend for solid tiles. 
+ + if tile_count == 0 { + return; + } + + core.stats.total_tile_count += tile_count as usize; + + let needs_readable_framebuffer = blend_mode.needs_readable_framebuffer(); + if needs_readable_framebuffer { + self.copy_alpha_tiles_to_dest_blend_texture(core, tile_count, tile_vertex_buffer_id); + } + + let clear_color = core.clear_color_for_draw_operation(); + let draw_viewport = core.draw_viewport(); + + let timer_query = core.timer_query_cache.alloc(&core.device); + core.device.begin_timer_query(&timer_query); + + let tile_raster_program = &self.programs.tile_program; + + let tile_vertex_buffer = core.allocator.get_buffer(tile_vertex_buffer_id); + let quad_vertex_positions_buffer = core.allocator + .get_buffer(core.quad_vertex_positions_buffer_id); + let quad_vertex_indices_buffer = core.allocator + .get_buffer(core.quad_vertex_indices_buffer_id); + let dest_blend_framebuffer = core.allocator + .get_framebuffer(self.dest_blend_framebuffer_id); + + let (mut textures, mut uniforms) = (vec![], vec![]); + + core.set_uniforms_for_drawing_tiles(&tile_raster_program.common, + &mut textures, + &mut uniforms, + color_texture_0); + + uniforms.push((&tile_raster_program.transform_uniform, + UniformData::Mat4(self.tile_transform(core).to_columns()))); + + if needs_readable_framebuffer { + textures.push((&tile_raster_program.dest_texture, + core.device.framebuffer_texture(dest_blend_framebuffer))); + } + + let z_buffer_texture = core.allocator.get_texture(z_buffer_texture_id); + textures.push((&tile_raster_program.common.z_buffer_texture, z_buffer_texture)); + uniforms.push((&tile_raster_program.common.z_buffer_texture_size_uniform, + UniformData::IVec2(core.device.texture_size(z_buffer_texture).0))); + + let tile_vertex_array = TileVertexArrayD3D9::new(&core.device, + &self.programs.tile_program, + tile_vertex_buffer, + quad_vertex_positions_buffer, + quad_vertex_indices_buffer); + + core.device.draw_elements_instanced(6, tile_count, &RenderState { + target: 
&core.draw_render_target(), + program: &tile_raster_program.common.program, + vertex_array: &tile_vertex_array.vertex_array, + primitive: Primitive::Triangles, + textures: &textures, + images: &[], + storage_buffers: &[], + uniforms: &uniforms, + viewport: draw_viewport, + options: RenderOptions { + blend: blend_mode.to_blend_state(), + stencil: self.stencil_state(core), + clear_ops: ClearOps { color: clear_color, ..ClearOps::default() }, + ..RenderOptions::default() + }, + }); + + core.device.end_timer_query(&timer_query); + core.current_timer.as_mut().unwrap().composite_times.push(TimerFuture::new(timer_query)); + core.stats.drawcall_count += 1; + + core.preserve_draw_framebuffer(); + } + + fn copy_alpha_tiles_to_dest_blend_texture(&mut self, + core: &mut RendererCore, + tile_count: u32, + vertex_buffer_id: BufferID) { + let draw_viewport = core.draw_viewport(); + + let mut textures = vec![]; + let mut uniforms = vec![ + (&self.programs.tile_copy_program.transform_uniform, + UniformData::Mat4(self.tile_transform(core).to_columns())), + (&self.programs.tile_copy_program.tile_size_uniform, + UniformData::Vec2(F32x2::new(TILE_WIDTH as f32, TILE_HEIGHT as f32))), + ]; + + let draw_framebuffer = match core.draw_render_target() { + RenderTarget::Framebuffer(framebuffer) => framebuffer, + RenderTarget::Default => panic!("Can't copy alpha tiles from default framebuffer!"), + }; + let draw_texture = core.device.framebuffer_texture(&draw_framebuffer); + + textures.push((&self.programs.tile_copy_program.src_texture, draw_texture)); + uniforms.push((&self.programs.tile_copy_program.framebuffer_size_uniform, + UniformData::Vec2(draw_viewport.size().to_f32().0))); + + let quads_vertex_indices_buffer_id = self.quads_vertex_indices_buffer_id + .expect("Where's the quads vertex buffer?"); + let quads_vertex_indices_buffer = core.allocator + .get_buffer(quads_vertex_indices_buffer_id); + let vertex_buffer = core.allocator.get_buffer(vertex_buffer_id); + + let 
tile_copy_vertex_array = CopyTileVertexArray::new(&core.device, + &self.programs.tile_copy_program, + vertex_buffer, + quads_vertex_indices_buffer); + + let dest_blend_framebuffer = core.allocator + .get_framebuffer(self.dest_blend_framebuffer_id); + + core.device.draw_elements(tile_count * 6, &RenderState { + target: &RenderTarget::Framebuffer(dest_blend_framebuffer), + program: &self.programs.tile_copy_program.program, + vertex_array: &tile_copy_vertex_array.vertex_array, + primitive: Primitive::Triangles, + textures: &textures, + images: &[], + storage_buffers: &[], + uniforms: &uniforms, + viewport: draw_viewport, + options: RenderOptions { + clear_ops: ClearOps { + color: Some(ColorF::new(1.0, 0.0, 0.0, 1.0)), + ..ClearOps::default() + }, + ..RenderOptions::default() + }, + }); + + core.stats.drawcall_count += 1; + } + + fn stencil_state(&self, core: &RendererCore) -> Option { + if !core.renderer_flags.contains(RendererFlags::USE_DEPTH) { + return None; + } + + Some(StencilState { + func: StencilFunc::Equal, + reference: 1, + mask: 1, + write: false, + }) + } + + fn mask_viewport(&self, core: &RendererCore) -> RectI { + let page_count = match core.mask_storage { + Some(ref mask_storage) => mask_storage.allocated_page_count as i32, + None => 0, + }; + let height = MASK_FRAMEBUFFER_HEIGHT * page_count; + RectI::new(Vector2I::default(), vec2i(MASK_FRAMEBUFFER_WIDTH, height)) + } + + fn tile_transform(&self, core: &RendererCore) -> Transform4F { + let draw_viewport = core.draw_viewport().size().to_f32(); + let scale = Vector4F::new(2.0 / draw_viewport.x(), -2.0 / draw_viewport.y(), 1.0, 1.0); + Transform4F::from_scale(scale).translate(Vector4F::new(-1.0, 1.0, 0.0, 1.0)) + } +} + +#[derive(Clone)] +pub(crate) struct TileBatchInfoD3D9 { + pub(crate) tile_count: u32, + pub(crate) z_buffer_id: BufferID, + tile_vertex_buffer_id: BufferID, +} + +#[derive(Clone)] +struct FillBufferInfoD3D9 { + fill_buffer_id: BufferID, + fill_count: u32, +} + +struct TileBufferD3D9 { + 
tile_vertex_buffer_id: BufferID, +} + +struct ClipBufferInfo { + clip_buffer_id: BufferID, + clip_count: u32, +} diff --git a/renderer/src/gpu/d3d9/shaders.rs b/renderer/src/gpu/d3d9/shaders.rs new file mode 100644 index 00000000..82aabd7d --- /dev/null +++ b/renderer/src/gpu/d3d9/shaders.rs @@ -0,0 +1,441 @@ +// pathfinder/renderer/src/gpu/d3d9/shaders.rs +// +// Copyright © 2020 The Pathfinder Project Developers. +// +// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or +// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license +// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +use crate::gpu::shaders::{TILE_INSTANCE_SIZE, TileProgramCommon}; +use pathfinder_gpu::{BufferTarget, Device, VertexAttrClass, VertexAttrDescriptor, VertexAttrType}; +use pathfinder_resources::ResourceLoader; + +const FILL_INSTANCE_SIZE: usize = 12; +const CLIP_TILE_INSTANCE_SIZE: usize = 16; + +pub struct FillVertexArrayD3D9 where D: Device { + pub vertex_array: D::VertexArray, +} + +impl FillVertexArrayD3D9 where D: Device { + pub fn new(device: &D, + fill_program: &FillProgramD3D9, + vertex_buffer: &D::Buffer, + quad_vertex_positions_buffer: &D::Buffer, + quad_vertex_indices_buffer: &D::Buffer) + -> FillVertexArrayD3D9 { + let vertex_array = device.create_vertex_array(); + + let tess_coord_attr = device.get_vertex_attr(&fill_program.program, "TessCoord").unwrap(); + let line_segment_attr = device.get_vertex_attr(&fill_program.program, "LineSegment") + .unwrap(); + let tile_index_attr = device.get_vertex_attr(&fill_program.program, "TileIndex").unwrap(); + + device.bind_buffer(&vertex_array, quad_vertex_positions_buffer, BufferTarget::Vertex); + device.configure_vertex_attr(&vertex_array, &tess_coord_attr, &VertexAttrDescriptor { + size: 2, + class: VertexAttrClass::Int, + attr_type: VertexAttrType::U16, + stride: 4, + offset: 0, + divisor: 0, + buffer_index: 0, + }); + device.bind_buffer(&vertex_array, &vertex_buffer, BufferTarget::Vertex); +
device.configure_vertex_attr(&vertex_array, &line_segment_attr, &VertexAttrDescriptor { + size: 4, + class: VertexAttrClass::Int, + attr_type: VertexAttrType::U16, + stride: FILL_INSTANCE_SIZE, + offset: 0, + divisor: 1, + buffer_index: 1, + }); + device.configure_vertex_attr(&vertex_array, &tile_index_attr, &VertexAttrDescriptor { + size: 1, + class: VertexAttrClass::Int, + attr_type: VertexAttrType::I32, + stride: FILL_INSTANCE_SIZE, + offset: 8, + divisor: 1, + buffer_index: 1, + }); + device.bind_buffer(&vertex_array, quad_vertex_indices_buffer, BufferTarget::Index); + + FillVertexArrayD3D9 { vertex_array } + } +} + +pub struct TileVertexArrayD3D9 where D: Device { + pub vertex_array: D::VertexArray, +} + +impl TileVertexArrayD3D9 where D: Device { + pub fn new(device: &D, + tile_program: &TileProgramD3D9, + tile_vertex_buffer: &D::Buffer, + quad_vertex_positions_buffer: &D::Buffer, + quad_vertex_indices_buffer: &D::Buffer) + -> TileVertexArrayD3D9 { + let vertex_array = device.create_vertex_array(); + + let tile_offset_attr = + device.get_vertex_attr(&tile_program.common.program, "TileOffset").unwrap(); + let tile_origin_attr = + device.get_vertex_attr(&tile_program.common.program, "TileOrigin").unwrap(); + let mask_0_tex_coord_attr = + device.get_vertex_attr(&tile_program.common.program, "MaskTexCoord0").unwrap(); + let ctrl_backdrop_attr = + device.get_vertex_attr(&tile_program.common.program, "CtrlBackdrop").unwrap(); + let color_attr = device.get_vertex_attr(&tile_program.common.program, "Color").unwrap(); + let path_index_attr = device.get_vertex_attr(&tile_program.common.program, "PathIndex") + .unwrap(); + + device.bind_buffer(&vertex_array, quad_vertex_positions_buffer, BufferTarget::Vertex); + device.configure_vertex_attr(&vertex_array, &tile_offset_attr, &VertexAttrDescriptor { + size: 2, + class: VertexAttrClass::Int, + attr_type: VertexAttrType::I16, + stride: 4, + offset: 0, + divisor: 0, + buffer_index: 0, + }); + 
device.bind_buffer(&vertex_array, tile_vertex_buffer, BufferTarget::Vertex); + device.configure_vertex_attr(&vertex_array, &tile_origin_attr, &VertexAttrDescriptor { + size: 2, + class: VertexAttrClass::Int, + attr_type: VertexAttrType::I16, + stride: TILE_INSTANCE_SIZE, + offset: 0, + divisor: 1, + buffer_index: 1, + }); + device.configure_vertex_attr(&vertex_array, &mask_0_tex_coord_attr, &VertexAttrDescriptor { + size: 4, + class: VertexAttrClass::Int, + attr_type: VertexAttrType::U8, + stride: TILE_INSTANCE_SIZE, + offset: 4, + divisor: 1, + buffer_index: 1, + }); + device.configure_vertex_attr(&vertex_array, &path_index_attr, &VertexAttrDescriptor { + size: 1, + class: VertexAttrClass::Int, + attr_type: VertexAttrType::I32, + stride: TILE_INSTANCE_SIZE, + offset: 8, + divisor: 1, + buffer_index: 1, + }); + device.configure_vertex_attr(&vertex_array, &color_attr, &VertexAttrDescriptor { + size: 1, + class: VertexAttrClass::Int, + attr_type: VertexAttrType::I16, + stride: TILE_INSTANCE_SIZE, + offset: 12, + divisor: 1, + buffer_index: 1, + }); + device.configure_vertex_attr(&vertex_array, &ctrl_backdrop_attr, &VertexAttrDescriptor { + size: 2, + class: VertexAttrClass::Int, + attr_type: VertexAttrType::I8, + stride: TILE_INSTANCE_SIZE, + offset: 14, + divisor: 1, + buffer_index: 1, + }); + device.bind_buffer(&vertex_array, quad_vertex_indices_buffer, BufferTarget::Index); + + TileVertexArrayD3D9 { vertex_array } + } +} + +pub struct ClipTileCopyVertexArrayD3D9 where D: Device { + pub vertex_array: D::VertexArray, +} + +impl ClipTileCopyVertexArrayD3D9 where D: Device { + pub fn new(device: &D, + clip_tile_copy_program: &ClipTileCopyProgramD3D9, + vertex_buffer: &D::Buffer, + quad_vertex_positions_buffer: &D::Buffer, + quad_vertex_indices_buffer: &D::Buffer) + -> ClipTileCopyVertexArrayD3D9 { + let vertex_array = device.create_vertex_array(); + + let tile_offset_attr = + device.get_vertex_attr(&clip_tile_copy_program.program, "TileOffset").unwrap(); + let 
tile_index_attr = + device.get_vertex_attr(&clip_tile_copy_program.program, "TileIndex").unwrap(); + + device.bind_buffer(&vertex_array, quad_vertex_positions_buffer, BufferTarget::Vertex); + device.configure_vertex_attr(&vertex_array, &tile_offset_attr, &VertexAttrDescriptor { + size: 2, + class: VertexAttrClass::Int, + attr_type: VertexAttrType::I16, + stride: 4, + offset: 0, + divisor: 0, + buffer_index: 0, + }); + device.bind_buffer(&vertex_array, &vertex_buffer, BufferTarget::Vertex); + device.configure_vertex_attr(&vertex_array, &tile_index_attr, &VertexAttrDescriptor { + size: 1, + class: VertexAttrClass::Int, + attr_type: VertexAttrType::I32, + stride: CLIP_TILE_INSTANCE_SIZE / 2, + offset: 0, + divisor: 1, + buffer_index: 1, + }); + device.bind_buffer(&vertex_array, quad_vertex_indices_buffer, BufferTarget::Index); + + ClipTileCopyVertexArrayD3D9 { vertex_array } + } +} + +pub struct ClipTileCombineVertexArrayD3D9 where D: Device { + pub vertex_array: D::VertexArray, +} + +impl ClipTileCombineVertexArrayD3D9 where D: Device { + pub fn new(device: &D, + clip_tile_combine_program: &ClipTileCombineProgramD3D9, + vertex_buffer: &D::Buffer, + quad_vertex_positions_buffer: &D::Buffer, + quad_vertex_indices_buffer: &D::Buffer) + -> ClipTileCombineVertexArrayD3D9 { + let vertex_array = device.create_vertex_array(); + + let tile_offset_attr = + device.get_vertex_attr(&clip_tile_combine_program.program, "TileOffset").unwrap(); + let dest_tile_index_attr = + device.get_vertex_attr(&clip_tile_combine_program.program, "DestTileIndex").unwrap(); + let dest_backdrop_attr = + device.get_vertex_attr(&clip_tile_combine_program.program, "DestBackdrop").unwrap(); + let src_tile_index_attr = + device.get_vertex_attr(&clip_tile_combine_program.program, "SrcTileIndex").unwrap(); + let src_backdrop_attr = + device.get_vertex_attr(&clip_tile_combine_program.program, "SrcBackdrop").unwrap(); + + device.bind_buffer(&vertex_array, quad_vertex_positions_buffer, BufferTarget::Vertex); 
+ device.configure_vertex_attr(&vertex_array, &tile_offset_attr, &VertexAttrDescriptor { + size: 2, + class: VertexAttrClass::Int, + attr_type: VertexAttrType::I16, + stride: 4, + offset: 0, + divisor: 0, + buffer_index: 0, + }); + device.bind_buffer(&vertex_array, &vertex_buffer, BufferTarget::Vertex); + device.configure_vertex_attr(&vertex_array, &dest_tile_index_attr, &VertexAttrDescriptor { + size: 1, + class: VertexAttrClass::Int, + attr_type: VertexAttrType::I32, + stride: CLIP_TILE_INSTANCE_SIZE, + offset: 0, + divisor: 1, + buffer_index: 1, + }); + device.configure_vertex_attr(&vertex_array, &dest_backdrop_attr, &VertexAttrDescriptor { + size: 1, + class: VertexAttrClass::Int, + attr_type: VertexAttrType::I32, + stride: CLIP_TILE_INSTANCE_SIZE, + offset: 4, + divisor: 1, + buffer_index: 1, + }); + device.configure_vertex_attr(&vertex_array, &src_tile_index_attr, &VertexAttrDescriptor { + size: 1, + class: VertexAttrClass::Int, + attr_type: VertexAttrType::I32, + stride: CLIP_TILE_INSTANCE_SIZE, + offset: 8, + divisor: 1, + buffer_index: 1, + }); + device.configure_vertex_attr(&vertex_array, &src_backdrop_attr, &VertexAttrDescriptor { + size: 1, + class: VertexAttrClass::Int, + attr_type: VertexAttrType::I32, + stride: CLIP_TILE_INSTANCE_SIZE, + offset: 12, + divisor: 1, + buffer_index: 1, + }); + device.bind_buffer(&vertex_array, quad_vertex_indices_buffer, BufferTarget::Index); + + ClipTileCombineVertexArrayD3D9 { vertex_array } + } +} + +pub struct CopyTileVertexArray where D: Device { + pub vertex_array: D::VertexArray, +} + +impl CopyTileVertexArray where D: Device { + pub fn new(device: &D, + copy_tile_program: &CopyTileProgram, + copy_tile_vertex_buffer: &D::Buffer, + quads_vertex_indices_buffer: &D::Buffer) + -> CopyTileVertexArray { + let vertex_array = device.create_vertex_array(); + + let tile_position_attr = + device.get_vertex_attr(&copy_tile_program.program, "TilePosition").unwrap(); + + device.bind_buffer(&vertex_array, copy_tile_vertex_buffer,
BufferTarget::Vertex); + device.configure_vertex_attr(&vertex_array, &tile_position_attr, &VertexAttrDescriptor { + size: 2, + class: VertexAttrClass::Int, + attr_type: VertexAttrType::I16, + stride: TILE_INSTANCE_SIZE, + offset: 0, + divisor: 0, + buffer_index: 0, + }); + device.bind_buffer(&vertex_array, quads_vertex_indices_buffer, BufferTarget::Index); + + CopyTileVertexArray { vertex_array } + } +} + +pub struct FillProgramD3D9 where D: Device { + pub program: D::Program, + pub framebuffer_size_uniform: D::Uniform, + pub tile_size_uniform: D::Uniform, + pub area_lut_texture: D::TextureParameter, +} + +impl FillProgramD3D9 where D: Device { + fn new(device: &D, resources: &dyn ResourceLoader) -> FillProgramD3D9 { + let program = device.create_raster_program(resources, "d3d9/fill"); + let framebuffer_size_uniform = device.get_uniform(&program, "FramebufferSize"); + let tile_size_uniform = device.get_uniform(&program, "TileSize"); + let area_lut_texture = device.get_texture_parameter(&program, "AreaLUT"); + FillProgramD3D9 { + program, + framebuffer_size_uniform, + tile_size_uniform, + area_lut_texture, + } + } +} + +pub struct TileProgramD3D9 where D: Device { + pub common: TileProgramCommon, + pub dest_texture: D::TextureParameter, + pub transform_uniform: D::Uniform, +} + +impl TileProgramD3D9 where D: Device { + fn new(device: &D, resources: &dyn ResourceLoader) -> TileProgramD3D9 { + let program = device.create_raster_program(resources, "d3d9/tile"); + let dest_texture = device.get_texture_parameter(&program, "DestTexture"); + let transform_uniform = device.get_uniform(&program, "Transform"); + let common = TileProgramCommon::new(device, program); + TileProgramD3D9 { common, dest_texture, transform_uniform } + } +} + +pub struct ClipTileCombineProgramD3D9 where D: Device { + pub program: D::Program, + pub src_texture: D::TextureParameter, + pub framebuffer_size_uniform: D::Uniform, +} + +impl ClipTileCombineProgramD3D9 where D: Device { + pub fn new(device: 
&D, resources: &dyn ResourceLoader) -> ClipTileCombineProgramD3D9 { + let program = device.create_raster_program(resources, "d3d9/tile_clip_combine"); + let src_texture = device.get_texture_parameter(&program, "Src"); + let framebuffer_size_uniform = device.get_uniform(&program, "FramebufferSize"); + ClipTileCombineProgramD3D9 { program, src_texture, framebuffer_size_uniform } + } +} + +pub struct ClipTileCopyProgramD3D9 where D: Device { + pub program: D::Program, + pub src_texture: D::TextureParameter, + pub framebuffer_size_uniform: D::Uniform, +} + +impl ClipTileCopyProgramD3D9 where D: Device { + pub fn new(device: &D, resources: &dyn ResourceLoader) -> ClipTileCopyProgramD3D9 { + let program = device.create_raster_program(resources, "d3d9/tile_clip_copy"); + let src_texture = device.get_texture_parameter(&program, "Src"); + let framebuffer_size_uniform = device.get_uniform(&program, "FramebufferSize"); + ClipTileCopyProgramD3D9 { program, src_texture, framebuffer_size_uniform } + } +} + +pub struct CopyTileProgram where D: Device { + pub program: D::Program, + pub transform_uniform: D::Uniform, + pub tile_size_uniform: D::Uniform, + pub framebuffer_size_uniform: D::Uniform, + pub src_texture: D::TextureParameter, +} + +impl CopyTileProgram where D: Device { + pub fn new(device: &D, resources: &dyn ResourceLoader) -> CopyTileProgram { + let program = device.create_raster_program(resources, "d3d9/tile_copy"); + let transform_uniform = device.get_uniform(&program, "Transform"); + let tile_size_uniform = device.get_uniform(&program, "TileSize"); + let framebuffer_size_uniform = device.get_uniform(&program, "FramebufferSize"); + let src_texture = device.get_texture_parameter(&program, "Src"); + CopyTileProgram { + program, + transform_uniform, + tile_size_uniform, + framebuffer_size_uniform, + src_texture, + } + } +} + +pub struct D3D9Programs where D: Device { + pub fill_program: FillProgramD3D9, + pub tile_program: TileProgramD3D9, + pub 
tile_clip_combine_program: ClipTileCombineProgramD3D9, + pub tile_clip_copy_program: ClipTileCopyProgramD3D9, + pub tile_copy_program: CopyTileProgram, +} + +impl D3D9Programs where D: Device { + pub fn new(device: &D, resources: &dyn ResourceLoader) -> D3D9Programs { + D3D9Programs { + fill_program: FillProgramD3D9::new(device, resources), + tile_program: TileProgramD3D9::new(device, resources), + tile_clip_combine_program: ClipTileCombineProgramD3D9::new(device, resources), + tile_clip_copy_program: ClipTileCopyProgramD3D9::new(device, resources), + tile_copy_program: CopyTileProgram::new(device, resources), + } + } +} + +pub struct ProgramsD3D9 where D: Device { + pub fill_program: FillProgramD3D9, + pub tile_program: TileProgramD3D9, + pub tile_clip_copy_program: ClipTileCopyProgramD3D9, + pub tile_clip_combine_program: ClipTileCombineProgramD3D9, + pub tile_copy_program: CopyTileProgram, +} + +impl ProgramsD3D9 where D: Device { + pub fn new(device: &D, resources: &dyn ResourceLoader) -> ProgramsD3D9 { + ProgramsD3D9 { + fill_program: FillProgramD3D9::new(device, resources), + tile_program: TileProgramD3D9::new(device, resources), + tile_clip_copy_program: ClipTileCopyProgramD3D9::new(device, resources), + tile_clip_combine_program: ClipTileCombineProgramD3D9::new(device, resources), + tile_copy_program: CopyTileProgram::new(device, resources), + } + } +} diff --git a/renderer/src/gpu/debug.rs b/renderer/src/gpu/debug.rs index 1f85035a..ea7e9e31 100644 --- a/renderer/src/gpu/debug.rs +++ b/renderer/src/gpu/debug.rs @@ -15,10 +15,12 @@ //! //! 
The debug font atlas was generated using: https://evanw.github.io/font-texture-generator/ -use crate::gpu::renderer::{RenderStats, RenderTime}; -use pathfinder_geometry::vector::{Vector2I, vec2i}; +use crate::gpu::options::RendererLevel; +use crate::gpu::perf::{RenderStats, RenderTime}; use pathfinder_geometry::rect::RectI; +use pathfinder_geometry::vector::{Vector2I, vec2i}; use pathfinder_gpu::Device; +use pathfinder_gpu::allocator::GPUMemoryAllocator; use pathfinder_resources::ResourceLoader; use pathfinder_ui::{FONT_ASCENT, LINE_HEIGHT, PADDING, UIPresenter, WINDOW_COLOR}; use std::collections::VecDeque; @@ -27,35 +29,39 @@ use std::time::Duration; const SAMPLE_BUFFER_SIZE: usize = 60; -const STATS_WINDOW_WIDTH: i32 = 325; +const STATS_WINDOW_WIDTH: i32 = 275; const STATS_WINDOW_HEIGHT: i32 = LINE_HEIGHT * 4 + PADDING + 2; const PERFORMANCE_WINDOW_WIDTH: i32 = 400; -const PERFORMANCE_WINDOW_HEIGHT: i32 = LINE_HEIGHT * 4 + PADDING + 2; +const PERFORMANCE_WINDOW_HEIGHT_D3D9: i32 = LINE_HEIGHT * 8 + PADDING + 2; +const PERFORMANCE_WINDOW_HEIGHT_D3D11: i32 = LINE_HEIGHT * 10 + PADDING + 2; -pub struct DebugUIPresenter -where - D: Device, -{ +const INFO_WINDOW_WIDTH: i32 = 425; +const INFO_WINDOW_HEIGHT: i32 = LINE_HEIGHT * 2 + PADDING + 2; + +pub struct DebugUIPresenter where D: Device { pub ui_presenter: UIPresenter, cpu_samples: SampleBuffer, gpu_samples: SampleBuffer, + backend_name: &'static str, + device_name: String, + renderer_level: RendererLevel, } -impl DebugUIPresenter -where - D: Device, -{ - pub fn new( - device: &D, - resources: &dyn ResourceLoader, - framebuffer_size: Vector2I, - ) -> DebugUIPresenter { +impl DebugUIPresenter where D: Device { + pub fn new(device: &D, + resources: &dyn ResourceLoader, + framebuffer_size: Vector2I, + renderer_level: RendererLevel) + -> DebugUIPresenter { let ui_presenter = UIPresenter::new(device, resources, framebuffer_size); DebugUIPresenter { ui_presenter, cpu_samples: SampleBuffer::new(), gpu_samples: 
SampleBuffer::new(), + backend_name: device.backend_name(), + device_name: device.device_name(), + renderer_level, } } @@ -64,84 +70,222 @@ where self.gpu_samples.push(rendering_time); } - pub fn draw(&self, device: &D) { - self.draw_stats_window(device); - self.draw_performance_window(device); + pub fn draw(&self, device: &D, allocator: &mut GPUMemoryAllocator) { + self.draw_stats_window(device, allocator); + self.draw_performance_window(device, allocator); + self.draw_info_window(device, allocator); } - fn draw_stats_window(&self, device: &D) { + #[inline] + pub fn set_framebuffer_size(&mut self, new_framebuffer_size: Vector2I) { + self.ui_presenter.set_framebuffer_size(new_framebuffer_size) + } + + fn draw_info_window(&self, device: &D, allocator: &mut GPUMemoryAllocator) { + let framebuffer_size = self.ui_presenter.framebuffer_size(); + let bottom = framebuffer_size.y() - PADDING; + let window_rect = RectI::new( + vec2i(framebuffer_size.x() - PADDING - INFO_WINDOW_WIDTH, + bottom - INFO_WINDOW_HEIGHT), + vec2i(INFO_WINDOW_WIDTH, INFO_WINDOW_HEIGHT), + ); + + self.ui_presenter.draw_solid_rounded_rect(device, allocator, window_rect, WINDOW_COLOR); + + let origin = window_rect.origin() + vec2i(PADDING, PADDING + FONT_ASCENT); + let level = match self.renderer_level { + RendererLevel::D3D9 => "D3D9", + RendererLevel::D3D11 => "D3D11", + }; + self.ui_presenter.draw_text(device, + allocator, + &format!("{} ({} level)", self.backend_name, level), + origin + vec2i(0, LINE_HEIGHT * 0), + false); + self.ui_presenter.draw_text(device, + allocator, + &self.device_name, + origin + vec2i(0, LINE_HEIGHT * 1), + false); + + } + + fn performance_window_size(&self) -> Vector2I { + match self.renderer_level { + RendererLevel::D3D9 => vec2i(PERFORMANCE_WINDOW_WIDTH, PERFORMANCE_WINDOW_HEIGHT_D3D9), + RendererLevel::D3D11 => { + vec2i(PERFORMANCE_WINDOW_WIDTH, PERFORMANCE_WINDOW_HEIGHT_D3D11) + } + } + } + + fn draw_stats_window(&self, device: &D, allocator: &mut 
GPUMemoryAllocator) { + let performance_window_height = self.performance_window_size().y(); + let framebuffer_size = self.ui_presenter.framebuffer_size(); let bottom = framebuffer_size.y() - PADDING; let window_rect = RectI::new( vec2i(framebuffer_size.x() - PADDING - STATS_WINDOW_WIDTH, - bottom - PERFORMANCE_WINDOW_HEIGHT - PADDING - STATS_WINDOW_HEIGHT), - vec2i(STATS_WINDOW_WIDTH, STATS_WINDOW_HEIGHT), - ); + bottom - + PADDING - + INFO_WINDOW_HEIGHT - + performance_window_height - + PADDING - + STATS_WINDOW_HEIGHT), + vec2i(STATS_WINDOW_WIDTH, STATS_WINDOW_HEIGHT)); - self.ui_presenter.draw_solid_rounded_rect(device, window_rect, WINDOW_COLOR); + self.ui_presenter.draw_solid_rounded_rect(device, allocator, window_rect, WINDOW_COLOR); let mean_cpu_sample = self.cpu_samples.mean(); let origin = window_rect.origin() + vec2i(PADDING, PADDING + FONT_ASCENT); self.ui_presenter.draw_text( device, + allocator, &format!("Paths: {}", mean_cpu_sample.path_count), origin, false, ); self.ui_presenter.draw_text( device, - &format!("Solid Tiles: {}", mean_cpu_sample.solid_tile_count), + allocator, + &format!("Tiles: {}", mean_cpu_sample.total_tile_count), origin + vec2i(0, LINE_HEIGHT * 1), false, ); self.ui_presenter.draw_text( device, - &format!("Alpha Tiles: {}", mean_cpu_sample.alpha_tile_count), + allocator, + &format!("Masks: {}", mean_cpu_sample.alpha_tile_count), origin + vec2i(0, LINE_HEIGHT * 2), false, ); self.ui_presenter.draw_text( device, + allocator, &format!("Fills: {}", mean_cpu_sample.fill_count), origin + vec2i(0, LINE_HEIGHT * 3), false, ); } - fn draw_performance_window(&self, device: &D) { + fn draw_performance_window(&self, device: &D, allocator: &mut GPUMemoryAllocator) { + let performance_window_size = self.performance_window_size(); + let framebuffer_size = self.ui_presenter.framebuffer_size(); let bottom = framebuffer_size.y() - PADDING; let window_rect = RectI::new( - vec2i(framebuffer_size.x() - PADDING - PERFORMANCE_WINDOW_WIDTH, - bottom - 
PERFORMANCE_WINDOW_HEIGHT), - vec2i(PERFORMANCE_WINDOW_WIDTH, PERFORMANCE_WINDOW_HEIGHT), - ); + vec2i(framebuffer_size.x() - PADDING - performance_window_size.x(), + bottom - INFO_WINDOW_HEIGHT - PADDING - performance_window_size.y()), + performance_window_size); - self.ui_presenter.draw_solid_rounded_rect(device, window_rect, WINDOW_COLOR); + self.ui_presenter.draw_solid_rounded_rect(device, allocator, window_rect, WINDOW_COLOR); let mean_cpu_sample = self.cpu_samples.mean(); - let origin = window_rect.origin() + vec2i(PADDING, PADDING + FONT_ASCENT); - self.ui_presenter.draw_text( - device, - &format!("CPU: {:.3} ms", duration_to_ms(mean_cpu_sample.cpu_build_time)), - origin, - false, - ); - let mean_gpu_sample = self.gpu_samples.mean(); + let origin = window_rect.origin() + vec2i(PADDING, PADDING + FONT_ASCENT); + + let mut current_y = 0; self.ui_presenter.draw_text( device, - &format!("GPU: {:.3} ms", duration_to_ms(mean_gpu_sample.gpu_time)), - origin + vec2i(0, LINE_HEIGHT * 1), + allocator, + &format!("Drawcalls: {}", mean_cpu_sample.drawcall_count), + origin + vec2i(0, current_y), false, ); - - let wallclock_time = f64::max(duration_to_ms(mean_gpu_sample.gpu_time), - duration_to_ms(mean_cpu_sample.cpu_build_time)); + current_y += LINE_HEIGHT; self.ui_presenter.draw_text( device, + allocator, + &format!("VRAM Alloc.: {:.1} MB", + mean_cpu_sample.gpu_bytes_allocated as f64 / (1024.0 * 1024.0)), + origin + vec2i(0, current_y), + false, + ); + current_y += LINE_HEIGHT; + self.ui_presenter.draw_text( + device, + allocator, + &format!("VRAM Commit: {:.1} MB", + mean_cpu_sample.gpu_bytes_committed as f64 / (1024.0 * 1024.0)), + origin + vec2i(0, current_y), + false, + ); + current_y += LINE_HEIGHT; + + self.ui_presenter.draw_text( + device, + allocator, + &format!("CPU: {:.3} ms", duration_to_ms(mean_cpu_sample.cpu_build_time)), + origin + vec2i(0, current_y), + false, + ); + current_y += LINE_HEIGHT; + + match self.renderer_level { + RendererLevel::D3D11 => { + 
self.ui_presenter.draw_text( + device, + allocator, + &format!("GPU Dice: {:.3} ms", duration_to_ms(mean_gpu_sample.dice_time)), + origin + vec2i(0, current_y), + false, + ); + current_y += LINE_HEIGHT; + self.ui_presenter.draw_text( + device, + allocator, + &format!("GPU Bin: {:.3} ms", duration_to_ms(mean_gpu_sample.bin_time)), + origin + vec2i(0, current_y), + false, + ); + current_y += LINE_HEIGHT; + } + RendererLevel::D3D9 => {} + } + self.ui_presenter.draw_text( + device, + allocator, + &format!("GPU Fill: {:.3} ms", duration_to_ms(mean_gpu_sample.fill_time)), + origin + vec2i(0, current_y), + false, + ); + current_y += LINE_HEIGHT; + self.ui_presenter.draw_text( + device, + allocator, + &format!("GPU Comp.: {:.3} ms", duration_to_ms(mean_gpu_sample.composite_time)), + origin + vec2i(0, current_y), + false, + ); + current_y += LINE_HEIGHT; + self.ui_presenter.draw_text( + device, + allocator, + &format!("GPU Other: {:.3} ms", duration_to_ms(mean_gpu_sample.other_time)), + origin + vec2i(0, current_y), + false, + ); + current_y += LINE_HEIGHT; + + let mut wallclock_time = match self.renderer_level { + RendererLevel::D3D11 => { + duration_to_ms(mean_cpu_sample.cpu_build_time) + + duration_to_ms(mean_gpu_sample.fill_time) + } + RendererLevel::D3D9 => { + f64::max(duration_to_ms(mean_cpu_sample.cpu_build_time), + duration_to_ms(mean_gpu_sample.fill_time)) + } + }; + wallclock_time += duration_to_ms(mean_gpu_sample.composite_time) + + duration_to_ms(mean_gpu_sample.dice_time) + + duration_to_ms(mean_gpu_sample.bin_time) + + duration_to_ms(mean_gpu_sample.other_time); + self.ui_presenter.draw_text( + device, + allocator, &format!("Wallclock: {:.3} ms", wallclock_time), - origin + vec2i(0, LINE_HEIGHT * 3), + origin + vec2i(0, current_y), false, ); } diff --git a/renderer/src/gpu/mod.rs b/renderer/src/gpu/mod.rs index 9d005dec..322bb99b 100644 --- a/renderer/src/gpu/mod.rs +++ b/renderer/src/gpu/mod.rs @@ -10,8 +10,12 @@ //! The GPU renderer for Pathfinder 3. 
+pub mod d3d9; +pub mod d3d11; pub mod debug; pub mod options; +pub mod perf; pub mod renderer; +pub(crate) mod blend; pub(crate) mod shaders; diff --git a/renderer/src/gpu/options.rs b/renderer/src/gpu/options.rs index 435a6317..e5b6aa97 100644 --- a/renderer/src/gpu/options.rs +++ b/renderer/src/gpu/options.rs @@ -11,13 +11,58 @@ use pathfinder_color::ColorF; use pathfinder_geometry::rect::RectI; use pathfinder_geometry::vector::Vector2I; -use pathfinder_gpu::Device; +use pathfinder_gpu::{Device, FeatureLevel}; -/// Options that influence rendering. -#[derive(Default)] -pub struct RendererOptions { +/// Renderer options that can't be changed after the renderer is created. +pub struct RendererMode { + /// The level of hardware features that the renderer will attempt to use. + pub level: RendererLevel, +} + +/// Options that influence rendering that can be changed at runtime. +pub struct RendererOptions where D: Device { + /// Where the rendering should go: either to the default framebuffer (i.e. screen) or to a + /// custom framebuffer. + pub dest: DestFramebuffer, + /// The background color. If not present, transparent is assumed. pub background_color: Option, - pub no_compute: bool, + /// Whether to display the debug UI. + pub show_debug_ui: bool, +} + +#[derive(Clone, Copy, Debug, PartialEq)] +pub enum RendererLevel { + /// Direct3D 9/OpenGL 3.0/WebGL 2.0 compatibility. Bin on CPU, fill and composite on GPU. + D3D9, + /// Direct3D 11/OpenGL 4.3/Metal/Vulkan/WebGPU compatibility. Bin, fill, and composite on GPU. 
+ D3D11, +} + +impl RendererMode { + #[inline] + pub fn default_for_device(device: &D) -> RendererMode where D: Device { + RendererMode { level: RendererLevel::default_for_device(device) } + } +} + +impl Default for RendererOptions where D: Device { + #[inline] + fn default() -> RendererOptions { + RendererOptions { + dest: DestFramebuffer::default(), + background_color: None, + show_debug_ui: false, + } + } +} + +impl RendererLevel { + pub fn default_for_device(device: &D) -> RendererLevel where D: Device { + match device.feature_level() { + FeatureLevel::D3D10 => RendererLevel::D3D9, + FeatureLevel::D3D11 => RendererLevel::D3D11, + } + } } #[derive(Clone)] diff --git a/renderer/src/gpu/perf.rs b/renderer/src/gpu/perf.rs new file mode 100644 index 00000000..d3ab4254 --- /dev/null +++ b/renderer/src/gpu/perf.rs @@ -0,0 +1,227 @@ +// pathfinder/renderer/src/gpu/perf.rs +// +// Copyright © 2020 The Pathfinder Project Developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +//! Performance monitoring infrastructure. 
+ +use pathfinder_gpu::Device; +use std::mem; +use std::ops::{Add, Div}; +use std::time::Duration; + +#[derive(Clone, Copy, Debug, Default)] +pub struct RenderStats { + pub path_count: usize, + pub fill_count: usize, + pub alpha_tile_count: usize, + pub total_tile_count: usize, + pub cpu_build_time: Duration, + pub drawcall_count: u32, + pub gpu_bytes_allocated: u64, + pub gpu_bytes_committed: u64, +} + +impl Add for RenderStats { + type Output = RenderStats; + fn add(self, other: RenderStats) -> RenderStats { + RenderStats { + path_count: self.path_count + other.path_count, + alpha_tile_count: self.alpha_tile_count + other.alpha_tile_count, + total_tile_count: self.total_tile_count + other.total_tile_count, + fill_count: self.fill_count + other.fill_count, + cpu_build_time: self.cpu_build_time + other.cpu_build_time, + drawcall_count: self.drawcall_count + other.drawcall_count, + gpu_bytes_allocated: self.gpu_bytes_allocated + other.gpu_bytes_allocated, + gpu_bytes_committed: self.gpu_bytes_committed + other.gpu_bytes_committed, + } + } +} + +impl Div for RenderStats { + type Output = RenderStats; + fn div(self, divisor: usize) -> RenderStats { + RenderStats { + path_count: self.path_count / divisor, + alpha_tile_count: self.alpha_tile_count / divisor, + total_tile_count: self.total_tile_count / divisor, + fill_count: self.fill_count / divisor, + cpu_build_time: self.cpu_build_time / divisor as u32, + drawcall_count: self.drawcall_count / divisor as u32, + gpu_bytes_allocated: self.gpu_bytes_allocated / divisor as u64, + gpu_bytes_committed: self.gpu_bytes_committed / divisor as u64, + } + } +} + +pub(crate) struct TimerQueryCache where D: Device { + free_queries: Vec, +} + +pub(crate) struct PendingTimer where D: Device { + pub(crate) dice_times: Vec>, + pub(crate) bin_times: Vec>, + pub(crate) fill_times: Vec>, + pub(crate) composite_times: Vec>, + pub(crate) other_times: Vec>, +} + +pub(crate) enum TimerFuture where D: Device { + Pending(D::TimerQuery), + 
Resolved(Duration), +} + +impl TimerQueryCache where D: Device { + pub(crate) fn new() -> TimerQueryCache { + TimerQueryCache { free_queries: vec![] } + } + + pub(crate) fn alloc(&mut self, device: &D) -> D::TimerQuery { + self.free_queries.pop().unwrap_or_else(|| device.create_timer_query()) + } + + pub(crate) fn free(&mut self, old_query: D::TimerQuery) { + self.free_queries.push(old_query); + } +} + +impl PendingTimer where D: Device { + pub(crate) fn new() -> PendingTimer { + PendingTimer { + dice_times: vec![], + bin_times: vec![], + fill_times: vec![], + composite_times: vec![], + other_times: vec![], + } + } + + pub(crate) fn poll(&mut self, device: &D) -> Vec { + let mut old_queries = vec![]; + for future in self.dice_times.iter_mut().chain(self.bin_times.iter_mut()) + .chain(self.fill_times.iter_mut()) + .chain(self.composite_times.iter_mut()) + .chain(self.other_times.iter_mut()) { + if let Some(old_query) = future.poll(device) { + old_queries.push(old_query) + } + } + old_queries + } + + pub(crate) fn total_time(&self) -> Option { + let dice_time = total_time_of_timer_futures(&self.dice_times); + let bin_time = total_time_of_timer_futures(&self.bin_times); + let fill_time = total_time_of_timer_futures(&self.fill_times); + let composite_time = total_time_of_timer_futures(&self.composite_times); + let other_time = total_time_of_timer_futures(&self.other_times); + match (dice_time, bin_time, fill_time, composite_time, other_time) { + (Some(dice_time), + Some(bin_time), + Some(fill_time), + Some(composite_time), + Some(other_time)) => { + Some(RenderTime { dice_time, bin_time, fill_time, composite_time, other_time }) + } + _ => None, + } + } +} + +impl TimerFuture where D: Device { + pub(crate) fn new(query: D::TimerQuery) -> TimerFuture { + TimerFuture::Pending(query) + } + + fn poll(&mut self, device: &D) -> Option { + let duration = match *self { + TimerFuture::Pending(ref query) => device.try_recv_timer_query(query), + TimerFuture::Resolved(_) => None, + 
}; + match duration { + None => None, + Some(duration) => { + match mem::replace(self, TimerFuture::Resolved(duration)) { + TimerFuture::Resolved(_) => unreachable!(), + TimerFuture::Pending(old_query) => Some(old_query), + } + } + } + } +} + +fn total_time_of_timer_futures(futures: &[TimerFuture]) -> Option where D: Device { + let mut total = Duration::default(); + for future in futures { + match *future { + TimerFuture::Pending(_) => return None, + TimerFuture::Resolved(time) => total += time, + } + } + Some(total) +} + +#[derive(Clone, Copy, Debug)] +pub struct RenderTime { + pub dice_time: Duration, + pub bin_time: Duration, + pub fill_time: Duration, + pub composite_time: Duration, + pub other_time: Duration, +} + +impl RenderTime { + #[inline] + pub fn total_time(&self) -> Duration { + self.dice_time + self.bin_time + self.fill_time + self.composite_time + self.other_time + } +} + +impl Default for RenderTime { + #[inline] + fn default() -> RenderTime { + RenderTime { + dice_time: Duration::new(0, 0), + bin_time: Duration::new(0, 0), + fill_time: Duration::new(0, 0), + composite_time: Duration::new(0, 0), + other_time: Duration::new(0, 0), + } + } +} + +impl Add for RenderTime { + type Output = RenderTime; + + #[inline] + fn add(self, other: RenderTime) -> RenderTime { + RenderTime { + dice_time: self.dice_time + other.dice_time, + bin_time: self.bin_time + other.bin_time, + fill_time: self.fill_time + other.fill_time, + composite_time: self.composite_time + other.composite_time, + other_time: self.other_time + other.other_time, + } + } +} + +impl Div for RenderTime { + type Output = RenderTime; + + #[inline] + fn div(self, divisor: usize) -> RenderTime { + let divisor = divisor as u32; + RenderTime { + dice_time: self.dice_time / divisor, + bin_time: self.bin_time / divisor, + fill_time: self.fill_time / divisor, + composite_time: self.composite_time / divisor, + other_time: self.other_time / divisor, + } + } +} diff --git a/renderer/src/gpu/renderer.rs 
b/renderer/src/gpu/renderer.rs index 4ddc7ff5..d97dba19 100644 --- a/renderer/src/gpu/renderer.rs +++ b/renderer/src/gpu/renderer.rs @@ -8,43 +8,36 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. +use crate::gpu::blend::{ToBlendState, ToCompositeCtrl}; +use crate::gpu::d3d9::renderer::RendererD3D9; +use crate::gpu::d3d11::renderer::RendererD3D11; use crate::gpu::debug::DebugUIPresenter; -use crate::gpu::options::{DestFramebuffer, RendererOptions}; +use crate::gpu::options::{DestFramebuffer, RendererLevel, RendererMode, RendererOptions}; +use crate::gpu::perf::{PendingTimer, RenderStats, RenderTime, TimerFuture, TimerQueryCache}; use crate::gpu::shaders::{BlitProgram, BlitVertexArray, ClearProgram, ClearVertexArray}; -use crate::gpu::shaders::{ClipTileProgram, ClipTileVertexArray}; -use crate::gpu::shaders::{CopyTileProgram, CopyTileVertexArray, FillProgram, FillVertexArray}; -use crate::gpu::shaders::{MAX_FILLS_PER_BATCH, MAX_TILES_PER_BATCH, ReprojectionProgram}; -use crate::gpu::shaders::{ReprojectionVertexArray, StencilProgram, StencilVertexArray}; -use crate::gpu::shaders::{TileProgram, TileVertexArray}; -use crate::gpu_data::{ClipBatch, ClipBatchKey, ClipBatchKind, Fill, FillBatchEntry, RenderCommand}; -use crate::gpu_data::{TextureLocation, TextureMetadataEntry, TexturePageDescriptor, TexturePageId}; -use crate::gpu_data::{Tile, TileBatchTexture}; +use crate::gpu::shaders::{ProgramsCore, ReprojectionProgram, ReprojectionVertexArray}; +use crate::gpu::shaders::{StencilProgram, StencilVertexArray, TileProgramCommon, VertexArraysCore}; +use crate::gpu_data::{ColorCombineMode, RenderCommand, TextureLocation, TextureMetadataEntry}; +use crate::gpu_data::{TexturePageDescriptor, TexturePageId, TileBatchTexture}; use crate::options::BoundingQuad; -use crate::paint::PaintCompositeOp; use crate::tiles::{TILE_HEIGHT, TILE_WIDTH}; -use fxhash::FxHashMap; use half::f16; use pathfinder_color::{self as color, ColorF, 
ColorU}; -use pathfinder_content::effects::{BlendMode, BlurDirection, DefringingKernel}; -use pathfinder_content::effects::{Filter, PatternFilter}; +use pathfinder_content::effects::{BlendMode, BlurDirection, Filter, PatternFilter}; use pathfinder_content::render_target::RenderTargetId; -use pathfinder_geometry::line_segment::LineSegment2F; -use pathfinder_geometry::rect::RectI; +use pathfinder_geometry::rect::{RectF, RectI}; use pathfinder_geometry::transform3d::Transform4F; use pathfinder_geometry::util; use pathfinder_geometry::vector::{Vector2F, Vector2I, Vector4F, vec2f, vec2i}; -use pathfinder_gpu::{BlendFactor, BlendOp, BlendState, BufferData, BufferTarget, BufferUploadMode}; -use pathfinder_gpu::{ClearOps, ComputeDimensions, ComputeState, DepthFunc, DepthState, Device}; -use pathfinder_gpu::{ImageAccess, Primitive, RenderOptions, RenderState, RenderTarget}; -use pathfinder_gpu::{StencilFunc, StencilState, TextureBinding, TextureDataRef, TextureFormat}; -use pathfinder_gpu::{UniformBinding, UniformData}; +use pathfinder_gpu::allocator::{BufferID, BufferTag, FramebufferID, FramebufferTag}; +use pathfinder_gpu::allocator::{GPUMemoryAllocator, TextureID, TextureTag}; +use pathfinder_gpu::{BufferData, BufferTarget, ClearOps, DepthFunc, DepthState, Device, Primitive}; +use pathfinder_gpu::{RenderOptions, RenderState, RenderTarget, StencilFunc, StencilState}; +use pathfinder_gpu::{TextureBinding, TextureDataRef, TextureFormat, UniformBinding, UniformData}; use pathfinder_resources::ResourceLoader; use pathfinder_simd::default::{F32x2, F32x4, I32x2}; use std::collections::VecDeque; use std::f32; -use std::marker::PhantomData; -use std::mem; -use std::ops::{Add, Div}; use std::time::Duration; use std::u32; @@ -57,214 +50,254 @@ pub(crate) const MASK_TILES_DOWN: u32 = 256; // 1.0 / sqrt(2*pi) const SQRT_2_PI_INV: f32 = 0.3989422804014327; -const TEXTURE_CACHE_SIZE: usize = 8; - -const MIN_FILL_STORAGE_CLASS: usize = 14; // 0x4000 entries, 128kB -const 
MIN_TILE_STORAGE_CLASS: usize = 10; // 1024 entries, 12kB - const TEXTURE_METADATA_ENTRIES_PER_ROW: i32 = 128; -const TEXTURE_METADATA_TEXTURE_WIDTH: i32 = TEXTURE_METADATA_ENTRIES_PER_ROW * 4; +const TEXTURE_METADATA_TEXTURE_WIDTH: i32 = TEXTURE_METADATA_ENTRIES_PER_ROW * 8; const TEXTURE_METADATA_TEXTURE_HEIGHT: i32 = 65536 / TEXTURE_METADATA_ENTRIES_PER_ROW; // FIXME(pcwalton): Shrink this again! -const MASK_FRAMEBUFFER_WIDTH: i32 = TILE_WIDTH as i32 * MASK_TILES_ACROSS as i32; -const MASK_FRAMEBUFFER_HEIGHT: i32 = TILE_HEIGHT as i32 / 4 * MASK_TILES_DOWN as i32; - -const COMBINER_CTRL_COLOR_COMBINE_SRC_IN: i32 = 0x1; -const COMBINER_CTRL_COLOR_COMBINE_DEST_IN: i32 = 0x2; +pub(crate) const MASK_FRAMEBUFFER_WIDTH: i32 = TILE_WIDTH as i32 * MASK_TILES_ACROSS as i32; +pub(crate) const MASK_FRAMEBUFFER_HEIGHT: i32 = TILE_HEIGHT as i32 / 4 * MASK_TILES_DOWN as i32; const COMBINER_CTRL_FILTER_RADIAL_GRADIENT: i32 = 0x1; const COMBINER_CTRL_FILTER_TEXT: i32 = 0x2; const COMBINER_CTRL_FILTER_BLUR: i32 = 0x3; -const COMBINER_CTRL_COMPOSITE_NORMAL: i32 = 0x0; -const COMBINER_CTRL_COMPOSITE_MULTIPLY: i32 = 0x1; -const COMBINER_CTRL_COMPOSITE_SCREEN: i32 = 0x2; -const COMBINER_CTRL_COMPOSITE_OVERLAY: i32 = 0x3; -const COMBINER_CTRL_COMPOSITE_DARKEN: i32 = 0x4; -const COMBINER_CTRL_COMPOSITE_LIGHTEN: i32 = 0x5; -const COMBINER_CTRL_COMPOSITE_COLOR_DODGE: i32 = 0x6; -const COMBINER_CTRL_COMPOSITE_COLOR_BURN: i32 = 0x7; -const COMBINER_CTRL_COMPOSITE_HARD_LIGHT: i32 = 0x8; -const COMBINER_CTRL_COMPOSITE_SOFT_LIGHT: i32 = 0x9; -const COMBINER_CTRL_COMPOSITE_DIFFERENCE: i32 = 0xa; -const COMBINER_CTRL_COMPOSITE_EXCLUSION: i32 = 0xb; -const COMBINER_CTRL_COMPOSITE_HUE: i32 = 0xc; -const COMBINER_CTRL_COMPOSITE_SATURATION: i32 = 0xd; -const COMBINER_CTRL_COMPOSITE_COLOR: i32 = 0xe; -const COMBINER_CTRL_COMPOSITE_LUMINOSITY: i32 = 0xf; - const COMBINER_CTRL_COLOR_FILTER_SHIFT: i32 = 4; const COMBINER_CTRL_COLOR_COMBINE_SHIFT: i32 = 6; const COMBINER_CTRL_COMPOSITE_SHIFT: i32 = 8; 
pub struct Renderer where D: Device { - // Device - pub device: D, + // Basic data + pub(crate) core: RendererCore, + level_impl: RendererLevelImpl, - // Core data - dest_framebuffer: DestFramebuffer, - options: RendererOptions, + // Shaders blit_program: BlitProgram, clear_program: ClearProgram, - fill_program: FillProgram, - tile_program: TileProgram, - tile_copy_program: CopyTileProgram, - tile_clip_program: ClipTileProgram, stencil_program: StencilProgram, reprojection_program: ReprojectionProgram, - quad_vertex_positions_buffer: D::Buffer, - quad_vertex_indices_buffer: D::Buffer, - next_fills: Vec, - fill_tile_map: Vec, - texture_pages: Vec>>, - render_targets: Vec, - render_target_stack: Vec, - area_lut_texture: D::Texture, - gamma_lut_texture: D::Texture, // Frames - front_frame: Frame, - back_frame: Frame, - front_frame_fence: Option, - - // Rendering state - texture_cache: TextureCache, + frame: Frame, // Debug - pub stats: RenderStats, current_cpu_build_time: Option, - current_timer: Option>, pending_timers: VecDeque>, - timer_query_cache: TimerQueryCache, - pub debug_ui_presenter: DebugUIPresenter, - - // Extra info - flags: RendererFlags, + debug_ui_presenter: Option>, + last_stats: VecDeque, + last_rendering_time: Option, } +enum RendererLevelImpl where D: Device { + D3D9(RendererD3D9), + D3D11(RendererD3D11), +} + +pub(crate) struct RendererCore where D: Device { + // Basic data + pub(crate) device: D, + pub(crate) allocator: GPUMemoryAllocator, + pub(crate) mode: RendererMode, + pub(crate) options: RendererOptions, + pub(crate) renderer_flags: RendererFlags, + + // Performance monitoring + pub(crate) stats: RenderStats, + pub(crate) current_timer: Option>, + pub(crate) timer_query_cache: TimerQueryCache, + + // Core shaders + pub(crate) programs: ProgramsCore, + pub(crate) vertex_arrays: VertexArraysCore, + + // Read-only static core resources + pub(crate) quad_vertex_positions_buffer_id: BufferID, + pub(crate) quad_vertex_indices_buffer_id: 
BufferID, + pub(crate) area_lut_texture_id: TextureID, + pub(crate) gamma_lut_texture_id: TextureID, + + // Read-write static core resources + intermediate_dest_framebuffer_id: FramebufferID, + intermediate_dest_framebuffer_size: Vector2I, + pub(crate) texture_metadata_texture_id: TextureID, + + // Dynamic resources and associated metadata + render_targets: Vec, + pub(crate) render_target_stack: Vec, + pub(crate) pattern_texture_pages: Vec>, + pub(crate) mask_storage: Option, + pub(crate) alpha_tile_count: u32, + pub(crate) framebuffer_flags: FramebufferFlags, +} + +// TODO(pcwalton): Remove this. struct Frame where D: Device { - framebuffer_flags: FramebufferFlags, blit_vertex_array: BlitVertexArray, clear_vertex_array: ClearVertexArray, - fill_vertex_storage_allocator: StorageAllocator>, - tile_vertex_storage_allocator: StorageAllocator>, - quads_vertex_indices_buffer: D::Buffer, - quads_vertex_indices_length: usize, - alpha_tile_pages: FxHashMap>, - tile_clip_vertex_array: ClipTileVertexArray, stencil_vertex_array: StencilVertexArray, reprojection_vertex_array: ReprojectionVertexArray, - dest_blend_framebuffer: D::Framebuffer, - intermediate_dest_framebuffer: D::Framebuffer, - texture_metadata_texture: D::Texture, +} + +pub(crate) struct MaskStorage { + pub(crate) framebuffer_id: FramebufferID, + pub(crate) allocated_page_count: u32, } impl Renderer where D: Device { pub fn new(device: D, resources: &dyn ResourceLoader, - dest_framebuffer: DestFramebuffer, - options: RendererOptions) + mode: RendererMode, + options: RendererOptions) -> Renderer { - let blit_program = BlitProgram::new(&device, resources); - let clear_program = ClearProgram::new(&device, resources); - let fill_program = FillProgram::new(&device, resources, &options); - let tile_program = TileProgram::new(&device, resources); - let tile_copy_program = CopyTileProgram::new(&device, resources); - let tile_clip_program = ClipTileProgram::new(&device, resources); - let stencil_program = 
StencilProgram::new(&device, resources); - let reprojection_program = ReprojectionProgram::new(&device, resources); + let mut allocator = GPUMemoryAllocator::new(); - let area_lut_texture = - device.create_texture_from_png(resources, "area-lut", TextureFormat::RGBA8); - let gamma_lut_texture = - device.create_texture_from_png(resources, "gamma-lut", TextureFormat::R8); + device.begin_commands(); - let quad_vertex_positions_buffer = device.create_buffer(BufferUploadMode::Static); - device.allocate_buffer(&quad_vertex_positions_buffer, - BufferData::Memory(&QUAD_VERTEX_POSITIONS), - BufferTarget::Vertex); - let quad_vertex_indices_buffer = device.create_buffer(BufferUploadMode::Static); - device.allocate_buffer(&quad_vertex_indices_buffer, - BufferData::Memory(&QUAD_VERTEX_INDICES), - BufferTarget::Index); + let quad_vertex_positions_buffer_id = + allocator.allocate_buffer::(&device, + QUAD_VERTEX_POSITIONS.len() as u64, + BufferTag("QuadVertexPositions")); + device.upload_to_buffer(allocator.get_buffer(quad_vertex_positions_buffer_id), + 0, + &QUAD_VERTEX_POSITIONS, + BufferTarget::Vertex); + let quad_vertex_indices_buffer_id = + allocator.allocate_buffer::(&device, + QUAD_VERTEX_INDICES.len() as u64, + BufferTag("QuadVertexIndices")); + device.upload_to_buffer(allocator.get_buffer(quad_vertex_indices_buffer_id), + 0, + &QUAD_VERTEX_INDICES, + BufferTarget::Index); - let window_size = dest_framebuffer.window_size(&device); + let area_lut_texture_id = allocator.allocate_texture(&device, + Vector2I::splat(256), + TextureFormat::RGBA8, + TextureTag("AreaLUT")); + let gamma_lut_texture_id = allocator.allocate_texture(&device, + vec2i(256, 8), + TextureFormat::R8, + TextureTag("GammaLUT")); + device.upload_png_to_texture(resources, + "area-lut", + allocator.get_texture(area_lut_texture_id), + TextureFormat::RGBA8); + device.upload_png_to_texture(resources, + "gamma-lut", + allocator.get_texture(gamma_lut_texture_id), + TextureFormat::R8); - let timer_query_cache = 
TimerQueryCache::new(&device); - let debug_ui_presenter = DebugUIPresenter::new(&device, resources, window_size); + let window_size = options.dest.window_size(&device); + let intermediate_dest_framebuffer_id = + allocator.allocate_framebuffer(&device, + window_size, + TextureFormat::RGBA8, + FramebufferTag("IntermediateDest")); - let front_frame = Frame::new(&device, - &blit_program, - &clear_program, - &tile_clip_program, - &reprojection_program, - &stencil_program, - &quad_vertex_positions_buffer, - &quad_vertex_indices_buffer, - window_size); - let back_frame = Frame::new(&device, - &blit_program, - &clear_program, - &tile_clip_program, - &reprojection_program, - &stencil_program, - &quad_vertex_positions_buffer, - &quad_vertex_indices_buffer, - window_size); + let texture_metadata_texture_size = vec2i(TEXTURE_METADATA_TEXTURE_WIDTH, + TEXTURE_METADATA_TEXTURE_HEIGHT); + let texture_metadata_texture_id = + allocator.allocate_texture(&device, + texture_metadata_texture_size, + TextureFormat::RGBA16F, + TextureTag("TextureMetadata")); - Renderer { + let core_programs = ProgramsCore::new(&device, resources); + let core_vertex_arrays = + VertexArraysCore::new(&device, + &core_programs, + allocator.get_buffer(quad_vertex_positions_buffer_id), + allocator.get_buffer(quad_vertex_indices_buffer_id)); + + let mut core = RendererCore { device, - - dest_framebuffer, + allocator, + mode, options, - blit_program, - clear_program, - fill_program, - tile_program, - tile_copy_program, - tile_clip_program, - quad_vertex_positions_buffer, - quad_vertex_indices_buffer, - next_fills: vec![], - fill_tile_map: vec![-1; 256 * 256], - texture_pages: vec![], + stats: RenderStats::default(), + current_timer: None, + timer_query_cache: TimerQueryCache::new(), + renderer_flags: RendererFlags::empty(), + + programs: core_programs, + vertex_arrays: core_vertex_arrays, + + quad_vertex_positions_buffer_id, + quad_vertex_indices_buffer_id, + area_lut_texture_id, + gamma_lut_texture_id, + + 
intermediate_dest_framebuffer_id, + intermediate_dest_framebuffer_size: window_size, + + texture_metadata_texture_id, render_targets: vec![], render_target_stack: vec![], + pattern_texture_pages: vec![], + mask_storage: None, + alpha_tile_count: 0, + framebuffer_flags: FramebufferFlags::empty(), + }; - front_frame, - back_frame, - front_frame_fence: None, + let level_impl = match core.mode.level { + RendererLevel::D3D9 => { + RendererLevelImpl::D3D9(RendererD3D9::new(&mut core, resources)) + } + RendererLevel::D3D11 => { + RendererLevelImpl::D3D11(RendererD3D11::new(&mut core, resources)) + } + }; - area_lut_texture, - gamma_lut_texture, + let blit_program = BlitProgram::new(&core.device, resources); + let clear_program = ClearProgram::new(&core.device, resources); + let stencil_program = StencilProgram::new(&core.device, resources); + let reprojection_program = ReprojectionProgram::new(&core.device, resources); + + let debug_ui_presenter = if core.options.show_debug_ui { + Some(DebugUIPresenter::new(&core.device, resources, window_size, core.mode.level)) + } else { + None + }; + + let frame = Frame::new(&core.device, + &mut core.allocator, + &blit_program, + &clear_program, + &reprojection_program, + &stencil_program, + quad_vertex_positions_buffer_id, + quad_vertex_indices_buffer_id); + + core.device.end_commands(); + + Renderer { + core, + level_impl, + + blit_program, + clear_program, + + frame, stencil_program, - reprojection_program, - stats: RenderStats::default(), current_cpu_build_time: None, - current_timer: None, pending_timers: VecDeque::new(), - timer_query_cache, debug_ui_presenter, - - texture_cache: TextureCache::new(), - - flags: RendererFlags::empty(), + last_stats: VecDeque::new(), + last_rendering_time: None, } } pub fn begin_scene(&mut self) { - self.back_frame.framebuffer_flags = FramebufferFlags::empty(); - for alpha_tile_page in self.back_frame.alpha_tile_pages.values_mut() { - alpha_tile_page.framebuffer_is_dirty = false; - } + 
self.core.framebuffer_flags = FramebufferFlags::empty(); - self.device.begin_commands(); - self.current_timer = Some(PendingTimer::new()); - self.stats = RenderStats::default(); + self.core.device.begin_commands(); + self.core.current_timer = Some(PendingTimer::new()); + self.core.stats = RenderStats::default(); + + self.core.alpha_tile_count = 0; } pub fn render_command(&mut self, command: &RenderCommand) { @@ -274,7 +307,7 @@ impl Renderer where D: Device { self.start_rendering(bounding_quad, path_count, needs_readable_framebuffer); } RenderCommand::AllocateTexturePage { page_id, ref descriptor } => { - self.allocate_texture_page(page_id, descriptor) + self.allocate_pattern_texture_page(page_id, descriptor) } RenderCommand::UploadTexelData { ref texels, location } => { self.upload_texel_data(texels, location) @@ -285,35 +318,32 @@ impl Renderer where D: Device { RenderCommand::UploadTextureMetadata(ref metadata) => { self.upload_texture_metadata(metadata) } - RenderCommand::AddFills(ref fills) => self.add_fills(fills), - RenderCommand::FlushFills => { - let page_indices: Vec<_> = - self.back_frame.alpha_tile_pages.keys().cloned().collect(); - for page_index in page_indices { - self.draw_buffered_fills(page_index) - } + RenderCommand::AddFillsD3D9(ref fills) => { + self.level_impl.require_d3d9().add_fills(&mut self.core, fills) } - RenderCommand::ClipTiles(ref batches) => { - batches.iter().for_each(|batch| self.draw_clip_batch(batch)) + RenderCommand::FlushFillsD3D9 => { + self.level_impl.require_d3d9().draw_buffered_fills(&mut self.core); + } + RenderCommand::UploadSceneD3D11 { ref draw_segments, ref clip_segments } => { + self.level_impl + .require_d3d11() + .upload_scene(&mut self.core, draw_segments, clip_segments) } - RenderCommand::BeginTileDrawing => {} RenderCommand::PushRenderTarget(render_target_id) => { self.push_render_target(render_target_id) } RenderCommand::PopRenderTarget => self.pop_render_target(), - RenderCommand::DrawTiles(ref batch) => { - 
let count = batch.tiles.len(); - self.stats.alpha_tile_count += count; - let storage_id = self.upload_tiles(&batch.tiles); - self.draw_tiles(batch.tile_page, - count as u32, - storage_id, - batch.color_texture, - batch.blend_mode, - batch.filter) + RenderCommand::PrepareClipTilesD3D11(ref batch) => { + self.level_impl.require_d3d11().prepare_tiles(&mut self.core, batch) + } + RenderCommand::DrawTilesD3D9(ref batch) => { + self.level_impl.require_d3d9().upload_and_draw_tiles(&mut self.core, batch) + } + RenderCommand::DrawTilesD3D11(ref batch) => { + self.level_impl.require_d3d11().prepare_and_draw_tiles(&mut self.core, batch) } RenderCommand::Finish { cpu_build_time } => { - self.stats.cpu_build_time = cpu_build_time; + self.core.stats.cpu_build_time = cpu_build_time; } } } @@ -322,150 +352,221 @@ impl Renderer where D: Device { self.clear_dest_framebuffer_if_necessary(); self.blit_intermediate_dest_framebuffer_if_necessary(); - let old_front_frame_fence = self.front_frame_fence.take(); - self.front_frame_fence = Some(self.device.add_fence()); - self.device.end_commands(); + self.core.stats.gpu_bytes_allocated = self.core.allocator.bytes_allocated(); + self.core.stats.gpu_bytes_committed = self.core.allocator.bytes_committed(); - self.back_frame.fill_vertex_storage_allocator.end_frame(); - self.back_frame.tile_vertex_storage_allocator.end_frame(); + match self.level_impl { + RendererLevelImpl::D3D9(_) => {} + RendererLevelImpl::D3D11(ref mut d3d11_renderer) => { + d3d11_renderer.end_frame(&mut self.core) + } + } - if let Some(timer) = self.current_timer.take() { + if let Some(timer) = self.core.current_timer.take() { self.pending_timers.push_back(timer); } self.current_cpu_build_time = None; - if let Some(old_front_frame_fence) = old_front_frame_fence { - self.device.wait_for_fence(&old_front_frame_fence); + self.update_debug_ui(); + if self.core.options.show_debug_ui { + self.draw_debug_ui(); } - mem::swap(&mut self.front_frame, &mut self.back_frame); + 
self.core.allocator.purge_if_needed(); + + self.core.device.end_commands(); } fn start_rendering(&mut self, bounding_quad: BoundingQuad, path_count: usize, - mut needs_readable_framebuffer: bool) { - if let DestFramebuffer::Other(_) = self.dest_framebuffer { - needs_readable_framebuffer = false; + needs_readable_framebuffer: bool) { + match (&self.core.options.dest, self.core.mode.level) { + (&DestFramebuffer::Other(_), _) => { + self.core + .renderer_flags + .remove(RendererFlags::INTERMEDIATE_DEST_FRAMEBUFFER_NEEDED); + } + (&DestFramebuffer::Default { .. }, RendererLevel::D3D11) => { + self.core + .renderer_flags + .insert(RendererFlags::INTERMEDIATE_DEST_FRAMEBUFFER_NEEDED); + } + _ => { + self.core + .renderer_flags + .set(RendererFlags::INTERMEDIATE_DEST_FRAMEBUFFER_NEEDED, + needs_readable_framebuffer); + } } - if self.flags.contains(RendererFlags::USE_DEPTH) { + if self.core.renderer_flags.contains(RendererFlags::USE_DEPTH) { self.draw_stencil(&bounding_quad); } - self.stats.path_count = path_count; - self.flags.set(RendererFlags::INTERMEDIATE_DEST_FRAMEBUFFER_NEEDED, - needs_readable_framebuffer); + self.core.stats.path_count = path_count; - self.render_targets.clear(); + self.core.render_targets.clear(); } - pub fn draw_debug_ui(&self) { - self.debug_ui_presenter.draw(&self.device); + fn update_debug_ui(&mut self) { + self.last_stats.push_back(self.core.stats); + self.shift_rendering_time(); + + if !self.core.options.show_debug_ui || self.debug_ui_presenter.is_none() { + return; + } + + if let Some(last_rendering_time) = self.last_rendering_time { + self.debug_ui_presenter + .as_mut() + .unwrap() + .add_sample(self.last_stats.pop_front().unwrap(), last_rendering_time); + } } - pub fn shift_rendering_time(&mut self) -> Option { + fn draw_debug_ui(&mut self) { + if let Some(ref mut debug_ui_presenter) = self.debug_ui_presenter { + let window_size = self.core.options.dest.window_size(&self.core.device); + debug_ui_presenter.set_framebuffer_size(window_size); 
+ debug_ui_presenter.draw(&self.core.device, &mut self.core.allocator); + } + } + + fn shift_rendering_time(&mut self) { if let Some(mut pending_timer) = self.pending_timers.pop_front() { - for old_query in pending_timer.poll(&self.device) { - self.timer_query_cache.free(old_query); + for old_query in pending_timer.poll(&self.core.device) { + self.core.timer_query_cache.free(old_query); } - if let Some(gpu_time) = pending_timer.total_time() { - return Some(RenderTime { gpu_time }) + if let Some(render_time) = pending_timer.total_time() { + self.last_rendering_time = Some(render_time); + return; } self.pending_timers.push_front(pending_timer); } - None + self.last_rendering_time = None; + } + + pub fn last_rendering_time(&self) -> Option { + self.last_rendering_time } #[inline] - pub fn dest_framebuffer(&self) -> &DestFramebuffer { - &self.dest_framebuffer + pub fn device(&self) -> &D { + &self.core.device } #[inline] - pub fn replace_dest_framebuffer( - &mut self, - new_dest_framebuffer: DestFramebuffer, - ) -> DestFramebuffer { - mem::replace(&mut self.dest_framebuffer, new_dest_framebuffer) + pub fn device_mut(&mut self) -> &mut D { + &mut self.core.device } #[inline] - pub fn set_options(&mut self, new_options: RendererOptions) { - self.options = new_options + pub fn mode(&self) -> &RendererMode { + &self.core.mode + } + + #[inline] + pub fn options(&self) -> &RendererOptions { + &self.core.options + } + + pub fn options_mut(&mut self) -> &mut RendererOptions { + &mut self.core.options + } + + #[inline] + pub fn debug_ui_presenter_mut(&mut self) -> DebugUIPresenterInfo { + DebugUIPresenterInfo { + device: &mut self.core.device, + allocator: &mut self.core.allocator, + debug_ui_presenter: self.debug_ui_presenter.as_mut().expect("Debug UI disabled!"), + } } #[inline] pub fn set_main_framebuffer_size(&mut self, new_framebuffer_size: Vector2I) { - self.debug_ui_presenter.ui_presenter.set_framebuffer_size(new_framebuffer_size); + if let Some(ref mut 
debug_ui_presenter) = self.debug_ui_presenter { + debug_ui_presenter.ui_presenter.set_framebuffer_size(new_framebuffer_size); + } } #[inline] pub fn disable_depth(&mut self) { - self.flags.remove(RendererFlags::USE_DEPTH); + self.core.renderer_flags.remove(RendererFlags::USE_DEPTH); } #[inline] pub fn enable_depth(&mut self) { - self.flags.insert(RendererFlags::USE_DEPTH); + self.core.renderer_flags.insert(RendererFlags::USE_DEPTH); + } + + #[inline] + pub fn stats(&self) -> &RenderStats { + &self.core.stats } #[inline] pub fn quad_vertex_positions_buffer(&self) -> &D::Buffer { - &self.quad_vertex_positions_buffer + self.core.allocator.get_buffer(self.core.quad_vertex_positions_buffer_id) } #[inline] pub fn quad_vertex_indices_buffer(&self) -> &D::Buffer { - &self.quad_vertex_indices_buffer + self.core.allocator.get_buffer(self.core.quad_vertex_indices_buffer_id) } - fn allocate_texture_page(&mut self, - page_id: TexturePageId, - descriptor: &TexturePageDescriptor) { + fn allocate_pattern_texture_page(&mut self, + page_id: TexturePageId, + descriptor: &TexturePageDescriptor) { // Fill in IDs up to the requested page ID. let page_index = page_id.0 as usize; - while self.texture_pages.len() < page_index + 1 { - self.texture_pages.push(None); + while self.core.pattern_texture_pages.len() < page_index + 1 { + self.core.pattern_texture_pages.push(None); } // Clear out any existing texture. - if let Some(old_texture_page) = self.texture_pages[page_index].take() { - let old_texture = self.device.destroy_framebuffer(old_texture_page.framebuffer); - self.texture_cache.release_texture(old_texture); + if let Some(old_texture_page) = self.core.pattern_texture_pages[page_index].take() { + self.core.allocator.free_framebuffer(old_texture_page.framebuffer_id); } // Allocate texture. 
let texture_size = descriptor.size; - let texture = self.texture_cache.create_texture(&mut self.device, - TextureFormat::RGBA8, - texture_size); - let framebuffer = self.device.create_framebuffer(texture); - self.texture_pages[page_index] = Some(TexturePage { - framebuffer, + let framebuffer_id = self.core + .allocator + .allocate_framebuffer(&self.core.device, + texture_size, + TextureFormat::RGBA8, + FramebufferTag("PatternPage")); + self.core.pattern_texture_pages[page_index] = Some(PatternTexturePage { + framebuffer_id, must_preserve_contents: false, }); } fn upload_texel_data(&mut self, texels: &[ColorU], location: TextureLocation) { - let texture_page = self.texture_pages[location.page.0 as usize] + let texture_page = self.core + .pattern_texture_pages[location.page.0 as usize] .as_mut() .expect("Texture page not allocated yet!"); - let texture = self.device.framebuffer_texture(&texture_page.framebuffer); + let framebuffer_id = texture_page.framebuffer_id; + let framebuffer = self.core.allocator.get_framebuffer(framebuffer_id); + let texture = self.core.device.framebuffer_texture(framebuffer); let texels = color::color_slice_to_u8_slice(texels); - self.device.upload_to_texture(texture, location.rect, TextureDataRef::U8(texels)); + self.core.device.upload_to_texture(texture, location.rect, TextureDataRef::U8(texels)); texture_page.must_preserve_contents = true; } fn declare_render_target(&mut self, render_target_id: RenderTargetId, location: TextureLocation) { - while self.render_targets.len() < render_target_id.render_target as usize + 1 { - self.render_targets.push(RenderTargetInfo { + while self.core.render_targets.len() < render_target_id.render_target as usize + 1 { + self.core.render_targets.push(RenderTargetInfo { location: TextureLocation { page: TexturePageId(!0), rect: RectI::default() }, }); } - let mut render_target = &mut self.render_targets[render_target_id.render_target as usize]; + let mut render_target = + &mut 
self.core.render_targets[render_target_id.render_target as usize]; debug_assert_eq!(render_target.location.page, TexturePageId(!0)); render_target.location = location; } @@ -477,19 +578,46 @@ impl Renderer where D: Device { let mut texels = Vec::with_capacity(padded_texel_size); for entry in metadata { let base_color = entry.base_color.to_f32(); + let filter_params = self.compute_filter_params(&entry.filter, + entry.blend_mode, + entry.color_0_combine_mode); texels.extend_from_slice(&[ + // 0 f16::from_f32(entry.color_0_transform.m11()), f16::from_f32(entry.color_0_transform.m21()), f16::from_f32(entry.color_0_transform.m12()), f16::from_f32(entry.color_0_transform.m22()), + // 1 f16::from_f32(entry.color_0_transform.m13()), f16::from_f32(entry.color_0_transform.m23()), f16::default(), f16::default(), + // 2 f16::from_f32(base_color.r()), f16::from_f32(base_color.g()), f16::from_f32(base_color.b()), f16::from_f32(base_color.a()), + // 3 + f16::from_f32(filter_params.p0.x()), + f16::from_f32(filter_params.p0.y()), + f16::from_f32(filter_params.p0.z()), + f16::from_f32(filter_params.p0.w()), + // 4 + f16::from_f32(filter_params.p1.x()), + f16::from_f32(filter_params.p1.y()), + f16::from_f32(filter_params.p1.z()), + f16::from_f32(filter_params.p1.w()), + // 5 + f16::from_f32(filter_params.p2.x()), + f16::from_f32(filter_params.p2.y()), + f16::from_f32(filter_params.p2.z()), + f16::from_f32(filter_params.p2.w()), + // 6 + f16::from_f32(filter_params.ctrl as f32), + f16::default(), + f16::default(), + f16::default(), + // 7 f16::default(), f16::default(), f16::default(), @@ -500,554 +628,18 @@ impl Renderer where D: Device { texels.push(f16::default()) } - let texture = &mut self.back_frame.texture_metadata_texture; + let texture_id = self.core.texture_metadata_texture_id; + let texture = self.core.allocator.get_texture(texture_id); let width = TEXTURE_METADATA_TEXTURE_WIDTH; let height = texels.len() as i32 / (4 * TEXTURE_METADATA_TEXTURE_WIDTH); let rect = 
RectI::new(Vector2I::zero(), Vector2I::new(width, height)); - self.device.upload_to_texture(texture, rect, TextureDataRef::F16(&texels)); - } - - fn upload_tiles(&mut self, tiles: &[Tile]) -> StorageID { - debug_assert!(tiles.len() <= MAX_TILES_PER_BATCH); - - let tile_program = &self.tile_program; - let tile_copy_program = &self.tile_copy_program; - let quad_vertex_positions_buffer = &self.quad_vertex_positions_buffer; - let quad_vertex_indices_buffer = &self.quad_vertex_indices_buffer; - let storage_id = self.back_frame.tile_vertex_storage_allocator.allocate(&self.device, - tiles.len() as u64, - |device, size| { - TileVertexStorage::new(size, - device, - tile_program, - tile_copy_program, - quad_vertex_positions_buffer, - quad_vertex_indices_buffer) - }); - - let vertex_buffer = &self.back_frame - .tile_vertex_storage_allocator - .get(storage_id) - .vertex_buffer; - self.device.upload_to_buffer(vertex_buffer, 0, tiles, BufferTarget::Vertex); - - self.ensure_index_buffer(tiles.len()); - - storage_id - } - - fn ensure_index_buffer(&mut self, mut length: usize) { - length = length.next_power_of_two(); - if self.back_frame.quads_vertex_indices_length >= length { - return; - } - - // TODO(pcwalton): Generate these with SIMD. 
- let mut indices: Vec = Vec::with_capacity(length * 6); - for index in 0..(length as u32) { - indices.extend_from_slice(&[ - index * 4 + 0, index * 4 + 1, index * 4 + 2, - index * 4 + 1, index * 4 + 3, index * 4 + 2, - ]); - } - - self.device.allocate_buffer(&self.back_frame.quads_vertex_indices_buffer, - BufferData::Memory(&indices), - BufferTarget::Index); - - self.back_frame.quads_vertex_indices_length = length; - } - - fn add_fills(&mut self, fill_batch: &[FillBatchEntry]) { - if fill_batch.is_empty() { - return; - } - - self.stats.fill_count += fill_batch.len(); - - // We have to make sure we don't split batches across draw calls, or else the compute - // shader path, which expects to see all the fills belonging to one tile in the same - // batch, will break. - - let mut pages_touched = vec![]; - for fill_batch_entry in fill_batch { - let page_index = fill_batch_entry.page; - if !self.back_frame.alpha_tile_pages.contains_key(&page_index) { - let alpha_tile_page = AlphaTilePage::new(&mut self.device); - self.back_frame.alpha_tile_pages.insert(page_index, alpha_tile_page); - } - - let page = self.back_frame.alpha_tile_pages.get_mut(&page_index).unwrap(); - if page.pending_fills.is_empty() { - pages_touched.push(page_index); - } - page.pending_fills.push(fill_batch_entry.fill); - } - - for page_index in pages_touched { - if self.back_frame.alpha_tile_pages[&page_index].buffered_fills.len() + - self.back_frame.alpha_tile_pages[&page_index].pending_fills.len() > - MAX_FILLS_PER_BATCH { - self.draw_buffered_fills(page_index); - } - - let page = self.back_frame.alpha_tile_pages.get_mut(&page_index).unwrap(); - for fill in &page.pending_fills { - page.buffered_fills.push(*fill); - } - page.pending_fills.clear(); - } - } - - fn draw_buffered_fills(&mut self, page: u16) { - match self.fill_program { - FillProgram::Raster(_) => self.draw_buffered_fills_via_raster(page), - FillProgram::Compute(_) => self.draw_buffered_fills_via_compute(page), - } - } - - fn 
draw_buffered_fills_via_raster(&mut self, page: u16) { - let fill_raster_program = match self.fill_program { - FillProgram::Raster(ref fill_raster_program) => fill_raster_program, - _ => unreachable!(), - }; - - let mask_viewport = self.mask_viewport(); - - let alpha_tile_page = self.back_frame - .alpha_tile_pages - .get_mut(&page) - .expect("Where's the alpha tile page?"); - let buffered_fills = &mut alpha_tile_page.buffered_fills; - if buffered_fills.is_empty() { - return; - } - - let storage_id = { - let fill_program = &self.fill_program; - let quad_vertex_positions_buffer = &self.quad_vertex_positions_buffer; - let quad_vertex_indices_buffer = &self.quad_vertex_indices_buffer; - self.back_frame - .fill_vertex_storage_allocator - .allocate(&self.device, MAX_FILLS_PER_BATCH as u64, |device, size| { - FillVertexStorage::new(size, - device, - fill_program, - quad_vertex_positions_buffer, - quad_vertex_indices_buffer) - }) - }; - let fill_vertex_storage = self.back_frame.fill_vertex_storage_allocator.get(storage_id); - - let fill_vertex_array = match fill_vertex_storage.auxiliary { - FillVertexStorageAuxiliary::Raster { ref vertex_array } => vertex_array, - _ => unreachable!(), - }; - - self.device.upload_to_buffer(&fill_vertex_storage.vertex_buffer, - 0, - &buffered_fills, - BufferTarget::Vertex); - - let mut clear_color = None; - if !alpha_tile_page.framebuffer_is_dirty { - clear_color = Some(ColorF::default()); - }; - - let timer_query = self.timer_query_cache.alloc(&self.device); - self.device.begin_timer_query(&timer_query); - - debug_assert!(buffered_fills.len() <= u32::MAX as usize); - self.device.draw_elements_instanced(6, buffered_fills.len() as u32, &RenderState { - target: &RenderTarget::Framebuffer(&alpha_tile_page.framebuffer), - program: &fill_raster_program.program, - vertex_array: &fill_vertex_array.vertex_array, - primitive: Primitive::Triangles, - textures: &[(&fill_raster_program.area_lut_texture, &self.area_lut_texture)], - uniforms: &[ - 
(&fill_raster_program.framebuffer_size_uniform, - UniformData::Vec2(F32x2::new(MASK_FRAMEBUFFER_WIDTH as f32, - MASK_FRAMEBUFFER_HEIGHT as f32))), - (&fill_raster_program.tile_size_uniform, - UniformData::Vec2(F32x2::new(TILE_WIDTH as f32, TILE_HEIGHT as f32))), - ], - images: &[], - viewport: mask_viewport, - options: RenderOptions { - blend: Some(BlendState { - src_rgb_factor: BlendFactor::One, - src_alpha_factor: BlendFactor::One, - dest_rgb_factor: BlendFactor::One, - dest_alpha_factor: BlendFactor::One, - ..BlendState::default() - }), - clear_ops: ClearOps { color: clear_color, ..ClearOps::default() }, - ..RenderOptions::default() - }, - }); - - self.device.end_timer_query(&timer_query); - self.current_timer.as_mut().unwrap().fill_times.push(TimerFuture::new(timer_query)); - - alpha_tile_page.framebuffer_is_dirty = true; - buffered_fills.clear(); - } - - fn draw_buffered_fills_via_compute(&mut self, page: u16) { - let fill_compute_program = match self.fill_program { - FillProgram::Compute(ref fill_compute_program) => fill_compute_program, - _ => unreachable!(), - }; - - let alpha_tile_page = self.back_frame - .alpha_tile_pages - .get_mut(&page) - .expect("Where's the alpha tile page?"); - let buffered_fills = &mut alpha_tile_page.buffered_fills; - if buffered_fills.is_empty() { - return; - } - - let storage_id = { - let fill_program = &self.fill_program; - let quad_vertex_positions_buffer = &self.quad_vertex_positions_buffer; - let quad_vertex_indices_buffer = &self.quad_vertex_indices_buffer; - self.back_frame.fill_vertex_storage_allocator.allocate(&self.device, - MAX_FILLS_PER_BATCH as u64, - |device, size| { - FillVertexStorage::new(size, - device, - fill_program, - quad_vertex_positions_buffer, - quad_vertex_indices_buffer) - }) - }; - let fill_vertex_storage = self.back_frame.fill_vertex_storage_allocator.get(storage_id); - - let (tile_map_buffer, next_fills_buffer) = match fill_vertex_storage.auxiliary { - FillVertexStorageAuxiliary::Compute { ref 
tile_map_buffer, ref next_fills_buffer } => { - (tile_map_buffer, next_fills_buffer) - } - _ => unreachable!(), - }; - - // Initialize the tile map and fill linked list buffers. - self.fill_tile_map.iter_mut().for_each(|entry| *entry = -1); - while self.next_fills.len() < buffered_fills.len() { - self.next_fills.push(-1); - } - - // Create a linked list running through all our fills. - let (mut first_fill_tile, mut last_fill_tile) = (256 * 256, 0); - for (fill_index, fill) in buffered_fills.iter().enumerate() { - let fill_tile_index = fill.alpha_tile_index as usize; - self.next_fills[fill_index as usize] = self.fill_tile_map[fill_tile_index]; - self.fill_tile_map[fill_tile_index] = fill_index as i32; - first_fill_tile = first_fill_tile.min(fill_tile_index as u32); - last_fill_tile = last_fill_tile.max(fill_tile_index as u32); - } - let fill_tile_count = last_fill_tile - first_fill_tile + 1; - - self.device.upload_to_buffer(&fill_vertex_storage.vertex_buffer, - 0, - &buffered_fills, - BufferTarget::Storage); - self.device.upload_to_buffer(next_fills_buffer, - 0, - &self.next_fills, - BufferTarget::Storage); - self.device.upload_to_buffer(tile_map_buffer, - 0, - &self.fill_tile_map, - BufferTarget::Storage); - - let image_texture = self.device.framebuffer_texture(&alpha_tile_page.framebuffer); - - let timer_query = self.timer_query_cache.alloc(&self.device); - self.device.begin_timer_query(&timer_query); - - debug_assert!(buffered_fills.len() <= u32::MAX as usize); - let dimensions = ComputeDimensions { x: 1, y: 1, z: fill_tile_count as u32 }; - self.device.dispatch_compute(dimensions, &ComputeState { - program: &fill_compute_program.program, - textures: &[(&fill_compute_program.area_lut_texture, &self.area_lut_texture)], - images: &[(&fill_compute_program.dest_image, image_texture, ImageAccess::Write)], - uniforms: &[ - (&fill_compute_program.first_tile_index_uniform, - UniformData::Int(first_fill_tile as i32)), - ], - storage_buffers: &[ - 
(&fill_compute_program.fills_storage_buffer, &fill_vertex_storage.vertex_buffer), - (&fill_compute_program.next_fills_storage_buffer, next_fills_buffer), - (&fill_compute_program.fill_tile_map_storage_buffer, tile_map_buffer), - ], - }); - - self.device.end_timer_query(&timer_query); - self.current_timer.as_mut().unwrap().fill_times.push(TimerFuture::new(timer_query)); - - alpha_tile_page.framebuffer_is_dirty = true; - buffered_fills.clear(); - } - - fn draw_clip_batch(&mut self, batch: &ClipBatch) { - if batch.clips.is_empty() { - return; - } - - let ClipBatchKey { dest_page, src_page, kind } = batch.key; - - self.device.allocate_buffer(&self.back_frame.tile_clip_vertex_array.vertex_buffer, - BufferData::Memory(&batch.clips), - BufferTarget::Vertex); - - if !self.back_frame.alpha_tile_pages.contains_key(&dest_page) { - let alpha_tile_page = AlphaTilePage::new(&mut self.device); - self.back_frame.alpha_tile_pages.insert(dest_page, alpha_tile_page); - } - - let mut clear_color = None; - if !self.back_frame.alpha_tile_pages[&dest_page].framebuffer_is_dirty { - clear_color = Some(ColorF::default()); - }; - - let blend = match kind { - ClipBatchKind::Draw => None, - ClipBatchKind::Clip => { - Some(BlendState { - src_rgb_factor: BlendFactor::One, - src_alpha_factor: BlendFactor::One, - dest_rgb_factor: BlendFactor::One, - dest_alpha_factor: BlendFactor::One, - op: BlendOp::Min, - }) - } - }; - - let mask_viewport = self.mask_viewport(); - - let timer_query = self.timer_query_cache.alloc(&self.device); - self.device.begin_timer_query(&timer_query); - - { - let dest_framebuffer = &self.back_frame.alpha_tile_pages[&dest_page].framebuffer; - let src_framebuffer = &self.back_frame.alpha_tile_pages[&src_page].framebuffer; - let src_texture = self.device.framebuffer_texture(&src_framebuffer); - - debug_assert!(batch.clips.len() <= u32::MAX as usize); - self.device.draw_elements_instanced(6, batch.clips.len() as u32, &RenderState { - target: 
&RenderTarget::Framebuffer(dest_framebuffer), - program: &self.tile_clip_program.program, - vertex_array: &self.back_frame.tile_clip_vertex_array.vertex_array, - primitive: Primitive::Triangles, - textures: &[(&self.tile_clip_program.src_texture, src_texture)], - images: &[], - uniforms: &[], - viewport: mask_viewport, - options: RenderOptions { - blend, - clear_ops: ClearOps { color: clear_color, ..ClearOps::default() }, - ..RenderOptions::default() - }, - }); - - self.device.end_timer_query(&timer_query); - self.current_timer.as_mut().unwrap().fill_times.push(TimerFuture::new(timer_query)); - } - - self.back_frame - .alpha_tile_pages - .get_mut(&dest_page) - .unwrap() - .framebuffer_is_dirty = true; - } - - fn tile_transform(&self) -> Transform4F { - let draw_viewport = self.draw_viewport().size().to_f32(); - let scale = Vector4F::new(2.0 / draw_viewport.x(), -2.0 / draw_viewport.y(), 1.0, 1.0); - Transform4F::from_scale(scale).translate(Vector4F::new(-1.0, 1.0, 0.0, 1.0)) - } - - fn draw_tiles(&mut self, - tile_page: u16, - tile_count: u32, - storage_id: StorageID, - color_texture_0: Option, - blend_mode: BlendMode, - filter: Filter) { - // TODO(pcwalton): Disable blend for solid tiles. 
- - let needs_readable_framebuffer = blend_mode.needs_readable_framebuffer(); - if needs_readable_framebuffer { - self.copy_alpha_tiles_to_dest_blend_texture(tile_count, storage_id); - } - - let clear_color = self.clear_color_for_draw_operation(); - let draw_viewport = self.draw_viewport(); - - let timer_query = self.timer_query_cache.alloc(&self.device); - self.device.begin_timer_query(&timer_query); - - let mut textures = vec![ - (&self.tile_program.texture_metadata_texture, - &self.back_frame.texture_metadata_texture), - ]; - let mut uniforms = vec![ - (&self.tile_program.transform_uniform, - UniformData::Mat4(self.tile_transform().to_columns())), - (&self.tile_program.tile_size_uniform, - UniformData::Vec2(F32x2::new(TILE_WIDTH as f32, TILE_HEIGHT as f32))), - (&self.tile_program.framebuffer_size_uniform, - UniformData::Vec2(draw_viewport.size().to_f32().0)), - (&self.tile_program.texture_metadata_size_uniform, - UniformData::IVec2(I32x2::new(TEXTURE_METADATA_TEXTURE_WIDTH, - TEXTURE_METADATA_TEXTURE_HEIGHT))), - ]; - - if needs_readable_framebuffer { - textures.push((&self.tile_program.dest_texture, - self.device - .framebuffer_texture(&self.back_frame.dest_blend_framebuffer))); - } - - if let Some(alpha_tile_page) = self.back_frame.alpha_tile_pages.get(&tile_page) { - uniforms.push((&self.tile_program.mask_texture_size_0_uniform, - UniformData::Vec2(F32x2::new(MASK_FRAMEBUFFER_WIDTH as f32, - MASK_FRAMEBUFFER_HEIGHT as f32)))); - textures.push((&self.tile_program.mask_texture_0, - self.device.framebuffer_texture(&alpha_tile_page.framebuffer))); - } - - // TODO(pcwalton): Refactor. 
- let mut ctrl = 0; - match color_texture_0 { - Some(color_texture) => { - let color_texture_page = self.texture_page(color_texture.page); - let color_texture_size = self.device.texture_size(color_texture_page).to_f32(); - self.device.set_texture_sampling_mode(color_texture_page, - color_texture.sampling_flags); - textures.push((&self.tile_program.color_texture_0, color_texture_page)); - uniforms.push((&self.tile_program.color_texture_size_0_uniform, - UniformData::Vec2(color_texture_size.0))); - - ctrl |= color_texture.composite_op.to_combine_mode() << - COMBINER_CTRL_COLOR_COMBINE_SHIFT; - } - None => { - uniforms.push((&self.tile_program.color_texture_size_0_uniform, - UniformData::Vec2(F32x2::default()))); - } - } - - ctrl |= blend_mode.to_composite_ctrl() << COMBINER_CTRL_COMPOSITE_SHIFT; - - match filter { - Filter::None => self.set_uniforms_for_no_filter(&mut uniforms), - Filter::RadialGradient { line, radii, uv_origin } => { - ctrl |= COMBINER_CTRL_FILTER_RADIAL_GRADIENT << COMBINER_CTRL_COLOR_FILTER_SHIFT; - self.set_uniforms_for_radial_gradient_filter(&mut uniforms, line, radii, uv_origin) - } - Filter::PatternFilter(PatternFilter::Text { - fg_color, - bg_color, - defringing_kernel, - gamma_correction, - }) => { - ctrl |= COMBINER_CTRL_FILTER_TEXT << COMBINER_CTRL_COLOR_FILTER_SHIFT; - self.set_uniforms_for_text_filter(&mut textures, - &mut uniforms, - fg_color, - bg_color, - defringing_kernel, - gamma_correction); - } - Filter::PatternFilter(PatternFilter::Blur { direction, sigma }) => { - ctrl |= COMBINER_CTRL_FILTER_BLUR << COMBINER_CTRL_COLOR_FILTER_SHIFT; - self.set_uniforms_for_blur_filter(&mut uniforms, direction, sigma); - } - } - - uniforms.push((&self.tile_program.ctrl_uniform, UniformData::Int(ctrl))); - - let vertex_array = &self.back_frame - .tile_vertex_storage_allocator - .get(storage_id) - .tile_vertex_array - .vertex_array; - - self.device.draw_elements_instanced(6, tile_count, &RenderState { - target: &self.draw_render_target(), - 
program: &self.tile_program.program, - vertex_array, - primitive: Primitive::Triangles, - textures: &textures, - images: &[], - uniforms: &uniforms, - viewport: draw_viewport, - options: RenderOptions { - blend: blend_mode.to_blend_state(), - stencil: self.stencil_state(), - clear_ops: ClearOps { color: clear_color, ..ClearOps::default() }, - ..RenderOptions::default() - }, - }); - - self.device.end_timer_query(&timer_query); - self.current_timer.as_mut().unwrap().tile_times.push(TimerFuture::new(timer_query)); - - self.preserve_draw_framebuffer(); - } - - fn copy_alpha_tiles_to_dest_blend_texture(&mut self, tile_count: u32, storage_id: StorageID) { - let draw_viewport = self.draw_viewport(); - - let mut textures = vec![]; - let mut uniforms = vec![ - (&self.tile_copy_program.transform_uniform, - UniformData::Mat4(self.tile_transform().to_columns())), - (&self.tile_copy_program.tile_size_uniform, - UniformData::Vec2(F32x2::new(TILE_WIDTH as f32, TILE_HEIGHT as f32))), - ]; - - let draw_framebuffer = match self.draw_render_target() { - RenderTarget::Framebuffer(framebuffer) => framebuffer, - RenderTarget::Default => panic!("Can't copy alpha tiles from default framebuffer!"), - }; - let draw_texture = self.device.framebuffer_texture(&draw_framebuffer); - - textures.push((&self.tile_copy_program.src_texture, draw_texture)); - uniforms.push((&self.tile_copy_program.framebuffer_size_uniform, - UniformData::Vec2(draw_viewport.size().to_f32().0))); - - let vertex_array = &self.back_frame - .tile_vertex_storage_allocator - .get(storage_id) - .tile_copy_vertex_array - .vertex_array; - - self.device.draw_elements(tile_count * 6, &RenderState { - target: &RenderTarget::Framebuffer(&self.back_frame.dest_blend_framebuffer), - program: &self.tile_copy_program.program, - vertex_array, - primitive: Primitive::Triangles, - textures: &textures, - images: &[], - uniforms: &uniforms, - viewport: draw_viewport, - options: RenderOptions { - clear_ops: ClearOps { - color: 
Some(ColorF::new(1.0, 0.0, 0.0, 1.0)), - ..ClearOps::default() - }, - ..RenderOptions::default() - }, - }); + self.core.device.upload_to_texture(texture, rect, TextureDataRef::F16(&texels)); } fn draw_stencil(&mut self, quad_positions: &[Vector4F]) { - self.device.allocate_buffer(&self.back_frame.stencil_vertex_array.vertex_buffer, - BufferData::Memory(quad_positions), - BufferTarget::Vertex); + self.core.device.allocate_buffer(&self.frame.stencil_vertex_array.vertex_buffer, + BufferData::Memory(quad_positions), + BufferTarget::Vertex); // Create indices for a triangle fan. (This is OK because the clipped quad should always be // convex.) @@ -1055,19 +647,20 @@ impl Renderer where D: Device { for index in 1..(quad_positions.len() as u32 - 1) { indices.extend_from_slice(&[0, index as u32, index + 1]); } - self.device.allocate_buffer(&self.back_frame.stencil_vertex_array.index_buffer, + self.core.device.allocate_buffer(&self.frame.stencil_vertex_array.index_buffer, BufferData::Memory(&indices), BufferTarget::Index); - self.device.draw_elements(indices.len() as u32, &RenderState { - target: &self.draw_render_target(), + self.core.device.draw_elements(indices.len() as u32, &RenderState { + target: &self.core.draw_render_target(), program: &self.stencil_program.program, - vertex_array: &self.back_frame.stencil_vertex_array.vertex_array, + vertex_array: &self.frame.stencil_vertex_array.vertex_array, primitive: Primitive::Triangles, textures: &[], images: &[], + storage_buffers: &[], uniforms: &[], - viewport: self.draw_viewport(), + viewport: self.core.draw_viewport(), options: RenderOptions { // FIXME(pcwalton): Should we really write to the depth buffer? 
depth: Some(DepthState { func: DepthFunc::Less, write: true }), @@ -1082,30 +675,31 @@ impl Renderer where D: Device { ..RenderOptions::default() }, }); + + self.core.stats.drawcall_count += 1; } - pub fn reproject_texture( - &mut self, - texture: &D::Texture, - old_transform: &Transform4F, - new_transform: &Transform4F, - ) { - let clear_color = self.clear_color_for_draw_operation(); + pub fn reproject_texture(&mut self, + texture: &D::Texture, + old_transform: &Transform4F, + new_transform: &Transform4F) { + let clear_color = self.core.clear_color_for_draw_operation(); - self.device.draw_elements(6, &RenderState { - target: &self.draw_render_target(), + self.core.device.draw_elements(6, &RenderState { + target: &self.core.draw_render_target(), program: &self.reprojection_program.program, - vertex_array: &self.back_frame.reprojection_vertex_array.vertex_array, + vertex_array: &self.frame.reprojection_vertex_array.vertex_array, primitive: Primitive::Triangles, textures: &[(&self.reprojection_program.texture, texture)], images: &[], + storage_buffers: &[], uniforms: &[ (&self.reprojection_program.old_transform_uniform, UniformData::from_transform_3d(old_transform)), (&self.reprojection_program.new_transform_uniform, UniformData::from_transform_3d(new_transform)), ], - viewport: self.draw_viewport(), + viewport: self.core.draw_viewport(), options: RenderOptions { blend: BlendMode::SrcOver.to_blend_state(), depth: Some(DepthState { func: DepthFunc::Less, write: false, }), @@ -1114,131 +708,30 @@ impl Renderer where D: Device { }, }); - self.preserve_draw_framebuffer(); - } + self.core.stats.drawcall_count += 1; - pub fn draw_render_target(&self) -> RenderTarget { - match self.render_target_stack.last() { - Some(&render_target_id) => { - let texture_page_id = self.render_target_location(render_target_id).page; - let framebuffer = self.texture_page_framebuffer(texture_page_id); - RenderTarget::Framebuffer(framebuffer) - } - None => { - if 
self.flags.contains(RendererFlags::INTERMEDIATE_DEST_FRAMEBUFFER_NEEDED) { - RenderTarget::Framebuffer(&self.back_frame.intermediate_dest_framebuffer) - } else { - match self.dest_framebuffer { - DestFramebuffer::Default { .. } => RenderTarget::Default, - DestFramebuffer::Other(ref framebuffer) => { - RenderTarget::Framebuffer(framebuffer) - } - } - } - } - } + self.core.preserve_draw_framebuffer(); } fn push_render_target(&mut self, render_target_id: RenderTargetId) { - self.render_target_stack.push(render_target_id); + self.core.render_target_stack.push(render_target_id); } fn pop_render_target(&mut self) { - self.render_target_stack.pop().expect("Render target stack underflow!"); - } - - fn set_uniforms_for_no_filter<'a>(&'a self, - uniforms: &mut Vec<(&'a D::Uniform, UniformData)>) { - uniforms.extend_from_slice(&[ - (&self.tile_program.filter_params_0_uniform, UniformData::Vec4(F32x4::default())), - (&self.tile_program.filter_params_1_uniform, UniformData::Vec4(F32x4::default())), - (&self.tile_program.filter_params_2_uniform, UniformData::Vec4(F32x4::default())), - ]); - } - - fn set_uniforms_for_radial_gradient_filter<'a>( - &'a self, - uniforms: &mut Vec<(&'a D::Uniform, UniformData)>, - line: LineSegment2F, - radii: F32x2, - uv_origin: Vector2F) { - uniforms.extend_from_slice(&[ - (&self.tile_program.filter_params_0_uniform, - UniformData::Vec4(line.from().0.concat_xy_xy(line.vector().0))), - (&self.tile_program.filter_params_1_uniform, - UniformData::Vec4(radii.concat_xy_xy(uv_origin.0))), - (&self.tile_program.filter_params_2_uniform, UniformData::Vec4(F32x4::default())), - ]); - } - - fn set_uniforms_for_text_filter<'a>( - &'a self, - textures: &mut Vec>, - uniforms: &mut Vec>, - fg_color: ColorF, - bg_color: ColorF, - defringing_kernel: Option, - gamma_correction: bool) { - textures.push((&self.tile_program.gamma_lut_texture, &self.gamma_lut_texture)); - - match defringing_kernel { - Some(ref kernel) => { - 
uniforms.push((&self.tile_program.filter_params_0_uniform, - UniformData::Vec4(F32x4::from_slice(&kernel.0)))); - } - None => { - uniforms.push((&self.tile_program.filter_params_0_uniform, - UniformData::Vec4(F32x4::default()))); - } - } - - let mut params_2 = fg_color.0; - params_2.set_w(gamma_correction as i32 as f32); - - uniforms.extend_from_slice(&[ - (&self.tile_program.filter_params_1_uniform, UniformData::Vec4(bg_color.0)), - (&self.tile_program.filter_params_2_uniform, UniformData::Vec4(params_2)), - ]); - } - - fn set_uniforms_for_blur_filter<'a>(&'a self, - uniforms: &mut Vec<(&'a D::Uniform, UniformData)>, - direction: BlurDirection, - sigma: f32) { - let sigma_inv = 1.0 / sigma; - let gauss_coeff_x = SQRT_2_PI_INV * sigma_inv; - let gauss_coeff_y = f32::exp(-0.5 * sigma_inv * sigma_inv); - let gauss_coeff_z = gauss_coeff_y * gauss_coeff_y; - - let src_offset = match direction { - BlurDirection::X => vec2f(1.0, 0.0), - BlurDirection::Y => vec2f(0.0, 1.0), - }; - - let support = f32::ceil(1.5 * sigma) * 2.0; - - uniforms.extend_from_slice(&[ - (&self.tile_program.filter_params_0_uniform, - UniformData::Vec4(src_offset.0.concat_xy_xy(F32x2::new(support, 0.0)))), - (&self.tile_program.filter_params_1_uniform, - UniformData::Vec4(F32x4::new(gauss_coeff_x, gauss_coeff_y, gauss_coeff_z, 0.0))), - (&self.tile_program.filter_params_2_uniform, UniformData::Vec4(F32x4::default())), - ]); + self.core.render_target_stack.pop().expect("Render target stack underflow!"); } fn clear_dest_framebuffer_if_necessary(&mut self) { - let background_color = match self.options.background_color { + let background_color = match self.core.options.background_color { None => return, Some(background_color) => background_color, }; - if self.back_frame - .framebuffer_flags - .contains(FramebufferFlags::DEST_FRAMEBUFFER_IS_DIRTY) { + if self.core.framebuffer_flags.contains(FramebufferFlags::DEST_FRAMEBUFFER_IS_DIRTY) { return; } - let main_viewport = self.main_viewport(); + let 
main_viewport = self.core.main_viewport(); let uniforms = [ (&self.clear_program.rect_uniform, UniformData::Vec4(main_viewport.to_f32().0)), (&self.clear_program.framebuffer_size_uniform, @@ -1246,39 +739,63 @@ impl Renderer where D: Device { (&self.clear_program.color_uniform, UniformData::Vec4(background_color.0)), ]; - self.device.draw_elements(6, &RenderState { + self.core.device.draw_elements(6, &RenderState { target: &RenderTarget::Default, program: &self.clear_program.program, - vertex_array: &self.back_frame.clear_vertex_array.vertex_array, + vertex_array: &self.frame.clear_vertex_array.vertex_array, primitive: Primitive::Triangles, textures: &[], images: &[], + storage_buffers: &[], uniforms: &uniforms[..], viewport: main_viewport, options: RenderOptions::default(), }); + + self.core.stats.drawcall_count += 1; } fn blit_intermediate_dest_framebuffer_if_necessary(&mut self) { - if !self.flags.contains(RendererFlags::INTERMEDIATE_DEST_FRAMEBUFFER_NEEDED) { + if !self.core + .renderer_flags + .contains(RendererFlags::INTERMEDIATE_DEST_FRAMEBUFFER_NEEDED) { return; } - let main_viewport = self.main_viewport(); + let main_viewport = self.core.main_viewport(); + + if self.core.intermediate_dest_framebuffer_size != main_viewport.size() { + self.core.allocator.free_framebuffer(self.core.intermediate_dest_framebuffer_id); + self.core.intermediate_dest_framebuffer_id = + self.core.allocator.allocate_framebuffer(&self.core.device, + main_viewport.size(), + TextureFormat::RGBA8, + FramebufferTag("IntermediateDest")); + self.core.intermediate_dest_framebuffer_size = main_viewport.size(); + } + + let intermediate_dest_framebuffer = + self.core.allocator.get_framebuffer(self.core.intermediate_dest_framebuffer_id); let textures = [ (&self.blit_program.src_texture, - self.device.framebuffer_texture(&self.back_frame.intermediate_dest_framebuffer)) + self.core.device.framebuffer_texture(intermediate_dest_framebuffer)) ]; - self.device.draw_elements(6, &RenderState { + 
self.core.device.draw_elements(6, &RenderState { target: &RenderTarget::Default, program: &self.blit_program.program, - vertex_array: &self.back_frame.blit_vertex_array.vertex_array, + vertex_array: &self.frame.blit_vertex_array.vertex_array, primitive: Primitive::Triangles, textures: &textures[..], images: &[], - uniforms: &[], + storage_buffers: &[], + uniforms: &[ + (&self.blit_program.framebuffer_size_uniform, + UniformData::Vec2(main_viewport.size().to_f32().0)), + (&self.blit_program.dest_rect_uniform, + UniformData::Vec4(RectF::new(Vector2F::zero(), main_viewport.size().to_f32()).0)), + ], viewport: main_viewport, options: RenderOptions { clear_ops: ClearOps { @@ -1288,34 +805,242 @@ impl Renderer where D: Device { ..RenderOptions::default() }, }); + + self.core.stats.drawcall_count += 1; } - fn stencil_state(&self) -> Option { - if !self.flags.contains(RendererFlags::USE_DEPTH) { - return None; + #[inline] + pub fn draw_viewport(&self) -> RectI { + self.core.draw_viewport() + } + + #[inline] + pub fn draw_render_target(&self) -> RenderTarget { + self.core.draw_render_target() + } + + #[inline] + pub fn render_stats(&self) -> &RenderStats { + &self.core.stats + } + + fn compute_filter_params(&self, + filter: &Filter, + blend_mode: BlendMode, + color_0_combine_mode: ColorCombineMode) + -> FilterParams { + let mut ctrl = 0; + ctrl |= blend_mode.to_composite_ctrl() << COMBINER_CTRL_COMPOSITE_SHIFT; + ctrl |= color_0_combine_mode.to_composite_ctrl() << COMBINER_CTRL_COLOR_COMBINE_SHIFT; + + match *filter { + Filter::RadialGradient { line, radii, uv_origin } => { + FilterParams { + p0: line.from().0.concat_xy_xy(line.vector().0), + p1: radii.concat_xy_xy(uv_origin.0), + p2: F32x4::default(), + ctrl: ctrl | (COMBINER_CTRL_FILTER_RADIAL_GRADIENT << + COMBINER_CTRL_COLOR_FILTER_SHIFT) + } + } + Filter::PatternFilter(PatternFilter::Blur { sigma, direction }) => { + let sigma_inv = 1.0 / sigma; + let gauss_coeff_x = SQRT_2_PI_INV * sigma_inv; + let gauss_coeff_y = 
f32::exp(-0.5 * sigma_inv * sigma_inv); + let gauss_coeff_z = gauss_coeff_y * gauss_coeff_y; + + let src_offset = match direction { + BlurDirection::X => vec2f(1.0, 0.0), + BlurDirection::Y => vec2f(0.0, 1.0), + }; + + let support = f32::ceil(1.5 * sigma) * 2.0; + + FilterParams { + p0: src_offset.0.concat_xy_xy(F32x2::new(support, 0.0)), + p1: F32x4::new(gauss_coeff_x, gauss_coeff_y, gauss_coeff_z, 0.0), + p2: F32x4::default(), + ctrl: ctrl | (COMBINER_CTRL_FILTER_BLUR << COMBINER_CTRL_COLOR_FILTER_SHIFT), + } + } + Filter::PatternFilter(PatternFilter::Text { + fg_color, + bg_color, + defringing_kernel, + gamma_correction, + }) => { + let mut p2 = fg_color.0; + p2.set_w(gamma_correction as i32 as f32); + + FilterParams { + p0: match defringing_kernel { + Some(ref kernel) => F32x4::from_slice(&kernel.0), + None => F32x4::default(), + }, + p1: bg_color.0, + p2, + ctrl: ctrl | (COMBINER_CTRL_FILTER_TEXT << COMBINER_CTRL_COLOR_FILTER_SHIFT), + } + } + Filter::None => { + FilterParams { + p0: F32x4::default(), + p1: F32x4::default(), + p2: F32x4::default(), + ctrl, + } + } + } + } +} + +impl RendererCore where D: Device { + pub(crate) fn mask_texture_format(&self) -> TextureFormat { + match self.mode.level { + RendererLevel::D3D9 => TextureFormat::RGBA16F, + RendererLevel::D3D11 => TextureFormat::RGBA8, + } + } + + pub(crate) fn reallocate_alpha_tile_pages_if_necessary(&mut self, copy_existing: bool) { + let alpha_tile_pages_needed = ((self.alpha_tile_count + 0xffff) >> 16) as u32; + if let Some(ref mask_storage) = self.mask_storage { + if alpha_tile_pages_needed <= mask_storage.allocated_page_count { + return; + } } - Some(StencilState { - func: StencilFunc::Equal, - reference: 1, - mask: 1, - write: false, - }) + let new_size = vec2i(MASK_FRAMEBUFFER_WIDTH, + MASK_FRAMEBUFFER_HEIGHT * alpha_tile_pages_needed as i32); + let format = self.mask_texture_format(); + let mask_framebuffer_id = + self.allocator.allocate_framebuffer(&self.device, + new_size, + format, + 
FramebufferTag("TileAlphaMask")); + let mask_framebuffer = self.allocator.get_framebuffer(mask_framebuffer_id); + let old_mask_storage = self.mask_storage.take(); + self.mask_storage = Some(MaskStorage { + framebuffer_id: mask_framebuffer_id, + allocated_page_count: alpha_tile_pages_needed, + }); + + // Copy over existing content if needed. + let old_mask_framebuffer_id = match old_mask_storage { + Some(old_storage) if copy_existing => old_storage.framebuffer_id, + Some(_) | None => return, + }; + let old_mask_framebuffer = self.allocator.get_framebuffer(old_mask_framebuffer_id); + let old_mask_texture = self.device.framebuffer_texture(old_mask_framebuffer); + let old_size = self.device.texture_size(old_mask_texture); + + let timer_query = self.timer_query_cache.alloc(&self.device); + self.device.begin_timer_query(&timer_query); + + self.device.draw_elements(6, &RenderState { + target: &RenderTarget::Framebuffer(mask_framebuffer), + program: &self.programs.blit_program.program, + vertex_array: &self.vertex_arrays.blit_vertex_array.vertex_array, + primitive: Primitive::Triangles, + textures: &[(&self.programs.blit_program.src_texture, old_mask_texture)], + images: &[], + storage_buffers: &[], + uniforms: &[ + (&self.programs.blit_program.framebuffer_size_uniform, + UniformData::Vec2(new_size.to_f32().0)), + (&self.programs.blit_program.dest_rect_uniform, + UniformData::Vec4(RectF::new(Vector2F::zero(), old_size.to_f32()).0)), + ], + viewport: RectI::new(Vector2I::default(), new_size), + options: RenderOptions { + clear_ops: ClearOps { + color: Some(ColorF::new(0.0, 0.0, 0.0, 1.0)), + ..ClearOps::default() + }, + ..RenderOptions::default() + }, + }); + + self.device.end_timer_query(&timer_query); + self.current_timer.as_mut().unwrap().other_times.push(TimerFuture::new(timer_query)); + self.stats.drawcall_count += 1; } - fn clear_color_for_draw_operation(&self) -> Option { + pub(crate) fn set_uniforms_for_drawing_tiles<'a>( + &'a self, + tile_program: &'a 
TileProgramCommon, + textures: &mut Vec>, + uniforms: &mut Vec>, + color_texture_0: Option) { + let draw_viewport = self.draw_viewport(); + + let gamma_lut_texture = self.allocator.get_texture(self.gamma_lut_texture_id); + self.allocator.get_texture(self.texture_metadata_texture_id); + textures.push((&tile_program.gamma_lut_texture, gamma_lut_texture)); + + let texture_metadata_texture = + self.allocator.get_texture(self.texture_metadata_texture_id); + textures.push((&tile_program.texture_metadata_texture, texture_metadata_texture)); + + uniforms.push((&tile_program.tile_size_uniform, + UniformData::Vec2(F32x2::new(TILE_WIDTH as f32, TILE_HEIGHT as f32)))); + uniforms.push((&tile_program.framebuffer_size_uniform, + UniformData::Vec2(draw_viewport.size().to_f32().0))); + uniforms.push((&tile_program.texture_metadata_size_uniform, + UniformData::IVec2(I32x2::new(TEXTURE_METADATA_TEXTURE_WIDTH, + TEXTURE_METADATA_TEXTURE_HEIGHT)))); + + if let Some(ref mask_storage) = self.mask_storage { + let mask_framebuffer_id = mask_storage.framebuffer_id; + let mask_framebuffer = self.allocator.get_framebuffer(mask_framebuffer_id); + let mask_texture = self.device.framebuffer_texture(mask_framebuffer); + uniforms.push((&tile_program.mask_texture_size_0_uniform, + UniformData::Vec2(self.device.texture_size(mask_texture).to_f32().0))); + textures.push((&tile_program.mask_texture_0, mask_texture)); + } + + match color_texture_0 { + Some(color_texture) => { + let color_texture_page = self.texture_page(color_texture.page); + let color_texture_size = self.device.texture_size(color_texture_page).to_f32(); + self.device.set_texture_sampling_mode(color_texture_page, + color_texture.sampling_flags); + textures.push((&tile_program.color_texture_0, color_texture_page)); + uniforms.push((&tile_program.color_texture_size_0_uniform, + UniformData::Vec2(color_texture_size.0))); + } + None => { + uniforms.push((&tile_program.color_texture_size_0_uniform, + UniformData::Vec2(F32x2::default()))); + 
} + } + } + + // Pattern textures + + fn texture_page(&self, id: TexturePageId) -> &D::Texture { + self.device.framebuffer_texture(&self.texture_page_framebuffer(id)) + } + + fn texture_page_framebuffer(&self, id: TexturePageId) -> &D::Framebuffer { + let framebuffer_id = self.pattern_texture_pages[id.0 as usize] + .as_ref() + .expect("Texture page not allocated!") + .framebuffer_id; + self.allocator.get_framebuffer(framebuffer_id) + } + + pub(crate) fn clear_color_for_draw_operation(&self) -> Option { let must_preserve_contents = match self.render_target_stack.last() { Some(&render_target_id) => { let texture_page = self.render_target_location(render_target_id).page; - self.texture_pages[texture_page.0 as usize] + self.pattern_texture_pages[texture_page.0 as usize] .as_ref() .expect("Draw target texture page not allocated!") .must_preserve_contents } None => { - self.back_frame - .framebuffer_flags - .contains(FramebufferFlags::DEST_FRAMEBUFFER_IS_DIRTY) + self.framebuffer_flags.contains(FramebufferFlags::DEST_FRAMEBUFFER_IS_DIRTY) } }; @@ -1328,19 +1053,27 @@ impl Renderer where D: Device { } } - fn preserve_draw_framebuffer(&mut self) { - match self.render_target_stack.last() { - Some(&render_target_id) => { - let texture_page = self.render_target_location(render_target_id).page; - self.texture_pages[texture_page.0 as usize] - .as_mut() - .expect("Draw target texture page not allocated!") - .must_preserve_contents = true; - } - None => { - self.back_frame - .framebuffer_flags - .insert(FramebufferFlags::DEST_FRAMEBUFFER_IS_DIRTY); + // Sizing + + pub(crate) fn tile_size(&self) -> Vector2I { + let temp = self.draw_viewport().size() + + vec2i(TILE_WIDTH as i32 - 1, TILE_HEIGHT as i32 - 1); + vec2i(temp.x() / TILE_WIDTH as i32, temp.y() / TILE_HEIGHT as i32) + } + + pub(crate) fn framebuffer_tile_size(&self) -> Vector2I { + pixel_size_to_tile_size(self.options.dest.window_size(&self.device)) + } + + // Viewport calculation + + fn main_viewport(&self) -> RectI { + 
match self.options.dest { + DestFramebuffer::Default { viewport, .. } => viewport, + DestFramebuffer::Other(ref framebuffer) => { + let texture = self.device.framebuffer_texture(framebuffer); + let size = self.device.texture_size(texture); + RectI::new(Vector2I::default(), size) } } } @@ -1352,51 +1085,64 @@ impl Renderer where D: Device { } } - fn main_viewport(&self) -> RectI { - match self.dest_framebuffer { - DestFramebuffer::Default { viewport, .. } => viewport, - DestFramebuffer::Other(ref framebuffer) => { - let size = self - .device - .texture_size(self.device.framebuffer_texture(framebuffer)); - RectI::new(Vector2I::default(), size) + pub fn draw_render_target(&self) -> RenderTarget { + match self.render_target_stack.last() { + Some(&render_target_id) => { + let texture_page_id = self.render_target_location(render_target_id).page; + let framebuffer = self.texture_page_framebuffer(texture_page_id); + RenderTarget::Framebuffer(framebuffer) + } + None => { + if self.renderer_flags + .contains(RendererFlags::INTERMEDIATE_DEST_FRAMEBUFFER_NEEDED) { + let intermediate_dest_framebuffer = + self.allocator.get_framebuffer(self.intermediate_dest_framebuffer_id); + RenderTarget::Framebuffer(intermediate_dest_framebuffer) + } else { + match self.options.dest { + DestFramebuffer::Default { .. 
} => RenderTarget::Default, + DestFramebuffer::Other(ref framebuffer) => { + RenderTarget::Framebuffer(framebuffer) + } + } + } } } } - fn mask_viewport(&self) -> RectI { - RectI::new(Vector2I::zero(), vec2i(MASK_FRAMEBUFFER_WIDTH, MASK_FRAMEBUFFER_HEIGHT)) + pub(crate) fn preserve_draw_framebuffer(&mut self) { + match self.render_target_stack.last() { + Some(&render_target_id) => { + let texture_page = self.render_target_location(render_target_id).page; + self.pattern_texture_pages[texture_page.0 as usize] + .as_mut() + .expect("Draw target texture page not allocated!") + .must_preserve_contents = true; + } + None => { + self.framebuffer_flags.insert(FramebufferFlags::DEST_FRAMEBUFFER_IS_DIRTY); + } + } } fn render_target_location(&self, render_target_id: RenderTargetId) -> TextureLocation { self.render_targets[render_target_id.render_target as usize].location } - - fn texture_page_framebuffer(&self, id: TexturePageId) -> &D::Framebuffer { - &self.texture_pages[id.0 as usize] - .as_ref() - .expect("Texture page not allocated!") - .framebuffer - } - - fn texture_page(&self, id: TexturePageId) -> &D::Texture { - self.device.framebuffer_texture(&self.texture_page_framebuffer(id)) - } } impl Frame where D: Device { // FIXME(pcwalton): This signature shouldn't be so big. Make a struct. 
fn new(device: &D, + allocator: &mut GPUMemoryAllocator, blit_program: &BlitProgram, clear_program: &ClearProgram, - tile_clip_program: &ClipTileProgram, reprojection_program: &ReprojectionProgram, stencil_program: &StencilProgram, - quad_vertex_positions_buffer: &D::Buffer, - quad_vertex_indices_buffer: &D::Buffer, - window_size: Vector2I) + quad_vertex_positions_buffer_id: BufferID, + quad_vertex_indices_buffer_id: BufferID) -> Frame { - let quads_vertex_indices_buffer = device.create_buffer(BufferUploadMode::Dynamic); + let quad_vertex_positions_buffer = allocator.get_buffer(quad_vertex_positions_buffer_id); + let quad_vertex_indices_buffer = allocator.get_buffer(quad_vertex_indices_buffer_id); let blit_vertex_array = BlitVertexArray::new(device, &blit_program, @@ -1406,565 +1152,58 @@ impl Frame where D: Device { &clear_program, &quad_vertex_positions_buffer, &quad_vertex_indices_buffer); - let tile_clip_vertex_array = ClipTileVertexArray::new(device, - &tile_clip_program, - &quad_vertex_positions_buffer, - &quad_vertex_indices_buffer); let reprojection_vertex_array = ReprojectionVertexArray::new(device, &reprojection_program, &quad_vertex_positions_buffer, &quad_vertex_indices_buffer); let stencil_vertex_array = StencilVertexArray::new(device, &stencil_program); - let fill_vertex_storage_allocator = StorageAllocator::new(MIN_FILL_STORAGE_CLASS); - let tile_vertex_storage_allocator = StorageAllocator::new(MIN_TILE_STORAGE_CLASS); - - let texture_metadata_texture_size = vec2i(TEXTURE_METADATA_TEXTURE_WIDTH, - TEXTURE_METADATA_TEXTURE_HEIGHT); - let texture_metadata_texture = device.create_texture(TextureFormat::RGBA16F, - texture_metadata_texture_size); - - let intermediate_dest_texture = device.create_texture(TextureFormat::RGBA8, window_size); - let intermediate_dest_framebuffer = device.create_framebuffer(intermediate_dest_texture); - - let dest_blend_texture = device.create_texture(TextureFormat::RGBA8, window_size); - let dest_blend_framebuffer = 
device.create_framebuffer(dest_blend_texture); - Frame { blit_vertex_array, clear_vertex_array, - tile_vertex_storage_allocator, - fill_vertex_storage_allocator, - tile_clip_vertex_array, reprojection_vertex_array, stencil_vertex_array, - quads_vertex_indices_buffer, - quads_vertex_indices_length: 0, - alpha_tile_pages: FxHashMap::default(), - texture_metadata_texture, - intermediate_dest_framebuffer, - dest_blend_framebuffer, - framebuffer_flags: FramebufferFlags::empty(), } } } -// Buffer management - -struct StorageAllocator where D: Device { - buckets: Vec>, - min_size_class: usize, - phantom: PhantomData, -} - -struct StorageAllocatorBucket { - free: Vec, - in_use: Vec, -} - -#[derive(Clone, Copy, Debug, PartialEq)] -struct StorageID { - bucket: usize, - index: usize, -} - -impl StorageAllocator where D: Device { - fn new(min_size_class: usize) -> StorageAllocator { - StorageAllocator { buckets: vec![], min_size_class, phantom: PhantomData } - } - - fn allocate(&mut self, device: &D, size: u64, allocator: F) -> StorageID - where D: Device, F: FnOnce(&D, u64) -> S { - let size_class = (64 - (size.leading_zeros() as usize)).max(self.min_size_class); - let bucket_index = size_class - self.min_size_class; - while self.buckets.len() < bucket_index + 1 { - self.buckets.push(StorageAllocatorBucket { free: vec![], in_use: vec![] }); - } - - let bucket = &mut self.buckets[bucket_index]; - match bucket.free.pop() { - Some(storage) => bucket.in_use.push(storage), - None => bucket.in_use.push(allocator(device, 1 << size_class as u64)), - } - StorageID { bucket: bucket_index, index: bucket.in_use.len() - 1 } - } - - fn get(&self, storage_id: StorageID) -> &S { - &self.buckets[storage_id.bucket].in_use[storage_id.index] - } - - fn end_frame(&mut self) { - for bucket in &mut self.buckets { - bucket.free.extend(mem::replace(&mut bucket.in_use, vec![]).into_iter()) - } - } -} - -struct FillVertexStorage where D: Device { - vertex_buffer: D::Buffer, - auxiliary: 
FillVertexStorageAuxiliary, -} - -enum FillVertexStorageAuxiliary where D: Device { - Raster { vertex_array: FillVertexArray }, - Compute { - next_fills_buffer: D::Buffer, - tile_map_buffer: D::Buffer, - }, -} - -struct TileVertexStorage where D: Device { - tile_vertex_array: TileVertexArray, - tile_copy_vertex_array: CopyTileVertexArray, - vertex_buffer: D::Buffer, -} - -impl FillVertexStorage where D: Device { - fn new(size: u64, - device: &D, - fill_program: &FillProgram, - quad_vertex_positions_buffer: &D::Buffer, - quad_vertex_indices_buffer: &D::Buffer) - -> FillVertexStorage { - let vertex_buffer = device.create_buffer(BufferUploadMode::Dynamic); - let vertex_buffer_data: BufferData = BufferData::Uninitialized(size as usize); - device.allocate_buffer(&vertex_buffer, vertex_buffer_data, BufferTarget::Vertex); - - let auxiliary = match *fill_program { - FillProgram::Raster(ref fill_raster_program) => { - FillVertexStorageAuxiliary::Raster { - vertex_array: FillVertexArray::new(device, - fill_raster_program, - &vertex_buffer, - quad_vertex_positions_buffer, - quad_vertex_indices_buffer), - } +impl RendererLevelImpl where D: Device { + #[inline] + fn require_d3d9(&mut self) -> &mut RendererD3D9 { + match *self { + RendererLevelImpl::D3D9(ref mut d3d9_renderer) => d3d9_renderer, + RendererLevelImpl::D3D11(_) => { + panic!("Tried to enter the D3D9 path with a D3D11 renderer!") } - FillProgram::Compute(_) => { - let next_fills_buffer = device.create_buffer(BufferUploadMode::Dynamic); - let tile_map_buffer = device.create_buffer(BufferUploadMode::Dynamic); - let next_fills_buffer_data: BufferData = - BufferData::Uninitialized(size as usize); - let tile_map_buffer_data: BufferData = - BufferData::Uninitialized(256 * 256); - device.allocate_buffer(&next_fills_buffer, - next_fills_buffer_data, - BufferTarget::Storage); - device.allocate_buffer(&tile_map_buffer, - tile_map_buffer_data, - BufferTarget::Storage); - FillVertexStorageAuxiliary::Compute { next_fills_buffer, 
tile_map_buffer } - } - }; - - FillVertexStorage { vertex_buffer, auxiliary } + } } -} -impl TileVertexStorage where D: Device { - fn new(size: u64, - device: &D, - tile_program: &TileProgram, - tile_copy_program: &CopyTileProgram, - quad_vertex_positions_buffer: &D::Buffer, - quad_vertex_indices_buffer: &D::Buffer) - -> TileVertexStorage { - let vertex_buffer = device.create_buffer(BufferUploadMode::Dynamic); - device.allocate_buffer::(&vertex_buffer, - BufferData::Uninitialized(size as usize), - BufferTarget::Vertex); - let tile_vertex_array = TileVertexArray::new(device, - &tile_program, - &vertex_buffer, - &quad_vertex_positions_buffer, - &quad_vertex_indices_buffer); - let tile_copy_vertex_array = CopyTileVertexArray::new(device, - &tile_copy_program, - &vertex_buffer, - &quad_vertex_indices_buffer); - TileVertexStorage { vertex_buffer, tile_vertex_array, tile_copy_vertex_array } + #[inline] + fn require_d3d11(&mut self) -> &mut RendererD3D11 { + match *self { + RendererLevelImpl::D3D11(ref mut d3d11_renderer) => d3d11_renderer, + RendererLevelImpl::D3D9(_) => { + panic!("Tried to enter the D3D11 path with a D3D9 renderer!") + } + } } } // Render stats -#[derive(Clone, Copy, Debug, Default)] -pub struct RenderStats { - pub path_count: usize, - pub fill_count: usize, - pub alpha_tile_count: usize, - pub solid_tile_count: usize, - pub cpu_build_time: Duration, -} - -impl Add for RenderStats { - type Output = RenderStats; - fn add(self, other: RenderStats) -> RenderStats { - RenderStats { - path_count: self.path_count + other.path_count, - solid_tile_count: self.solid_tile_count + other.solid_tile_count, - alpha_tile_count: self.alpha_tile_count + other.alpha_tile_count, - fill_count: self.fill_count + other.fill_count, - cpu_build_time: self.cpu_build_time + other.cpu_build_time, - } - } -} - -impl Div for RenderStats { - type Output = RenderStats; - fn div(self, divisor: usize) -> RenderStats { - RenderStats { - path_count: self.path_count / divisor, - 
solid_tile_count: self.solid_tile_count / divisor, - alpha_tile_count: self.alpha_tile_count / divisor, - fill_count: self.fill_count / divisor, - cpu_build_time: self.cpu_build_time / divisor as u32, - } - } -} - -struct TimerQueryCache where D: Device { - free_queries: Vec, -} - -struct PendingTimer where D: Device { - fill_times: Vec>, - tile_times: Vec>, -} - -enum TimerFuture where D: Device { - Pending(D::TimerQuery), - Resolved(Duration), -} - -impl TimerQueryCache where D: Device { - fn new(_: &D) -> TimerQueryCache { - TimerQueryCache { free_queries: vec![] } - } - - fn alloc(&mut self, device: &D) -> D::TimerQuery { - self.free_queries.pop().unwrap_or_else(|| device.create_timer_query()) - } - - fn free(&mut self, old_query: D::TimerQuery) { - self.free_queries.push(old_query); - } -} - -impl PendingTimer where D: Device { - fn new() -> PendingTimer { - PendingTimer { fill_times: vec![], tile_times: vec![] } - } - - fn poll(&mut self, device: &D) -> Vec { - let mut old_queries = vec![]; - for future in self.fill_times.iter_mut().chain(self.tile_times.iter_mut()) { - if let Some(old_query) = future.poll(device) { - old_queries.push(old_query) - } - } - old_queries - } - - fn total_time(&self) -> Option { - let mut total = Duration::default(); - for future in self.fill_times.iter().chain(self.tile_times.iter()) { - match *future { - TimerFuture::Pending(_) => return None, - TimerFuture::Resolved(time) => total += time, - } - } - Some(total) - } -} - -impl TimerFuture where D: Device { - fn new(query: D::TimerQuery) -> TimerFuture { - TimerFuture::Pending(query) - } - - fn poll(&mut self, device: &D) -> Option { - let duration = match *self { - TimerFuture::Pending(ref query) => device.try_recv_timer_query(query), - TimerFuture::Resolved(_) => None, - }; - match duration { - None => None, - Some(duration) => { - match mem::replace(self, TimerFuture::Resolved(duration)) { - TimerFuture::Resolved(_) => unreachable!(), - TimerFuture::Pending(old_query) => 
Some(old_query), - } - } - } - } -} - -#[derive(Clone, Copy, Debug)] -pub struct RenderTime { - pub gpu_time: Duration, -} - -impl Default for RenderTime { - #[inline] - fn default() -> RenderTime { - RenderTime { gpu_time: Duration::new(0, 0) } - } -} - -impl Add for RenderTime { - type Output = RenderTime; - - #[inline] - fn add(self, other: RenderTime) -> RenderTime { - RenderTime { gpu_time: self.gpu_time + other.gpu_time } - } -} - -impl Div for RenderTime { - type Output = RenderTime; - - #[inline] - fn div(self, divisor: usize) -> RenderTime { - RenderTime { gpu_time: self.gpu_time / divisor as u32 } - } -} - bitflags! { - struct FramebufferFlags: u8 { + pub(crate) struct FramebufferFlags: u8 { const MASK_FRAMEBUFFER_IS_DIRTY = 0x01; const DEST_FRAMEBUFFER_IS_DIRTY = 0x02; } } -struct TextureCache where D: Device { - textures: Vec, -} - -impl TextureCache where D: Device { - fn new() -> TextureCache { - TextureCache { textures: vec![] } - } - - fn create_texture(&mut self, device: &mut D, format: TextureFormat, size: Vector2I) - -> D::Texture { - for index in 0..self.textures.len() { - if device.texture_size(&self.textures[index]) == size && - device.texture_format(&self.textures[index]) == format { - return self.textures.remove(index); - } - } - - device.create_texture(format, size) - } - - fn release_texture(&mut self, texture: D::Texture) { - if self.textures.len() == TEXTURE_CACHE_SIZE { - self.textures.pop(); - } - self.textures.insert(0, texture); - } -} - -struct TexturePage where D: Device { - framebuffer: D::Framebuffer, - must_preserve_contents: bool, -} - struct RenderTargetInfo { location: TextureLocation, } -trait ToBlendState { - fn to_blend_state(self) -> Option; -} - -impl ToBlendState for BlendMode { - fn to_blend_state(self) -> Option { - match self { - BlendMode::Clear => { - Some(BlendState { - src_rgb_factor: BlendFactor::Zero, - dest_rgb_factor: BlendFactor::Zero, - src_alpha_factor: BlendFactor::Zero, - dest_alpha_factor: 
BlendFactor::Zero, - ..BlendState::default() - }) - } - BlendMode::SrcOver => { - Some(BlendState { - src_rgb_factor: BlendFactor::One, - dest_rgb_factor: BlendFactor::OneMinusSrcAlpha, - src_alpha_factor: BlendFactor::One, - dest_alpha_factor: BlendFactor::OneMinusSrcAlpha, - ..BlendState::default() - }) - } - BlendMode::DestOver => { - Some(BlendState { - src_rgb_factor: BlendFactor::OneMinusDestAlpha, - dest_rgb_factor: BlendFactor::One, - src_alpha_factor: BlendFactor::OneMinusDestAlpha, - dest_alpha_factor: BlendFactor::One, - ..BlendState::default() - }) - } - BlendMode::SrcIn => { - Some(BlendState { - src_rgb_factor: BlendFactor::DestAlpha, - dest_rgb_factor: BlendFactor::Zero, - src_alpha_factor: BlendFactor::DestAlpha, - dest_alpha_factor: BlendFactor::Zero, - ..BlendState::default() - }) - } - BlendMode::DestIn => { - Some(BlendState { - src_rgb_factor: BlendFactor::Zero, - dest_rgb_factor: BlendFactor::SrcAlpha, - src_alpha_factor: BlendFactor::Zero, - dest_alpha_factor: BlendFactor::SrcAlpha, - ..BlendState::default() - }) - } - BlendMode::SrcOut => { - Some(BlendState { - src_rgb_factor: BlendFactor::OneMinusDestAlpha, - dest_rgb_factor: BlendFactor::Zero, - src_alpha_factor: BlendFactor::OneMinusDestAlpha, - dest_alpha_factor: BlendFactor::Zero, - ..BlendState::default() - }) - } - BlendMode::DestOut => { - Some(BlendState { - src_rgb_factor: BlendFactor::Zero, - dest_rgb_factor: BlendFactor::OneMinusSrcAlpha, - src_alpha_factor: BlendFactor::Zero, - dest_alpha_factor: BlendFactor::OneMinusSrcAlpha, - ..BlendState::default() - }) - } - BlendMode::SrcAtop => { - Some(BlendState { - src_rgb_factor: BlendFactor::DestAlpha, - dest_rgb_factor: BlendFactor::OneMinusSrcAlpha, - src_alpha_factor: BlendFactor::DestAlpha, - dest_alpha_factor: BlendFactor::OneMinusSrcAlpha, - ..BlendState::default() - }) - } - BlendMode::DestAtop => { - Some(BlendState { - src_rgb_factor: BlendFactor::OneMinusDestAlpha, - dest_rgb_factor: BlendFactor::SrcAlpha, - 
src_alpha_factor: BlendFactor::OneMinusDestAlpha, - dest_alpha_factor: BlendFactor::SrcAlpha, - ..BlendState::default() - }) - } - BlendMode::Xor => { - Some(BlendState { - src_rgb_factor: BlendFactor::OneMinusDestAlpha, - dest_rgb_factor: BlendFactor::OneMinusSrcAlpha, - src_alpha_factor: BlendFactor::OneMinusDestAlpha, - dest_alpha_factor: BlendFactor::OneMinusSrcAlpha, - ..BlendState::default() - }) - } - BlendMode::Lighter => { - Some(BlendState { - src_rgb_factor: BlendFactor::One, - dest_rgb_factor: BlendFactor::One, - src_alpha_factor: BlendFactor::One, - dest_alpha_factor: BlendFactor::One, - ..BlendState::default() - }) - } - BlendMode::Copy | - BlendMode::Darken | - BlendMode::Lighten | - BlendMode::Multiply | - BlendMode::Screen | - BlendMode::HardLight | - BlendMode::Overlay | - BlendMode::ColorDodge | - BlendMode::ColorBurn | - BlendMode::SoftLight | - BlendMode::Difference | - BlendMode::Exclusion | - BlendMode::Hue | - BlendMode::Saturation | - BlendMode::Color | - BlendMode::Luminosity => { - // Blending is done manually in the shader. 
- None - } - } - } -} - -pub trait BlendModeExt { - fn needs_readable_framebuffer(self) -> bool; -} - -impl BlendModeExt for BlendMode { - fn needs_readable_framebuffer(self) -> bool { - match self { - BlendMode::Clear | - BlendMode::SrcOver | - BlendMode::DestOver | - BlendMode::SrcIn | - BlendMode::DestIn | - BlendMode::SrcOut | - BlendMode::DestOut | - BlendMode::SrcAtop | - BlendMode::DestAtop | - BlendMode::Xor | - BlendMode::Lighter | - BlendMode::Copy => false, - BlendMode::Lighten | - BlendMode::Darken | - BlendMode::Multiply | - BlendMode::Screen | - BlendMode::HardLight | - BlendMode::Overlay | - BlendMode::ColorDodge | - BlendMode::ColorBurn | - BlendMode::SoftLight | - BlendMode::Difference | - BlendMode::Exclusion | - BlendMode::Hue | - BlendMode::Saturation | - BlendMode::Color | - BlendMode::Luminosity => true, - } - } -} - -struct AlphaTilePage where D: Device { - buffered_fills: Vec, - pending_fills: Vec, - framebuffer: D::Framebuffer, - framebuffer_is_dirty: bool, -} - -impl AlphaTilePage where D: Device { - fn new(device: &mut D) -> AlphaTilePage { - let framebuffer_size = vec2i(MASK_FRAMEBUFFER_WIDTH, MASK_FRAMEBUFFER_HEIGHT); - let framebuffer_texture = device.create_texture(TextureFormat::RGBA16F, framebuffer_size); - let framebuffer = device.create_framebuffer(framebuffer_texture); - AlphaTilePage { - buffered_fills: vec![], - pending_fills: vec![], - framebuffer, - framebuffer_is_dirty: false, - } - } -} - bitflags! { - struct RendererFlags: u8 { + pub(crate) struct RendererFlags: u8 { // Whether we need a depth buffer. const USE_DEPTH = 0x01; // Whether an intermediate destination framebuffer is needed. @@ -1975,53 +1214,27 @@ bitflags! { } } -trait ToCompositeCtrl { - fn to_composite_ctrl(&self) -> i32; +fn pixel_size_to_tile_size(pixel_size: Vector2I) -> Vector2I { + // Round up. 
+ let tile_size = vec2i(TILE_WIDTH as i32 - 1, TILE_HEIGHT as i32 - 1); + let size = pixel_size + tile_size; + vec2i(size.x() / TILE_WIDTH as i32, size.y() / TILE_HEIGHT as i32) } -impl ToCompositeCtrl for BlendMode { - fn to_composite_ctrl(&self) -> i32 { - match *self { - BlendMode::SrcOver | - BlendMode::SrcAtop | - BlendMode::DestOver | - BlendMode::DestOut | - BlendMode::Xor | - BlendMode::Lighter | - BlendMode::Clear | - BlendMode::Copy | - BlendMode::SrcIn | - BlendMode::SrcOut | - BlendMode::DestIn | - BlendMode::DestAtop => COMBINER_CTRL_COMPOSITE_NORMAL, - BlendMode::Multiply => COMBINER_CTRL_COMPOSITE_MULTIPLY, - BlendMode::Darken => COMBINER_CTRL_COMPOSITE_DARKEN, - BlendMode::Lighten => COMBINER_CTRL_COMPOSITE_LIGHTEN, - BlendMode::Screen => COMBINER_CTRL_COMPOSITE_SCREEN, - BlendMode::Overlay => COMBINER_CTRL_COMPOSITE_OVERLAY, - BlendMode::ColorDodge => COMBINER_CTRL_COMPOSITE_COLOR_DODGE, - BlendMode::ColorBurn => COMBINER_CTRL_COMPOSITE_COLOR_BURN, - BlendMode::HardLight => COMBINER_CTRL_COMPOSITE_HARD_LIGHT, - BlendMode::SoftLight => COMBINER_CTRL_COMPOSITE_SOFT_LIGHT, - BlendMode::Difference => COMBINER_CTRL_COMPOSITE_DIFFERENCE, - BlendMode::Exclusion => COMBINER_CTRL_COMPOSITE_EXCLUSION, - BlendMode::Hue => COMBINER_CTRL_COMPOSITE_HUE, - BlendMode::Saturation => COMBINER_CTRL_COMPOSITE_SATURATION, - BlendMode::Color => COMBINER_CTRL_COMPOSITE_COLOR, - BlendMode::Luminosity => COMBINER_CTRL_COMPOSITE_LUMINOSITY, - } - } +struct FilterParams { + p0: F32x4, + p1: F32x4, + p2: F32x4, + ctrl: i32, } -trait ToCombineMode { - fn to_combine_mode(self) -> i32; +pub(crate) struct PatternTexturePage { + pub(crate) framebuffer_id: FramebufferID, + pub(crate) must_preserve_contents: bool, } -impl ToCombineMode for PaintCompositeOp { - fn to_combine_mode(self) -> i32 { - match self { - PaintCompositeOp::DestIn => COMBINER_CTRL_COLOR_COMBINE_DEST_IN, - PaintCompositeOp::SrcIn => COMBINER_CTRL_COLOR_COMBINE_SRC_IN, - } - } +pub struct DebugUIPresenterInfo<'a, 
D> where D: Device { + pub device: &'a mut D, + pub allocator: &'a mut GPUMemoryAllocator, + pub debug_ui_presenter: &'a mut DebugUIPresenter, } diff --git a/renderer/src/gpu/shaders.rs b/renderer/src/gpu/shaders.rs index ee04ff1d..84bd0482 100644 --- a/renderer/src/gpu/shaders.rs +++ b/renderer/src/gpu/shaders.rs @@ -8,20 +8,12 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. -use crate::gpu::options::RendererOptions; -use crate::gpu::renderer::{MASK_TILES_ACROSS, MASK_TILES_DOWN}; -use crate::tiles::{TILE_HEIGHT, TILE_WIDTH}; -use pathfinder_gpu::{BufferTarget, BufferUploadMode, ComputeDimensions, Device, FeatureLevel}; -use pathfinder_gpu::{VertexAttrClass, VertexAttrDescriptor, VertexAttrType}; +use pathfinder_gpu::{BufferTarget, BufferUploadMode, Device, VertexAttrClass}; +use pathfinder_gpu::{VertexAttrDescriptor, VertexAttrType}; use pathfinder_resources::ResourceLoader; // TODO(pcwalton): Replace with `mem::size_of` calls? -pub(crate) const TILE_INSTANCE_SIZE: usize = 12; -const FILL_INSTANCE_SIZE: usize = 8; -const CLIP_TILE_INSTANCE_SIZE: usize = 8; - -pub const MAX_FILLS_PER_BATCH: usize = 0x10000; -pub const MAX_TILES_PER_BATCH: usize = MASK_TILES_ACROSS as usize * MASK_TILES_DOWN as usize; +pub(crate) const TILE_INSTANCE_SIZE: usize = 16; pub struct BlitVertexArray where D: Device { pub vertex_array: D::VertexArray, @@ -52,6 +44,25 @@ impl BlitVertexArray where D: Device { } } +pub struct VertexArraysCore where D: Device { + pub blit_vertex_array: BlitVertexArray, +} + +impl VertexArraysCore where D: Device { + pub fn new(device: &D, + programs: &ProgramsCore, + quad_vertex_positions_buffer: &D::Buffer, + quad_vertex_indices_buffer: &D::Buffer) + -> VertexArraysCore { + VertexArraysCore { + blit_vertex_array: BlitVertexArray::new(device, + &programs.blit_program, + quad_vertex_positions_buffer, + quad_vertex_indices_buffer), + } + } +} + pub struct ClearVertexArray where D: Device { pub 
vertex_array: D::VertexArray, } @@ -81,288 +92,32 @@ impl ClearVertexArray where D: Device { } } -pub struct FillVertexArray where D: Device { - pub vertex_array: D::VertexArray, -} - -impl FillVertexArray -where - D: Device, -{ - pub fn new( - device: &D, - fill_program: &FillRasterProgram, - vertex_buffer: &D::Buffer, - quad_vertex_positions_buffer: &D::Buffer, - quad_vertex_indices_buffer: &D::Buffer, - ) -> FillVertexArray { - let vertex_array = device.create_vertex_array(); - - let tess_coord_attr = device.get_vertex_attr(&fill_program.program, "TessCoord").unwrap(); - let from_px_attr = device.get_vertex_attr(&fill_program.program, "FromPx").unwrap(); - let to_px_attr = device.get_vertex_attr(&fill_program.program, "ToPx").unwrap(); - let from_subpx_attr = device.get_vertex_attr(&fill_program.program, "FromSubpx").unwrap(); - let to_subpx_attr = device.get_vertex_attr(&fill_program.program, "ToSubpx").unwrap(); - let tile_index_attr = device.get_vertex_attr(&fill_program.program, "TileIndex").unwrap(); - - device.bind_buffer(&vertex_array, quad_vertex_positions_buffer, BufferTarget::Vertex); - device.configure_vertex_attr(&vertex_array, &tess_coord_attr, &VertexAttrDescriptor { - size: 2, - class: VertexAttrClass::Int, - attr_type: VertexAttrType::U16, - stride: 4, - offset: 0, - divisor: 0, - buffer_index: 0, - }); - device.bind_buffer(&vertex_array, &vertex_buffer, BufferTarget::Vertex); - device.configure_vertex_attr(&vertex_array, &from_subpx_attr, &VertexAttrDescriptor { - size: 2, - class: VertexAttrClass::FloatNorm, - attr_type: VertexAttrType::U8, - stride: FILL_INSTANCE_SIZE, - offset: 0, - divisor: 1, - buffer_index: 1, - }); - device.configure_vertex_attr(&vertex_array, &to_subpx_attr, &VertexAttrDescriptor { - size: 2, - class: VertexAttrClass::FloatNorm, - attr_type: VertexAttrType::U8, - stride: FILL_INSTANCE_SIZE, - offset: 2, - divisor: 1, - buffer_index: 1, - }); - device.configure_vertex_attr(&vertex_array, &from_px_attr, 
&VertexAttrDescriptor { - size: 1, - class: VertexAttrClass::Int, - attr_type: VertexAttrType::U8, - stride: FILL_INSTANCE_SIZE, - offset: 4, - divisor: 1, - buffer_index: 1, - }); - device.configure_vertex_attr(&vertex_array, &to_px_attr, &VertexAttrDescriptor { - size: 1, - class: VertexAttrClass::Int, - attr_type: VertexAttrType::U8, - stride: FILL_INSTANCE_SIZE, - offset: 5, - divisor: 1, - buffer_index: 1, - }); - device.configure_vertex_attr(&vertex_array, &tile_index_attr, &VertexAttrDescriptor { - size: 1, - class: VertexAttrClass::Int, - attr_type: VertexAttrType::U16, - stride: FILL_INSTANCE_SIZE, - offset: 6, - divisor: 1, - buffer_index: 1, - }); - device.bind_buffer(&vertex_array, quad_vertex_indices_buffer, BufferTarget::Index); - - FillVertexArray { vertex_array } - } -} - -pub struct TileVertexArray where D: Device { - pub vertex_array: D::VertexArray, -} - -impl TileVertexArray where D: Device { - pub fn new(device: &D, - tile_program: &TileProgram, - tile_vertex_buffer: &D::Buffer, - quad_vertex_positions_buffer: &D::Buffer, - quad_vertex_indices_buffer: &D::Buffer) - -> TileVertexArray { - let vertex_array = device.create_vertex_array(); - - let tile_offset_attr = - device.get_vertex_attr(&tile_program.program, "TileOffset").unwrap(); - let tile_origin_attr = - device.get_vertex_attr(&tile_program.program, "TileOrigin").unwrap(); - let mask_0_tex_coord_attr = - device.get_vertex_attr(&tile_program.program, "MaskTexCoord0").unwrap(); - let mask_backdrop_attr = - device.get_vertex_attr(&tile_program.program, "MaskBackdrop").unwrap(); - let color_attr = device.get_vertex_attr(&tile_program.program, "Color").unwrap(); - let tile_ctrl_attr = device.get_vertex_attr(&tile_program.program, "TileCtrl").unwrap(); - - device.bind_buffer(&vertex_array, quad_vertex_positions_buffer, BufferTarget::Vertex); - device.configure_vertex_attr(&vertex_array, &tile_offset_attr, &VertexAttrDescriptor { - size: 2, - class: VertexAttrClass::Int, - attr_type: 
VertexAttrType::I16, - stride: 4, - offset: 0, - divisor: 0, - buffer_index: 0, - }); - device.bind_buffer(&vertex_array, tile_vertex_buffer, BufferTarget::Vertex); - device.configure_vertex_attr(&vertex_array, &tile_origin_attr, &VertexAttrDescriptor { - size: 2, - class: VertexAttrClass::Int, - attr_type: VertexAttrType::I16, - stride: TILE_INSTANCE_SIZE, - offset: 0, - divisor: 1, - buffer_index: 1, - }); - device.configure_vertex_attr(&vertex_array, &mask_0_tex_coord_attr, &VertexAttrDescriptor { - size: 2, - class: VertexAttrClass::Int, - attr_type: VertexAttrType::U8, - stride: TILE_INSTANCE_SIZE, - offset: 4, - divisor: 1, - buffer_index: 1, - }); - device.configure_vertex_attr(&vertex_array, &mask_backdrop_attr, &VertexAttrDescriptor { - size: 2, - class: VertexAttrClass::Int, - attr_type: VertexAttrType::I8, - stride: TILE_INSTANCE_SIZE, - offset: 6, - divisor: 1, - buffer_index: 1, - }); - device.configure_vertex_attr(&vertex_array, &color_attr, &VertexAttrDescriptor { - size: 1, - class: VertexAttrClass::Int, - attr_type: VertexAttrType::I16, - stride: TILE_INSTANCE_SIZE, - offset: 8, - divisor: 1, - buffer_index: 1, - }); - device.configure_vertex_attr(&vertex_array, &tile_ctrl_attr, &VertexAttrDescriptor { - size: 1, - class: VertexAttrClass::Int, - attr_type: VertexAttrType::I16, - stride: TILE_INSTANCE_SIZE, - offset: 10, - divisor: 1, - buffer_index: 1, - }); - device.bind_buffer(&vertex_array, quad_vertex_indices_buffer, BufferTarget::Index); - - TileVertexArray { vertex_array } - } -} - -pub struct CopyTileVertexArray where D: Device { - pub vertex_array: D::VertexArray, -} - -impl CopyTileVertexArray where D: Device { - pub fn new( - device: &D, - copy_tile_program: &CopyTileProgram, - copy_tile_vertex_buffer: &D::Buffer, - quads_vertex_indices_buffer: &D::Buffer, - ) -> CopyTileVertexArray { - let vertex_array = device.create_vertex_array(); - - let tile_position_attr = - device.get_vertex_attr(©_tile_program.program, "TilePosition").unwrap(); - 
- device.bind_buffer(&vertex_array, copy_tile_vertex_buffer, BufferTarget::Vertex); - device.configure_vertex_attr(&vertex_array, &tile_position_attr, &VertexAttrDescriptor { - size: 2, - class: VertexAttrClass::Int, - attr_type: VertexAttrType::I16, - stride: TILE_INSTANCE_SIZE, - offset: 0, - divisor: 0, - buffer_index: 0, - }); - device.bind_buffer(&vertex_array, quads_vertex_indices_buffer, BufferTarget::Index); - - CopyTileVertexArray { vertex_array } - } -} - -pub struct ClipTileVertexArray where D: Device { - pub vertex_array: D::VertexArray, - pub vertex_buffer: D::Buffer, -} - -impl ClipTileVertexArray where D: Device { - pub fn new(device: &D, - clip_tile_program: &ClipTileProgram, - quad_vertex_positions_buffer: &D::Buffer, - quad_vertex_indices_buffer: &D::Buffer) - -> ClipTileVertexArray { - let vertex_array = device.create_vertex_array(); - let vertex_buffer = device.create_buffer(BufferUploadMode::Dynamic); - - let tile_offset_attr = - device.get_vertex_attr(&clip_tile_program.program, "TileOffset").unwrap(); - let dest_tile_origin_attr = - device.get_vertex_attr(&clip_tile_program.program, "DestTileOrigin").unwrap(); - let src_tile_origin_attr = - device.get_vertex_attr(&clip_tile_program.program, "SrcTileOrigin").unwrap(); - let src_backdrop_attr = - device.get_vertex_attr(&clip_tile_program.program, "SrcBackdrop").unwrap(); - - device.bind_buffer(&vertex_array, quad_vertex_positions_buffer, BufferTarget::Vertex); - device.configure_vertex_attr(&vertex_array, &tile_offset_attr, &VertexAttrDescriptor { - size: 2, - class: VertexAttrClass::Int, - attr_type: VertexAttrType::I16, - stride: 4, - offset: 0, - divisor: 0, - buffer_index: 0, - }); - device.bind_buffer(&vertex_array, &vertex_buffer, BufferTarget::Vertex); - device.configure_vertex_attr(&vertex_array, &dest_tile_origin_attr, &VertexAttrDescriptor { - size: 2, - class: VertexAttrClass::Int, - attr_type: VertexAttrType::U8, - stride: CLIP_TILE_INSTANCE_SIZE, - offset: 0, - divisor: 1, - 
buffer_index: 1, - }); - device.configure_vertex_attr(&vertex_array, &src_tile_origin_attr, &VertexAttrDescriptor { - size: 2, - class: VertexAttrClass::Int, - attr_type: VertexAttrType::U8, - stride: CLIP_TILE_INSTANCE_SIZE, - offset: 2, - divisor: 1, - buffer_index: 1, - }); - device.configure_vertex_attr(&vertex_array, &src_backdrop_attr, &VertexAttrDescriptor { - size: 1, - class: VertexAttrClass::Int, - attr_type: VertexAttrType::I8, - stride: CLIP_TILE_INSTANCE_SIZE, - offset: 4, - divisor: 1, - buffer_index: 1, - }); - device.bind_buffer(&vertex_array, quad_vertex_indices_buffer, BufferTarget::Index); - - ClipTileVertexArray { vertex_array, vertex_buffer } - } -} - - pub struct BlitProgram where D: Device { pub program: D::Program, + pub dest_rect_uniform: D::Uniform, + pub framebuffer_size_uniform: D::Uniform, pub src_texture: D::TextureParameter, } impl BlitProgram where D: Device { pub fn new(device: &D, resources: &dyn ResourceLoader) -> BlitProgram { let program = device.create_raster_program(resources, "blit"); + let dest_rect_uniform = device.get_uniform(&program, "DestRect"); + let framebuffer_size_uniform = device.get_uniform(&program, "FramebufferSize"); let src_texture = device.get_texture_parameter(&program, "Src"); - BlitProgram { program, src_texture } + BlitProgram { program, dest_rect_uniform, framebuffer_size_uniform, src_texture } + } +} + +pub struct ProgramsCore where D: Device { + pub blit_program: BlitProgram, +} + +impl ProgramsCore where D: Device { + pub fn new(device: &D, resources: &dyn ResourceLoader) -> ProgramsCore { + ProgramsCore { + blit_program: BlitProgram::new(device, resources), + } } } @@ -383,211 +138,73 @@ impl ClearProgram where D: Device { } } -pub enum FillProgram where D: Device { - Raster(FillRasterProgram), - Compute(FillComputeProgram), -} - -impl FillProgram where D: Device { - pub fn new(device: &D, resources: &dyn ResourceLoader, options: &RendererOptions) - -> FillProgram { - match (options.no_compute, 
device.feature_level()) { - (false, FeatureLevel::D3D11) => { - FillProgram::Compute(FillComputeProgram::new(device, resources)) - } - (_, FeatureLevel::D3D10) | (true, _) => { - FillProgram::Raster(FillRasterProgram::new(device, resources)) - } - } - } -} - -pub struct FillRasterProgram where D: Device { +pub struct TileProgramCommon where D: Device { pub program: D::Program, - pub framebuffer_size_uniform: D::Uniform, - pub tile_size_uniform: D::Uniform, - pub area_lut_texture: D::TextureParameter, -} - -impl FillRasterProgram where D: Device { - pub fn new(device: &D, resources: &dyn ResourceLoader) -> FillRasterProgram { - let program = device.create_raster_program(resources, "fill"); - let framebuffer_size_uniform = device.get_uniform(&program, "FramebufferSize"); - let tile_size_uniform = device.get_uniform(&program, "TileSize"); - let area_lut_texture = device.get_texture_parameter(&program, "AreaLUT"); - FillRasterProgram { - program, - framebuffer_size_uniform, - tile_size_uniform, - area_lut_texture, - } - } -} - -pub struct FillComputeProgram where D: Device { - pub program: D::Program, - pub dest_image: D::ImageParameter, - pub area_lut_texture: D::TextureParameter, - pub first_tile_index_uniform: D::Uniform, - pub fills_storage_buffer: D::StorageBuffer, - pub next_fills_storage_buffer: D::StorageBuffer, - pub fill_tile_map_storage_buffer: D::StorageBuffer, -} - -impl FillComputeProgram where D: Device { - pub fn new(device: &D, resources: &dyn ResourceLoader) -> FillComputeProgram { - let mut program = device.create_compute_program(resources, "fill"); - let local_size = ComputeDimensions { x: TILE_WIDTH, y: TILE_HEIGHT / 4, z: 1 }; - device.set_compute_program_local_size(&mut program, local_size); - - let dest_image = device.get_image_parameter(&program, "Dest"); - let area_lut_texture = device.get_texture_parameter(&program, "AreaLUT"); - let first_tile_index_uniform = device.get_uniform(&program, "FirstTileIndex"); - let fills_storage_buffer = 
device.get_storage_buffer(&program, "Fills", 0); - let next_fills_storage_buffer = device.get_storage_buffer(&program, "NextFills", 1); - let fill_tile_map_storage_buffer = device.get_storage_buffer(&program, "FillTileMap", 2); - - FillComputeProgram { - program, - dest_image, - area_lut_texture, - first_tile_index_uniform, - fills_storage_buffer, - next_fills_storage_buffer, - fill_tile_map_storage_buffer, - } - } -} - -pub struct TileProgram where D: Device { - pub program: D::Program, - pub transform_uniform: D::Uniform, pub tile_size_uniform: D::Uniform, pub texture_metadata_texture: D::TextureParameter, pub texture_metadata_size_uniform: D::Uniform, - pub dest_texture: D::TextureParameter, + pub z_buffer_texture: D::TextureParameter, + pub z_buffer_texture_size_uniform: D::Uniform, pub color_texture_0: D::TextureParameter, pub color_texture_size_0_uniform: D::Uniform, pub color_texture_1: D::TextureParameter, pub mask_texture_0: D::TextureParameter, pub mask_texture_size_0_uniform: D::Uniform, pub gamma_lut_texture: D::TextureParameter, - pub filter_params_0_uniform: D::Uniform, - pub filter_params_1_uniform: D::Uniform, - pub filter_params_2_uniform: D::Uniform, pub framebuffer_size_uniform: D::Uniform, - pub ctrl_uniform: D::Uniform, } -impl TileProgram where D: Device { - pub fn new(device: &D, resources: &dyn ResourceLoader) -> TileProgram { - let program = device.create_raster_program(resources, "tile"); - let transform_uniform = device.get_uniform(&program, "Transform"); +impl TileProgramCommon where D: Device { + pub(crate) fn new(device: &D, program: D::Program) -> TileProgramCommon { let tile_size_uniform = device.get_uniform(&program, "TileSize"); let texture_metadata_texture = device.get_texture_parameter(&program, "TextureMetadata"); let texture_metadata_size_uniform = device.get_uniform(&program, "TextureMetadataSize"); - let dest_texture = device.get_texture_parameter(&program, "DestTexture"); + let z_buffer_texture = 
device.get_texture_parameter(&program, "ZBuffer"); + let z_buffer_texture_size_uniform = device.get_uniform(&program, "ZBufferSize"); let color_texture_0 = device.get_texture_parameter(&program, "ColorTexture0"); let color_texture_size_0_uniform = device.get_uniform(&program, "ColorTextureSize0"); let color_texture_1 = device.get_texture_parameter(&program, "ColorTexture1"); let mask_texture_0 = device.get_texture_parameter(&program, "MaskTexture0"); let mask_texture_size_0_uniform = device.get_uniform(&program, "MaskTextureSize0"); let gamma_lut_texture = device.get_texture_parameter(&program, "GammaLUT"); - let filter_params_0_uniform = device.get_uniform(&program, "FilterParams0"); - let filter_params_1_uniform = device.get_uniform(&program, "FilterParams1"); - let filter_params_2_uniform = device.get_uniform(&program, "FilterParams2"); let framebuffer_size_uniform = device.get_uniform(&program, "FramebufferSize"); - let ctrl_uniform = device.get_uniform(&program, "Ctrl"); - TileProgram { + + TileProgramCommon { program, - transform_uniform, tile_size_uniform, texture_metadata_texture, texture_metadata_size_uniform, - dest_texture, + z_buffer_texture, + z_buffer_texture_size_uniform, color_texture_0, color_texture_size_0_uniform, color_texture_1, mask_texture_0, mask_texture_size_0_uniform, gamma_lut_texture, - filter_params_0_uniform, - filter_params_1_uniform, - filter_params_2_uniform, framebuffer_size_uniform, - ctrl_uniform, } } } -pub struct CopyTileProgram where D: Device { - pub program: D::Program, - pub transform_uniform: D::Uniform, - pub tile_size_uniform: D::Uniform, - pub framebuffer_size_uniform: D::Uniform, - pub src_texture: D::TextureParameter, -} - -impl CopyTileProgram where D: Device { - pub fn new(device: &D, resources: &dyn ResourceLoader) -> CopyTileProgram { - let program = device.create_raster_program(resources, "tile_copy"); - let transform_uniform = device.get_uniform(&program, "Transform"); - let tile_size_uniform = 
device.get_uniform(&program, "TileSize"); - let framebuffer_size_uniform = device.get_uniform(&program, "FramebufferSize"); - let src_texture = device.get_texture_parameter(&program, "Src"); - CopyTileProgram { - program, - transform_uniform, - tile_size_uniform, - framebuffer_size_uniform, - src_texture, - } - } -} - -pub struct ClipTileProgram where D: Device { - pub program: D::Program, - pub src_texture: D::TextureParameter, -} - -impl ClipTileProgram where D: Device { - pub fn new(device: &D, resources: &dyn ResourceLoader) -> ClipTileProgram { - let program = device.create_raster_program(resources, "tile_clip"); - let src_texture = device.get_texture_parameter(&program, "Src"); - ClipTileProgram { program, src_texture } - } -} - -pub struct StencilProgram -where - D: Device, -{ +pub struct StencilProgram where D: Device { pub program: D::Program, } -impl StencilProgram -where - D: Device, -{ +impl StencilProgram where D: Device { pub fn new(device: &D, resources: &dyn ResourceLoader) -> StencilProgram { let program = device.create_raster_program(resources, "stencil"); StencilProgram { program } } } -pub struct StencilVertexArray -where - D: Device, -{ +pub struct StencilVertexArray where D: Device { pub vertex_array: D::VertexArray, pub vertex_buffer: D::Buffer, pub index_buffer: D::Buffer, } -impl StencilVertexArray -where - D: Device, -{ +impl StencilVertexArray where D: Device { pub fn new(device: &D, stencil_program: &StencilProgram) -> StencilVertexArray { let vertex_array = device.create_vertex_array(); let vertex_buffer = device.create_buffer(BufferUploadMode::Static); @@ -628,17 +245,11 @@ impl ReprojectionProgram where D: Device { } } -pub struct ReprojectionVertexArray -where - D: Device, -{ +pub struct ReprojectionVertexArray where D: Device { pub vertex_array: D::VertexArray, } -impl ReprojectionVertexArray -where - D: Device, -{ +impl ReprojectionVertexArray where D: Device { pub fn new( device: &D, reprojection_program: &ReprojectionProgram, 
diff --git a/renderer/src/gpu_data.rs b/renderer/src/gpu_data.rs index 2282df5e..9c14fd3f 100644 --- a/renderer/src/gpu_data.rs +++ b/renderer/src/gpu_data.rs @@ -13,13 +13,15 @@ use crate::builder::{ALPHA_TILES_PER_LEVEL, ALPHA_TILE_LEVEL_COUNT}; use crate::options::BoundingQuad; use crate::paint::PaintCompositeOp; +use crate::scene::PathId; +use crate::tile_map::DenseTileMap; use pathfinder_color::ColorU; use pathfinder_content::effects::{BlendMode, Filter}; use pathfinder_content::render_target::RenderTargetId; -use pathfinder_geometry::line_segment::{LineSegmentU4, LineSegmentU8}; +use pathfinder_geometry::line_segment::{LineSegment2F, LineSegmentU16}; use pathfinder_geometry::rect::RectI; use pathfinder_geometry::transform2d::Transform2F; -use pathfinder_geometry::vector::Vector2I; +use pathfinder_geometry::vector::{Vector2F, Vector2I}; use pathfinder_gpu::TextureSamplingFlags; use std::fmt::{Debug, Formatter, Result as DebugResult}; use std::sync::Arc; @@ -64,13 +66,18 @@ pub enum RenderCommand { UploadTextureMetadata(Vec), // Adds fills to the queue. - AddFills(Vec), + AddFillsD3D9(Vec), // Flushes the queue of fills. - FlushFills, + FlushFillsD3D9, - // Renders clips to the mask tile. - ClipTiles(Vec), + /// Upload a scene to GPU. + /// + /// This will only be sent if dicing and binning is done on GPU. + UploadSceneD3D11 { + draw_segments: SegmentsD3D11, + clip_segments: SegmentsD3D11, + }, // Pushes a render target onto the stack. Draw commands go to the render target on top of the // stack. @@ -79,11 +86,14 @@ pub enum RenderCommand { // Pops a render target from the stack. PopRenderTarget, - // Marks that tile compositing is about to begin. - BeginTileDrawing, + // Computes backdrops for tiles, prepares any Z-buffers, and performs clipping. + PrepareClipTilesD3D11(TileBatchDataD3D11), // Draws a batch of tiles to the render target on top of the stack. 
- DrawTiles(TileBatch), + DrawTilesD3D9(DrawTileBatchD3D9), + + // Draws a batch of tiles to the render target on top of the stack. + DrawTilesD3D11(DrawTileBatchD3D11), // Presents a rendered frame. Finish { cpu_build_time: Duration }, @@ -103,13 +113,125 @@ pub struct TextureLocation { pub rect: RectI, } +/// Information about a batch of tiles to be prepared (postprocessed). #[derive(Clone, Debug)] -pub struct TileBatch { - pub tiles: Vec, +pub struct TileBatchDataD3D11 { + /// The ID of this batch. + /// + /// The renderer should not assume that these values are consecutive. + pub batch_id: TileBatchId, + /// The number of paths in this batch. + pub path_count: u32, + /// The number of tiles in this batch. + pub tile_count: u32, + /// The total number of segments in this batch. + pub segment_count: u32, + /// Information needed to prepare the tiles. + pub prepare_info: PrepareTilesInfoD3D11, + /// Where the paths come from (draw or clip). + pub path_source: PathSource, + /// Information about clips applied to paths, if any of the paths have clips. + pub clipped_path_info: Option, +} + +/// Where a path should come from (draw or clip). +#[derive(Clone, Copy, PartialEq, Debug)] +pub enum PathSource { + Draw, + Clip, +} + +/// Information about a batch of tiles to be prepared on GPU. +#[derive(Clone, Debug)] +pub struct PrepareTilesInfoD3D11 { + /// Initial backdrop values for each tile column, packed together. + pub backdrops: Vec, + + /// Mapping from path index to metadata needed to compute propagation on GPU. + /// + /// This contains indices into the `tiles` vector. + pub propagate_metadata: Vec, + + /// Metadata about each path that will be diced (flattened). + pub dice_metadata: Vec, + + /// Sparse information about all the allocated tiles. + pub tile_path_info: Vec, + + /// A transform to apply to the segments. 
+ pub transform: Transform2F, +} + +#[derive(Clone, Debug)] +pub struct SegmentsD3D11 { + pub points: Vec, + pub indices: Vec, +} + +#[derive(Clone, Copy, Debug)] +#[repr(C)] +pub struct SegmentIndicesD3D11 { + pub first_point_index: u32, + pub flags: u32, +} + +/// Information about clips applied to paths in a batch. +#[derive(Clone, Debug)] +pub struct ClippedPathInfo { + /// The ID of the batch containing the clips. + /// + /// In the current implementation, this is always 0. + pub clip_batch_id: TileBatchId, + + /// The number of paths that have clips. + pub clipped_path_count: u32, + + /// The maximum number of clipped tiles. + /// + /// This is used to allocate vertex buffers. + pub max_clipped_tile_count: u32, + + /// The actual clips, if calculated on CPU. + pub clips: Option>, +} + +/// Together with the `TileBatchId`, uniquely identifies a path on the renderer side. +/// +/// Generally, `PathIndex(!0)` represents no path. +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] +pub struct PathBatchIndex(pub u32); + +/// Unique ID that identifies a batch of tiles. +#[derive(Clone, Copy, PartialEq, Debug)] +pub struct TileBatchId(pub u32); + +#[derive(Clone, Debug)] +pub enum DrawTileBatch { + D3D9(DrawTileBatchD3D9), + D3D11(DrawTileBatchD3D11), +} + +/// Information needed to draw a batch of tiles in D3D9. +#[derive(Clone, Debug)] +pub struct DrawTileBatchD3D9 { + pub tiles: Vec, + pub clips: Vec, + pub z_buffer_data: DenseTileMap, + /// The color texture to use. pub color_texture: Option, + /// The filter to use. pub filter: Filter, + /// The blend mode to composite these tiles with. pub blend_mode: BlendMode, - pub tile_page: u16, +} + +/// Information needed to draw a batch of tiles in D3D11. +#[derive(Clone, Debug)] +pub struct DrawTileBatchD3D11 { + /// Data for the tile batch. + pub tile_batch_data: TileBatchDataD3D11, + /// The color texture to use. 
+ pub color_texture: Option, } #[derive(Clone, Copy, Debug, PartialEq)] @@ -119,90 +241,198 @@ pub struct TileBatchTexture { pub composite_op: PaintCompositeOp, } +#[derive(Clone, Copy, PartialEq, Debug)] +#[repr(C)] +pub struct TileId(pub i32); + +#[derive(Clone, Copy, PartialEq, Debug)] +#[repr(C)] +pub struct FillId(pub i32); + +// TODO(pcwalton): Pack better. #[derive(Clone, Copy, Debug)] -pub struct FillObjectPrimitive { - pub px: LineSegmentU4, - pub subpx: LineSegmentU8, +#[repr(C)] +pub struct TileObjectPrimitive { pub tile_x: i16, pub tile_y: i16, + pub alpha_tile_id: AlphaTileId, + pub path_id: PathId, + // TODO(pcwalton): Maybe look the color up based on path ID? + pub color: u16, + pub ctrl: u8, + pub backdrop: i8, } #[derive(Clone, Copy, Debug)] #[repr(C)] -pub struct TileObjectPrimitive { - pub alpha_tile_id: AlphaTileId, +pub struct TileD3D11 { + pub next_tile_id: TileId, + pub first_fill_id: FillId, + pub alpha_tile_id_lo: i16, + pub alpha_tile_id_hi: i8, + pub backdrop_delta: i8, + pub color: u16, + pub ctrl: u8, pub backdrop: i8, } +#[derive(Clone, Copy, Debug)] +#[repr(C)] +pub struct AlphaTileD3D11 { + pub alpha_tile_index: AlphaTileId, + pub clip_tile_index: AlphaTileId, +} + +#[derive(Clone, Copy, Debug)] +#[repr(C)] +pub struct TilePathInfoD3D11 { + pub tile_min_x: i16, + pub tile_min_y: i16, + pub tile_max_x: i16, + pub tile_max_y: i16, + pub first_tile_index: u32, + // Must match the order in `TileD3D11`. + pub color: u16, + pub ctrl: u8, + pub backdrop: i8, +} + +// TODO(pcwalton): Pack better! +#[derive(Clone, Copy, Debug, PartialEq)] +#[repr(C)] +pub struct PropagateMetadataD3D11 { + pub tile_rect: RectI, + pub tile_offset: u32, + pub path_index: PathBatchIndex, + pub z_write: u32, + // This will generally not refer to the same batch as `path_index`. 
+ pub clip_path_index: PathBatchIndex, + pub backdrop_offset: u32, + pub pad0: u32, + pub pad1: u32, + pub pad2: u32, +} + +#[derive(Clone, Copy, Debug, PartialEq)] +#[repr(C)] +pub struct DiceMetadataD3D11 { + pub global_path_id: PathId, + pub first_global_segment_index: u32, + pub first_batch_segment_index: u32, + pub pad: u32, +} + #[derive(Clone, Copy, Debug)] #[repr(C)] pub struct TextureMetadataEntry { pub color_0_transform: Transform2F, + pub color_0_combine_mode: ColorCombineMode, pub base_color: ColorU, + pub filter: Filter, + pub blend_mode: BlendMode, } -#[derive(Clone, Copy, Debug, Default)] -pub struct FillBatchEntry { - pub fill: Fill, - pub page: u16, +#[derive(Clone, Copy, Debug)] +#[repr(C)] +pub enum ColorCombineMode { + None, + SrcIn, + DestIn, } #[derive(Clone, Copy, Debug, Default)] #[repr(C)] pub struct Fill { - pub subpx: LineSegmentU8, - pub px: LineSegmentU4, - pub alpha_tile_index: u16, + pub line_segment: LineSegmentU16, + // The meaning of this field depends on whether fills are being done with the GPU rasterizer or + // GPU compute. If raster, this field names the index of the alpha tile that this fill belongs + // to. If compute, this field names the index of the next fill in the singly-linked list of + // fills belonging to this alpha tile. + pub link: u32, } -#[derive(Clone, Debug)] -pub struct ClipBatch { - pub clips: Vec, - pub key: ClipBatchKey, +#[derive(Clone, Copy, Debug)] +#[repr(C)] +pub struct ClipMetadata { + pub draw_tile_rect: RectI, + pub clip_tile_rect: RectI, + pub draw_tile_offset: u32, + pub clip_tile_offset: u32, + pub pad0: u32, + pub pad1: u32, } -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub struct ClipBatchKey { - pub dest_page: u16, - pub src_page: u16, - pub kind: ClipBatchKind, -} - -// Order is significant here. 
-#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] -pub enum ClipBatchKind { - Draw, - Clip, -} - -#[derive(Clone, Copy, Debug, Default)] +#[derive(Clone, Copy, Debug)] #[repr(C)] pub struct Clip { - pub dest_u: u8, - pub dest_v: u8, - pub src_u: u8, - pub src_v: u8, - pub backdrop: i8, - pub pad_0: u8, - pub pad_1: u16, + pub dest_tile_id: AlphaTileId, + pub dest_backdrop: i32, + pub src_tile_id: AlphaTileId, + pub src_backdrop: i32, } -#[derive(Clone, Copy, Debug, Default)] +impl Default for Clip { + #[inline] + fn default() -> Clip { + Clip { + dest_tile_id: AlphaTileId(!0), + dest_backdrop: 0, + src_tile_id: AlphaTileId(!0), + src_backdrop: 0, + } + } +} + +#[derive(Clone, Copy, Debug)] #[repr(C)] -pub struct Tile { - pub tile_x: i16, - pub tile_y: i16, - pub mask_0_u: u8, - pub mask_0_v: u8, - pub mask_0_backdrop: i8, - pub pad: u8, - pub color: u16, - pub ctrl: u16, +pub struct BinSegment { + pub segment: LineSegment2F, + pub path_index: PathId, + pub pad0: u32, + pub pad1: u32, + pub pad2: u32, +} + +#[derive(Clone, Copy, Debug)] +#[repr(C)] +pub struct BackdropInfoD3D11 { + pub initial_backdrop: i32, + // Column number, where 0 is the leftmost column in the tile rect. 
+ pub tile_x_offset: i32, + pub path_index: PathBatchIndex, +} + +#[derive(Clone, Copy)] +#[repr(C)] +pub(crate) struct MicrolineD3D11 { + from_x_px: i16, + from_y_px: i16, + to_x_px: i16, + to_y_px: i16, + from_x_subpx: u8, + from_y_subpx: u8, + to_x_subpx: u8, + to_y_subpx: u8, + path_index: u32, +} + +#[derive(Clone, Copy)] +#[repr(C)] +pub(crate) struct FirstTileD3D11 { + first_tile: i32, } #[derive(Clone, Copy, PartialEq, Debug)] +#[repr(C)] pub struct AlphaTileId(pub u32); +impl PathBatchIndex { + #[inline] + pub fn none() -> PathBatchIndex { + PathBatchIndex(!0) + } +} + impl AlphaTileId { #[inline] pub fn new(next_alpha_tile_index: &[AtomicUsize; ALPHA_TILE_LEVEL_COUNT], level: usize) @@ -249,24 +479,40 @@ impl Debug for RenderCommand { RenderCommand::UploadTextureMetadata(ref metadata) => { write!(formatter, "UploadTextureMetadata(x{})", metadata.len()) } - RenderCommand::AddFills(ref fills) => { - write!(formatter, "AddFills(x{})", fills.len()) + RenderCommand::AddFillsD3D9(ref fills) => { + write!(formatter, "AddFillsD3D9(x{})", fills.len()) } - RenderCommand::FlushFills => write!(formatter, "FlushFills"), - RenderCommand::ClipTiles(ref batches) => { - write!(formatter, "ClipTiles(x{})", batches.len()) + RenderCommand::FlushFillsD3D9 => write!(formatter, "FlushFills"), + RenderCommand::UploadSceneD3D11 { ref draw_segments, ref clip_segments } => { + write!(formatter, + "UploadSceneD3D11(DP x{}, DI x{}, CP x{}, CI x{})", + draw_segments.points.len(), + draw_segments.indices.len(), + clip_segments.points.len(), + clip_segments.indices.len()) + } + RenderCommand::PrepareClipTilesD3D11(ref batch) => { + let clipped_path_count = match batch.clipped_path_info { + None => 0, + Some(ref clipped_path_info) => clipped_path_info.clipped_path_count, + }; + write!(formatter, + "PrepareClipTilesD3D11({:?}, C {})", + batch.batch_id, + clipped_path_count) } RenderCommand::PushRenderTarget(render_target_id) => { write!(formatter, "PushRenderTarget({:?})", 
render_target_id) } RenderCommand::PopRenderTarget => write!(formatter, "PopRenderTarget"), - RenderCommand::BeginTileDrawing => write!(formatter, "BeginTileDrawing"), - RenderCommand::DrawTiles(ref batch) => { + RenderCommand::DrawTilesD3D9(ref batch) => { + write!(formatter, "DrawTilesD3D9(x{:?})", batch.tiles.len()) + } + RenderCommand::DrawTilesD3D11(ref batch) => { write!(formatter, - "DrawTiles(x{}, C0 {:?}, {:?})", - batch.tiles.len(), - batch.color_texture, - batch.blend_mode) + "DrawTilesD3D11({:?}, C0 {:?})", + batch.tile_batch_data.batch_id, + batch.color_texture) } RenderCommand::Finish { cpu_build_time } => { write!(formatter, "Finish({} ms)", cpu_build_time.as_secs_f64() * 1000.0) @@ -274,3 +520,10 @@ impl Debug for RenderCommand { } } } + +impl Default for FirstTileD3D11 { + #[inline] + fn default() -> FirstTileD3D11 { + FirstTileD3D11 { first_tile: -1 } + } +} diff --git a/renderer/src/lib.rs b/renderer/src/lib.rs index 083f9ba3..ac43b530 100644 --- a/renderer/src/lib.rs +++ b/renderer/src/lib.rs @@ -1,6 +1,6 @@ // pathfinder/renderer/src/lib.rs // -// Copyright © 2019 The Pathfinder Project Developers. +// Copyright © 2020 The Pathfinder Project Developers. // // Licensed under the Apache License, Version 2.0 or the MIT license @@ -27,4 +27,3 @@ mod builder; mod tile_map; mod tiler; mod tiles; -mod z_buffer; diff --git a/renderer/src/options.rs b/renderer/src/options.rs index 86ba541e..646bdda4 100644 --- a/renderer/src/options.rs +++ b/renderer/src/options.rs @@ -10,6 +10,7 @@ //! Options that control how rendering is to be performed. 
+use crate::gpu::options::RendererLevel; use crate::gpu_data::RenderCommand; use pathfinder_geometry::rect::RectF; use pathfinder_geometry::transform2d::Transform2F; @@ -17,17 +18,21 @@ use pathfinder_geometry::transform3d::Perspective; use pathfinder_geometry::vector::{Vector2F, Vector4F}; use pathfinder_content::clip::PolygonClipper3D; -pub trait RenderCommandListener: Send + Sync { - fn send(&self, command: RenderCommand); +pub struct RenderCommandListener<'a> { + send_fn: RenderCommandSendFunction<'a>, } -impl RenderCommandListener for F -where - F: Fn(RenderCommand) + Send + Sync, -{ +pub type RenderCommandSendFunction<'a> = Box; + +impl<'a> RenderCommandListener<'a> { #[inline] - fn send(&self, command: RenderCommand) { - (*self)(command) + pub fn new(send_fn: RenderCommandSendFunction<'a>) -> RenderCommandListener<'a> { + RenderCommandListener { send_fn } + } + + #[inline] + pub fn send(&self, render_command: RenderCommand) { + (self.send_fn)(render_command) } } @@ -120,6 +125,13 @@ pub(crate) struct PreparedBuildOptions { pub(crate) subpixel_aa_enabled: bool, } +#[derive(Clone, Copy)] +pub(crate) enum PrepareMode { + CPU, + TransformCPUBinGPU, + GPU { transform: Transform2F }, +} + impl PreparedBuildOptions { #[inline] pub(crate) fn bounding_quad(&self) -> BoundingQuad { @@ -128,6 +140,24 @@ impl PreparedBuildOptions { _ => [Vector4F::default(); 4], } } + + #[inline] + pub(crate) fn to_prepare_mode(&self, renderer_level: RendererLevel) -> PrepareMode { + match renderer_level { + RendererLevel::D3D9 => PrepareMode::CPU, + RendererLevel::D3D11 => { + match self.transform { + PreparedRenderTransform::Perspective { .. 
} => PrepareMode::TransformCPUBinGPU, + PreparedRenderTransform::None => { + PrepareMode::GPU { transform: Transform2F::default() } + } + PreparedRenderTransform::Transform2D(transform) => { + PrepareMode::GPU { transform } + } + } + } + } + } } pub(crate) type BoundingQuad = [Vector4F; 4]; diff --git a/renderer/src/paint.rs b/renderer/src/paint.rs index 8d876f3c..25721a07 100644 --- a/renderer/src/paint.rs +++ b/renderer/src/paint.rs @@ -9,12 +9,12 @@ // except according to those terms. use crate::allocator::{AllocationMode, TextureAllocator}; -use crate::gpu_data::{RenderCommand, TextureLocation, TextureMetadataEntry, TexturePageDescriptor}; +use crate::gpu_data::{ColorCombineMode, RenderCommand, TextureLocation, TextureMetadataEntry, TexturePageDescriptor}; use crate::gpu_data::{TexturePageId, TileBatchTexture}; use crate::scene::{RenderTarget, SceneId}; use hashbrown::HashMap; use pathfinder_color::ColorU; -use pathfinder_content::effects::{Filter, PatternFilter}; +use pathfinder_content::effects::{BlendMode, Filter, PatternFilter}; use pathfinder_content::gradient::{Gradient, GradientGeometry}; use pathfinder_content::pattern::{Pattern, PatternSource}; use pathfinder_content::render_target::RenderTargetId; @@ -285,6 +285,7 @@ pub struct PaintMetadata { pub color_texture_metadata: Option, /// The base color that the color texture gets mixed into. pub base_color: ColorU, + pub blend_mode: BlendMode, /// True if this paint is fully opaque. 
pub is_opaque: bool, } @@ -435,6 +436,8 @@ impl Palette { color_texture_metadata, is_opaque: paint.is_opaque(), base_color: paint.base_color(), + // FIXME(pcwalton) + blend_mode: BlendMode::SrcOver, }); } @@ -496,7 +499,14 @@ impl Palette { None => Transform2F::default(), Some(ref color_texture_metadata) => color_texture_metadata.transform, }, + color_0_combine_mode: if paint_metadata.color_texture_metadata.is_some() { + ColorCombineMode::SrcIn + } else { + ColorCombineMode::None + }, base_color: paint_metadata.base_color, + filter: paint_metadata.filter(), + blend_mode: paint_metadata.blend_mode, } }).collect(); let mut render_commands = vec![RenderCommand::UploadTextureMetadata(texture_metadata)]; diff --git a/renderer/src/scene.rs b/renderer/src/scene.rs index 1805c65d..0d05e131 100644 --- a/renderer/src/scene.rs +++ b/renderer/src/scene.rs @@ -12,6 +12,7 @@ use crate::builder::SceneBuilder; use crate::concurrent::executor::Executor; +use crate::gpu::options::RendererLevel; use crate::options::{BuildOptions, PreparedBuildOptions}; use crate::options::{PreparedRenderTransform, RenderCommandListener}; use crate::paint::{MergedPaletteInfo, Paint, PaintId, PaintInfo, Palette}; @@ -22,19 +23,22 @@ use pathfinder_content::render_target::RenderTargetId; use pathfinder_geometry::rect::RectF; use pathfinder_geometry::transform2d::Transform2F; use pathfinder_geometry::vector::{Vector2I, vec2f}; +use std::ops::Range; use std::sync::atomic::{AtomicUsize, Ordering}; +use std::u64; static NEXT_SCENE_ID: AtomicUsize = AtomicUsize::new(0); #[derive(Clone)] pub struct Scene { - pub(crate) display_list: Vec, - pub(crate) paths: Vec, - pub(crate) clip_paths: Vec, + display_list: Vec, + draw_paths: Vec, + clip_paths: Vec, palette: Palette, bounds: RectF, view_box: RectF, id: SceneId, + epoch: SceneEpoch, } #[derive(Clone, Copy, Debug, PartialEq)] @@ -46,47 +50,47 @@ impl Scene { let scene_id = SceneId(NEXT_SCENE_ID.fetch_add(1, Ordering::Relaxed) as u32); Scene { display_list: 
vec![], - paths: vec![], + draw_paths: vec![], clip_paths: vec![], palette: Palette::new(scene_id), bounds: RectF::default(), view_box: RectF::default(), id: scene_id, + epoch: SceneEpoch::new(0, 1), } } - pub fn push_path(&mut self, path: DrawPath) { - let path_index = self.paths.len() as u32; - self.paths.push(path); - self.push_path_with_index(path_index); + pub fn push_draw_path(&mut self, draw_path: DrawPath) { + let draw_path_index = DrawPathId(self.draw_paths.len() as u32); + self.draw_paths.push(draw_path); + self.push_draw_path_with_index(draw_path_index); } - fn push_path_with_index(&mut self, path_index: u32) { - self.bounds = self.bounds.union_rect(self.paths[path_index as usize].outline.bounds()); + fn push_draw_path_with_index(&mut self, draw_path_id: DrawPathId) { + let new_path_bounds = self.draw_paths[draw_path_id.0 as usize].outline.bounds(); + self.bounds = self.bounds.union_rect(new_path_bounds); - if let Some(DisplayItem::DrawPaths { - start_index: _, - ref mut end_index - }) = self.display_list.last_mut() { - *end_index = path_index + 1; - } else { - self.display_list.push(DisplayItem::DrawPaths { - start_index: path_index, - end_index: path_index + 1, - }); + let end_path_id = DrawPathId(draw_path_id.0 + 1); + match self.display_list.last_mut() { + Some(DisplayItem::DrawPaths(ref mut range)) => range.end = end_path_id, + _ => self.display_list.push(DisplayItem::DrawPaths(draw_path_id..end_path_id)), } + + self.epoch.next(); } pub fn push_clip_path(&mut self, clip_path: ClipPath) -> ClipPathId { self.bounds = self.bounds.union_rect(clip_path.outline.bounds()); let clip_path_id = ClipPathId(self.clip_paths.len() as u32); self.clip_paths.push(clip_path); + self.epoch.next(); clip_path_id } pub fn push_render_target(&mut self, render_target: RenderTarget) -> RenderTargetId { let render_target_id = self.palette.push_render_target(render_target); self.display_list.push(DisplayItem::PushRenderTarget(render_target_id)); + self.epoch.next(); 
render_target_id } @@ -108,10 +112,10 @@ impl Scene { } // Merge draw paths. - let mut draw_path_mapping = Vec::with_capacity(scene.paths.len()); - for draw_path in scene.paths { - draw_path_mapping.push(self.paths.len() as u32); - self.paths.push(DrawPath { + let mut draw_path_mapping = Vec::with_capacity(scene.draw_paths.len()); + for draw_path in scene.draw_paths { + draw_path_mapping.push(self.draw_paths.len() as u32); + self.draw_paths.push(DrawPath { outline: draw_path.outline, paint: paint_mapping[&draw_path.paint], clip_path: draw_path.clip_path.map(|clip_path_id| { @@ -133,16 +137,17 @@ impl Scene { DisplayItem::PopRenderTarget => { self.display_list.push(DisplayItem::PopRenderTarget); } - DisplayItem::DrawPaths { - start_index: old_start_path_index, - end_index: old_end_path_index, - } => { - for old_path_index in old_start_path_index..old_end_path_index { - self.push_path_with_index(draw_path_mapping[old_path_index as usize]) + DisplayItem::DrawPaths(range) => { + for old_path_index in (range.start.0 as usize)..(range.end.0 as usize) { + let old_draw_path_id = DrawPathId(draw_path_mapping[old_path_index]); + self.push_draw_path_with_index(old_draw_path_id); } } } } + + // Bump epoch. 
+ self.epoch.next(); } #[inline] @@ -152,12 +157,9 @@ impl Scene { #[allow(clippy::trivially_copy_pass_by_ref)] pub fn push_paint(&mut self, paint: &Paint) -> PaintId { - self.palette.push_paint(paint) - } - - #[inline] - pub fn path_count(&self) -> usize { - self.paths.len() + let paint_id = self.palette.push_paint(paint); + self.epoch.next(); + paint_id } #[inline] @@ -168,6 +170,7 @@ impl Scene { #[inline] pub fn set_bounds(&mut self, new_bounds: RectF) { self.bounds = new_bounds; + self.epoch.next(); } #[inline] @@ -178,13 +181,13 @@ impl Scene { #[inline] pub fn set_view_box(&mut self, new_view_box: RectF) { self.view_box = new_view_box; + self.epoch.next(); } - pub(crate) fn apply_render_options( - &self, - original_outline: &Outline, - options: &PreparedBuildOptions, - ) -> Outline { + pub(crate) fn apply_render_options(&self, + original_outline: &Outline, + options: &PreparedBuildOptions) + -> Outline { let mut outline; match options.transform { PreparedRenderTransform::Perspective { @@ -238,63 +241,131 @@ impl Scene { } #[inline] - pub fn build<'a, E>(&mut self, - options: BuildOptions, - listener: Box, - executor: &E) - where E: Executor { + pub fn build<'a, 'b, E>(&mut self, + options: BuildOptions, + sink: &'b mut SceneSink<'a>, + executor: &E) + where E: Executor { let prepared_options = options.prepare(self.bounds); - SceneBuilder::new(self, &prepared_options, listener).build(executor) + SceneBuilder::new(self, &prepared_options, sink).build(executor) } - pub fn paths<'a>(&'a self) -> PathIter { - PathIter { - scene: self, - pos: 0 + #[inline] + pub fn display_list(&self) -> &[DisplayItem] { + &self.display_list + } + + #[inline] + pub fn draw_paths(&self) -> &[DrawPath] { + &self.draw_paths + } + + #[inline] + pub fn clip_paths(&self) -> &[ClipPath] { + &self.clip_paths + } + + #[inline] + pub fn get_draw_path(&self, draw_path_id: DrawPathId) -> &DrawPath { + &self.draw_paths[draw_path_id.0 as usize] + } + + #[inline] + pub fn get_clip_path(&self, 
clip_path_id: ClipPathId) -> &ClipPath { + &self.clip_paths[clip_path_id.0 as usize] + } + + #[inline] + pub fn palette(&self) -> &Palette { + &self.palette + } + + #[inline] + pub fn id(&self) -> SceneId { + self.id + } + + #[inline] + pub fn epoch(&self) -> SceneEpoch { + self.epoch + } +} + +pub struct SceneSink<'a> { + pub(crate) listener: RenderCommandListener<'a>, + pub(crate) renderer_level: RendererLevel, + pub(crate) last_scene: Option, +} + +pub(crate) struct LastSceneInfo { + pub(crate) scene_id: SceneId, + pub(crate) scene_epoch: SceneEpoch, + pub(crate) draw_segment_ranges: Vec>, + pub(crate) clip_segment_ranges: Vec>, +} + +#[derive(Clone, Copy, PartialEq, Debug)] +pub struct SceneEpoch { + pub hi: u64, + pub lo: u64, +} + +impl SceneEpoch { + #[inline] + fn new(hi: u64, lo: u64) -> SceneEpoch { + SceneEpoch { hi, lo } + } + + #[inline] + fn successor(&self) -> SceneEpoch { + if self.lo == u64::MAX { + SceneEpoch { hi: self.hi + 1, lo: 0 } + } else { + SceneEpoch { hi: self.hi, lo: self.lo + 1 } } } + + #[inline] + fn next(&mut self) { + *self = self.successor(); + } } -pub struct PathIter<'a> { - scene: &'a Scene, - pos: usize -} - -impl<'a> Iterator for PathIter<'a> { - type Item = (&'a Paint, &'a Outline, &'a str); - fn next(&mut self) -> Option { - let item = self.scene.paths.get(self.pos).map(|path_object| { - ( - self.scene.palette.paints.get(path_object.paint.0 as usize).unwrap(), - &path_object.outline, - &*path_object.name - ) - }); - self.pos += 1; - item +impl<'a> SceneSink<'a> { + #[inline] + pub fn new(listener: RenderCommandListener<'a>, renderer_level: RendererLevel) + -> SceneSink<'a> { + SceneSink { listener, renderer_level, last_scene: None } } } #[derive(Clone, Debug)] pub struct DrawPath { - outline: Outline, - paint: PaintId, - clip_path: Option, - fill_rule: FillRule, - blend_mode: BlendMode, - name: String, + pub outline: Outline, + pub paint: PaintId, + pub clip_path: Option, + pub fill_rule: FillRule, + pub blend_mode: 
BlendMode, + pub name: String, } #[derive(Clone, Debug)] pub struct ClipPath { - outline: Outline, - fill_rule: FillRule, - name: String, + pub outline: Outline, + pub fill_rule: FillRule, + pub name: String, } -#[derive(Clone, Copy, Debug)] +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] +pub struct DrawPathId(pub u32); + +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] pub struct ClipPathId(pub u32); +/// Either a draw path ID or a clip path ID, depending on context. +#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)] +pub struct PathId(pub u32); + #[derive(Clone, Debug)] pub struct RenderTarget { size: Vector2I, @@ -305,7 +376,7 @@ pub struct RenderTarget { #[derive(Clone, Debug)] pub enum DisplayItem { /// Draws paths to the render target on top of the stack. - DrawPaths { start_index: u32, end_index: u32 }, + DrawPaths(Range), /// Pushes a render target onto the top of the stack. PushRenderTarget(RenderTargetId), @@ -411,3 +482,29 @@ impl RenderTarget { self.size } } + +impl DrawPathId { + #[inline] + pub(crate) fn to_path_id(self) -> PathId { + PathId(self.0) + } +} + +impl ClipPathId { + #[inline] + pub(crate) fn to_path_id(self) -> PathId { + PathId(self.0) + } +} + +impl PathId { + #[inline] + pub(crate) fn to_clip_path_id(self) -> ClipPathId { + ClipPathId(self.0) + } + + #[inline] + pub(crate) fn to_draw_path_id(self) -> DrawPathId { + DrawPathId(self.0) + } +} diff --git a/renderer/src/tile_map.rs b/renderer/src/tile_map.rs index 8b86a858..3922b6b3 100644 --- a/renderer/src/tile_map.rs +++ b/renderer/src/tile_map.rs @@ -11,35 +11,23 @@ use pathfinder_geometry::rect::RectI; use pathfinder_geometry::vector::{Vector2I, vec2i}; -#[derive(Debug)] -pub struct DenseTileMap { +#[derive(Clone, Debug)] +pub struct DenseTileMap where T: Clone + Copy { pub data: Vec, pub rect: RectI, } -impl DenseTileMap { +impl DenseTileMap where T: Clone + Copy { #[inline] - pub fn new(rect: RectI) -> DenseTileMap - where - T: Copy + Clone + Default, - { - let length = 
rect.size().x() as usize * rect.size().y() as usize; - DenseTileMap { - data: vec![T::default(); length], - rect, - } - } - - #[inline] - pub fn from_builder(build: F, rect: RectI) -> DenseTileMap - where - F: FnMut(usize) -> T, - { - let length = rect.size().x() as usize * rect.size().y() as usize; - DenseTileMap { - data: (0..length).map(build).collect(), - rect, + pub fn from_builder(mut build: F, rect: RectI) -> DenseTileMap + where F: FnMut(Vector2I) -> T { + let mut data = Vec::with_capacity(rect.size().x() as usize * rect.size().y() as usize); + for y in rect.min_y()..rect.max_y() { + for x in rect.min_x()..rect.max_x() { + data.push(build(vec2i(x, y))); + } } + DenseTileMap { data, rect } } #[inline] @@ -47,6 +35,14 @@ impl DenseTileMap { self.coords_to_index(coords).and_then(|index| self.data.get(index)) } + #[inline] + pub fn get_mut(&mut self, coords: Vector2I) -> Option<&mut T> { + match self.coords_to_index(coords) { + None => None, + Some(index) => self.data.get_mut(index), + } + } + #[inline] pub fn coords_to_index(&self, coords: Vector2I) -> Option { if self.rect.contains_point(coords) { diff --git a/renderer/src/tiler.rs b/renderer/src/tiler.rs index 7f93ec36..cb0dcf6a 100644 --- a/renderer/src/tiler.rs +++ b/renderer/src/tiler.rs @@ -11,8 +11,12 @@ //! Implements the fast lattice-clipping algorithm from Nehab and Hoppe, "Random-Access Rendering //! of General Vector Graphics" 2006. 
-use crate::builder::{ObjectBuilder, Occluder, SceneBuilder, SolidTiles}; -use crate::tiles::{PackedTile, TILE_HEIGHT, TILE_WIDTH, TileType, TilingPathInfo}; +use crate::builder::{BuiltPath, BuiltPathBinCPUData, BuiltPathData, ObjectBuilder, SceneBuilder}; +use crate::gpu::options::RendererLevel; +use crate::gpu_data::AlphaTileId; +use crate::options::PrepareMode; +use crate::scene::PathId; +use crate::tiles::{DrawTilingPathInfo, TILE_HEIGHT, TILE_WIDTH, TilingPathInfo}; use pathfinder_content::fill::FillRule; use pathfinder_content::outline::{ContourIterFlags, Outline}; use pathfinder_content::segment::Segment; @@ -23,102 +27,172 @@ use pathfinder_simd::default::{F32x2, U32x2}; const FLATTENING_TOLERANCE: f32 = 0.25; -pub(crate) struct Tiler<'a, 'b> { - scene_builder: &'a SceneBuilder<'b, 'a>, +pub(crate) struct Tiler<'a, 'b, 'c, 'd> { + scene_builder: &'a SceneBuilder<'b, 'a, 'c, 'd>, pub(crate) object_builder: ObjectBuilder, outline: &'a Outline, - path_info: TilingPathInfo<'a>, + clip_path: Option<&'a BuiltPath>, } -impl<'a, 'b> Tiler<'a, 'b> { - pub(crate) fn new(scene_builder: &'a SceneBuilder<'b, 'a>, +impl<'a, 'b, 'c, 'd> Tiler<'a, 'b, 'c, 'd> { + pub(crate) fn new(scene_builder: &'a SceneBuilder<'b, 'a, 'c, 'd>, + path_id: PathId, outline: &'a Outline, fill_rule: FillRule, view_box: RectF, - path_info: TilingPathInfo<'a>) - -> Tiler<'a, 'b> { + prepare_mode: &PrepareMode, + built_clip_paths: &'a [BuiltPath], + path_info: TilingPathInfo) + -> Tiler<'a, 'b, 'c, 'd> { let bounds = outline.bounds().intersection(view_box).unwrap_or(RectF::default()); - let object_builder = ObjectBuilder::new(bounds, view_box, fill_rule, &path_info); - Tiler { scene_builder, object_builder, outline, path_info } + + let clip_path = match path_info { + TilingPathInfo::Draw(DrawTilingPathInfo { clip_path_id: Some(clip_path_id), .. 
}) => { + Some(&built_clip_paths[clip_path_id.0 as usize]) + } + _ => None, + }; + + let object_builder = ObjectBuilder::new(path_id, + bounds, + view_box, + fill_rule, + prepare_mode, + &path_info); + + Tiler { scene_builder, object_builder, outline, clip_path } } pub(crate) fn generate_tiles(&mut self) { + match self.object_builder.built_path.data { + BuiltPathData::CPU(_) => { + self.generate_fills(); + self.prepare_tiles(); + } + BuiltPathData::TransformCPUBinGPU(ref mut data) => { + data.outline = (*self.outline).clone(); + } + BuiltPathData::GPU => { + panic!("Shouldn't have generated a tiler at all if we're transforming on GPU!") + } + } + } + + fn generate_fills(&mut self) { + debug_assert_eq!(self.scene_builder.sink.renderer_level, RendererLevel::D3D9); + for contour in self.outline.contours() { for segment in contour.iter(ContourIterFlags::empty()) { process_segment(&segment, self.scene_builder, &mut self.object_builder); } } - - self.propagate_backdrops(); - self.pack_and_cull(); } - fn propagate_backdrops(&mut self) { - let tiles_across = self.object_builder.built_path.tiles.rect.width() as usize; - for (draw_tile_index, draw_tile) in self.object_builder - .built_path - .tiles - .data - .iter_mut() - .enumerate() { - let column = draw_tile_index % tiles_across; - let delta = draw_tile.backdrop; - draw_tile.backdrop = self.object_builder.current_backdrops[column]; - self.object_builder.current_backdrops[column] += delta; - } - } - - fn pack_and_cull(&mut self) { - let draw_tiling_path_info = match self.path_info { - TilingPathInfo::Clip => return, - TilingPathInfo::Draw(draw_tiling_path_info) => draw_tiling_path_info, + fn prepare_tiles(&mut self) { + // Don't do this here if the GPU will do it. 
+ let (backdrops, tiles, clips) = match self.object_builder.built_path.data { + BuiltPathData::CPU(ref mut tiled_data) => { + (&mut tiled_data.backdrops, &mut tiled_data.tiles, &mut tiled_data.clip_tiles) + } + BuiltPathData::TransformCPUBinGPU(_) | BuiltPathData::GPU => { + panic!("We shouldn't be preparing tiles on CPU!") + } }; - let blend_mode_is_destructive = draw_tiling_path_info.blend_mode.is_destructive(); + // Propagate backdrops. + let tiles_across = tiles.rect.width() as usize; + for (draw_tile_index, draw_tile) in tiles.data.iter_mut().enumerate() { + let tile_coords = vec2i(draw_tile.tile_x as i32, draw_tile.tile_y as i32); + let column = draw_tile_index % tiles_across; + let delta = draw_tile.backdrop as i32; - for (draw_tile_index, draw_tile) in self.object_builder - .built_path - .tiles - .data - .iter() - .enumerate() { - let packed_tile = PackedTile::new(draw_tile_index as u32, - draw_tile, - &draw_tiling_path_info, - &self.object_builder); + let mut draw_alpha_tile_id = draw_tile.alpha_tile_id; + let mut draw_tile_backdrop = backdrops[column] as i8; - match packed_tile.tile_type { - TileType::Solid => { - match self.object_builder.built_path.solid_tiles { - SolidTiles::Occluders(ref mut occluders) => { - occluders.push(Occluder::new(packed_tile.tile_coords)); - } - SolidTiles::Regular(ref mut solid_tiles) => { - packed_tile.add_to(solid_tiles, - &mut self.object_builder.built_path.clip_tiles, - &draw_tiling_path_info, - &self.scene_builder); + if let Some(built_clip_path) = self.clip_path { + let clip_tiles = match built_clip_path.data { + BuiltPathData::CPU(BuiltPathBinCPUData { ref tiles, .. }) => tiles, + _ => unreachable!(), + }; + match clip_tiles.get(tile_coords) { + Some(clip_tile) => { + if clip_tile.alpha_tile_id != AlphaTileId(!0) && + draw_alpha_tile_id != AlphaTileId(!0) { + // Hard case: We have an alpha tile and a clip tile with masks. Add a + // job to combine the two masks. 
Because the mask combining step + // applies the backdrops, zero out the backdrop in the draw tile itself + // so that we don't double-count it. + let clip = clips.as_mut() + .expect("Where are the clips?") + .get_mut(tile_coords) + .unwrap(); + clip.dest_tile_id = draw_tile.alpha_tile_id; + clip.dest_backdrop = draw_tile_backdrop as i32; + clip.src_tile_id = clip_tile.alpha_tile_id; + clip.src_backdrop = clip_tile.backdrop as i32; + draw_tile_backdrop = 0; + } else if clip_tile.alpha_tile_id != AlphaTileId(!0) && + draw_alpha_tile_id == AlphaTileId(!0) && + draw_tile_backdrop != 0 { + // This is a solid draw tile, but there's a clip applied. Replace it + // with an alpha tile pointing directly to the clip mask. + draw_alpha_tile_id = clip_tile.alpha_tile_id; + draw_tile_backdrop = clip_tile.backdrop; + } else if clip_tile.alpha_tile_id == AlphaTileId(!0) && + clip_tile.backdrop == 0 { + // This is a blank clip tile. Cull the draw tile entirely. + draw_alpha_tile_id = AlphaTileId(!0); + draw_tile_backdrop = 0; } } - } - TileType::SingleMask => { - debug_assert_ne!(packed_tile.draw_tile.alpha_tile_id.page(), !0); - packed_tile.add_to(&mut self.object_builder.built_path.single_mask_tiles, - &mut self.object_builder.built_path.clip_tiles, - &draw_tiling_path_info, - &self.scene_builder); - } - TileType::Empty if blend_mode_is_destructive => { - packed_tile.add_to(&mut self.object_builder.built_path.empty_tiles, - &mut self.object_builder.built_path.clip_tiles, - &draw_tiling_path_info, - &self.scene_builder); - } - TileType::Empty => { - // Just cull. + None => { + // This draw tile is outside the clip path rect. Cull the tile. + draw_alpha_tile_id = AlphaTileId(!0); + draw_tile_backdrop = 0; + } } } + + draw_tile.alpha_tile_id = draw_alpha_tile_id; + draw_tile.backdrop = draw_tile_backdrop; + + backdrops[column] += delta; } + + /* + + // Calculate clips. 
+ let built_clip_path = match self.path_info { + TilingPathInfo::Draw(DrawTilingPathInfo { + built_clip_path: Some(built_clip_path), + .. + }) => built_clip_path, + _ => return, + }; + + let clip_tiles = self.object_builder + .built_path + .clip_tiles + .as_mut() + .expect("Where are the clip tiles?"); + + for draw_tile in &mut self.object_builder.built_path.tiles.data { + let tile_coords = vec2i(draw_tile.tile_x as i32, draw_tile.tile_y as i32); + let built_clip_tile = match built_clip_path.tiles.get(tile_coords) { + None => { + draw_tile.alpha_tile_id = AlphaTileId(!0); + continue; + } + Some(built_clip_tile) => built_clip_tile, + }; + + let clip_tile = clip_tiles.get_mut(tile_coords).unwrap(); + clip_tile.dest_tile_id = draw_tile.alpha_tile_id; + clip_tile.dest_backdrop = draw_tile.backdrop as i32; + clip_tile.src_tile_id = built_clip_tile.alpha_tile_id; + clip_tile.src_backdrop = built_clip_tile.backdrop as i32; + } + */ } } @@ -165,8 +239,8 @@ fn process_line_segment(line_segment: LineSegment2F, // Compute `step = vec2f(vector.x < 0 ? -1 : 1, vector.y < 0 ? -1 : 1)`. let step = Vector2I((vector_is_negative | U32x2::splat(1)).to_i32x2()); - // Compute `first_tile_crossing = (from_tile_coords + vec2i(vector.x > 0 ? 1 : 0, - // vector.y > 0 ? 1 : 0)) * tile_size`. + // Compute `first_tile_crossing = (from_tile_coords + vec2i(vector.x >= 0 ? 1 : 0, + // vector.y >= 0 ? 1 : 0)) * tile_size`. let first_tile_crossing = (from_tile_coords + Vector2I((!vector_is_negative & U32x2::splat(1)).to_i32x2())).to_f32() * tile_size; diff --git a/renderer/src/tiles.rs b/renderer/src/tiles.rs index 4b2777b6..a56406c2 100644 --- a/renderer/src/tiles.rs +++ b/renderer/src/tiles.rs @@ -8,33 +8,33 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
-use crate::builder::{BuiltPath, ObjectBuilder}; -use crate::gpu_data::{AlphaTileId, TileObjectPrimitive}; -use crate::paint::{PaintId, PaintMetadata}; +use crate::gpu_data::{TILE_CTRL_MASK_0_SHIFT, TILE_CTRL_MASK_EVEN_ODD}; +use crate::gpu_data::{TILE_CTRL_MASK_WINDING, TileObjectPrimitive}; +use crate::paint::PaintId; +use crate::scene::ClipPathId; use pathfinder_content::effects::BlendMode; use pathfinder_content::fill::FillRule; use pathfinder_geometry::rect::{RectF, RectI}; -use pathfinder_geometry::vector::{Vector2I, vec2f}; +use pathfinder_geometry::vector::vec2f; pub const TILE_WIDTH: u32 = 16; pub const TILE_HEIGHT: u32 = 16; #[derive(Clone, Copy)] -pub(crate) enum TilingPathInfo<'a> { +pub(crate) enum TilingPathInfo { Clip, - Draw(DrawTilingPathInfo<'a>), + Draw(DrawTilingPathInfo), } #[derive(Clone, Copy)] -pub(crate) struct DrawTilingPathInfo<'a> { +pub(crate) struct DrawTilingPathInfo { pub(crate) paint_id: PaintId, - pub(crate) paint_metadata: &'a PaintMetadata, pub(crate) blend_mode: BlendMode, - pub(crate) built_clip_path: Option<&'a BuiltPath>, pub(crate) fill_rule: FillRule, + pub(crate) clip_path_id: Option, } -impl<'a> TilingPathInfo<'a> { +impl TilingPathInfo { pub(crate) fn has_destructive_blend_mode(&self) -> bool { match *self { TilingPathInfo::Draw(ref draw_tiling_path_info) => { @@ -43,126 +43,23 @@ impl<'a> TilingPathInfo<'a> { TilingPathInfo::Clip => false, } } -} -pub(crate) struct PackedTile<'a> { - pub(crate) tile_type: TileType, - pub(crate) tile_coords: Vector2I, - pub(crate) draw_tile: &'a TileObjectPrimitive, - pub(crate) clip_tile: Option<&'a TileObjectPrimitive>, -} - -#[derive(Clone, Copy, PartialEq)] -pub(crate) enum TileType { - Solid, - Empty, - SingleMask, -} - -impl<'a> PackedTile<'a> { - pub(crate) fn new(draw_tile_index: u32, - draw_tile: &'a TileObjectPrimitive, - draw_tiling_path_info: &DrawTilingPathInfo<'a>, - object_builder: &ObjectBuilder) - -> PackedTile<'a> { - let tile_coords = 
object_builder.local_tile_index_to_coords(draw_tile_index as u32); - - // First, if the draw tile is empty, cull it regardless of clip. - if draw_tile.is_solid() { - match (object_builder.built_path.fill_rule, draw_tile.backdrop) { - (FillRule::Winding, 0) => { - return PackedTile { - tile_type: TileType::Empty, - tile_coords, - draw_tile, - clip_tile: None, - }; - } - (FillRule::Winding, _) => {} - (FillRule::EvenOdd, backdrop) if backdrop % 2 == 0 => { - return PackedTile { - tile_type: TileType::Empty, - tile_coords, - draw_tile, - clip_tile: None, - }; - } - (FillRule::EvenOdd, _) => {} - } - } - - // Figure out what clip tile we need, if any. - let clip_tile = match draw_tiling_path_info.built_clip_path { - None => None, - Some(built_clip_path) => { - match built_clip_path.tiles.get(tile_coords) { - None => { - // This tile is outside of the bounds of the clip path entirely. We can - // cull it. - return PackedTile { - tile_type: TileType::Empty, - tile_coords, - draw_tile, - clip_tile: None, - }; + pub(crate) fn to_ctrl(&self) -> u8 { + let mut ctrl = 0; + match *self { + TilingPathInfo::Draw(ref draw_tiling_path_info) => { + match draw_tiling_path_info.fill_rule { + FillRule::EvenOdd => { + ctrl |= (TILE_CTRL_MASK_EVEN_ODD << TILE_CTRL_MASK_0_SHIFT) as u8 } - Some(clip_tile) if clip_tile.is_solid() => { - if clip_tile.backdrop != 0 { - // The clip tile is fully opaque, so this tile isn't clipped at - // all. - None - } else { - // This tile is completely clipped out. Cull it. - return PackedTile { - tile_type: TileType::Empty, - tile_coords, - draw_tile, - clip_tile: None, - }; - } + FillRule::Winding => { + ctrl |= (TILE_CTRL_MASK_WINDING << TILE_CTRL_MASK_0_SHIFT) as u8 } - Some(clip_tile) => Some(clip_tile), - } - } - }; - - // Choose a tile type. - match clip_tile { - None if draw_tile.is_solid() => { - // This is a solid tile that completely occludes the background. 
- PackedTile { tile_type: TileType::Solid, tile_coords, draw_tile, clip_tile } - } - None => { - // We have a draw tile and no clip tile. - PackedTile { - tile_type: TileType::SingleMask, - tile_coords, - draw_tile, - clip_tile: None, - } - } - Some(clip_tile) if draw_tile.is_solid() => { - // We have a solid draw tile and a clip tile. This is effectively the same as - // having a draw tile and no clip tile. - // - // FIXME(pcwalton): This doesn't preserve the fill rule of the clip path! - PackedTile { - tile_type: TileType::SingleMask, - tile_coords, - draw_tile: clip_tile, - clip_tile: None, - } - } - Some(clip_tile) => { - // We have both a draw and clip mask. Composite them together. - PackedTile { - tile_type: TileType::SingleMask, - tile_coords, - draw_tile, - clip_tile: Some(clip_tile), } } + TilingPathInfo::Clip => {} } + ctrl } } @@ -170,13 +67,6 @@ pub fn round_rect_out_to_tile_bounds(rect: RectF) -> RectI { (rect * vec2f(1.0 / TILE_WIDTH as f32, 1.0 / TILE_HEIGHT as f32)).round_out().to_i32() } -impl Default for TileObjectPrimitive { - #[inline] - fn default() -> TileObjectPrimitive { - TileObjectPrimitive { backdrop: 0, alpha_tile_id: AlphaTileId::invalid() } - } -} - impl TileObjectPrimitive { #[inline] pub fn is_solid(&self) -> bool { !self.alpha_tile_id.is_valid() } diff --git a/renderer/src/z_buffer.rs b/renderer/src/z_buffer.rs deleted file mode 100644 index ce67e29d..00000000 --- a/renderer/src/z_buffer.rs +++ /dev/null @@ -1,122 +0,0 @@ -// pathfinder/renderer/src/z_buffer.rs -// -// Copyright © 2019 The Pathfinder Project Developers. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -//! Software occlusion culling. 
- -use crate::builder::Occluder; -use crate::gpu_data::{Tile, TileBatch}; -use crate::paint::{PaintId, PaintMetadata}; -use crate::tile_map::DenseTileMap; -use crate::tiles; -use pathfinder_content::effects::BlendMode; -use pathfinder_geometry::rect::RectF; -use pathfinder_geometry::vector::Vector2I; -use vec_map::VecMap; - -pub(crate) struct ZBuffer { - buffer: DenseTileMap, - depth_metadata: VecMap, -} - -pub(crate) struct SolidTiles { - pub(crate) batches: Vec, -} - -#[derive(Clone, Copy)] -pub(crate) struct DepthMetadata { - pub(crate) paint_id: PaintId, -} -impl ZBuffer { - pub(crate) fn new(view_box: RectF) -> ZBuffer { - let tile_rect = tiles::round_rect_out_to_tile_bounds(view_box); - ZBuffer { - buffer: DenseTileMap::from_builder(|_| 0, tile_rect), - depth_metadata: VecMap::new(), - } - } - - pub(crate) fn test(&self, coords: Vector2I, depth: u32) -> bool { - let tile_index = self.buffer.coords_to_index_unchecked(coords); - self.buffer.data[tile_index as usize] < depth - } - - pub(crate) fn update(&mut self, - solid_tiles: &[Occluder], - depth: u32, - metadata: DepthMetadata) { - self.depth_metadata.insert(depth as usize, metadata); - for solid_tile in solid_tiles { - let tile_index = self.buffer.coords_to_index_unchecked(solid_tile.coords); - let z_dest = &mut self.buffer.data[tile_index as usize]; - *z_dest = u32::max(*z_dest, depth); - } - } - - pub(crate) fn build_solid_tiles(&self, paint_metadata: &[PaintMetadata]) -> SolidTiles { - let mut solid_tiles = SolidTiles { batches: vec![] }; - - for tile_index in 0..self.buffer.data.len() { - let depth = self.buffer.data[tile_index]; - if depth == 0 { - continue; - } - - let tile_coords = self.buffer.index_to_coords(tile_index); - - let depth_metadata = self.depth_metadata[depth as usize]; - let paint_id = depth_metadata.paint_id; - let paint_metadata = &paint_metadata[paint_id.0 as usize]; - - let tile_position = tile_coords + self.buffer.rect.origin(); - - // Create a batch if necessary. 
- let paint_tile_batch_texture = paint_metadata.tile_batch_texture(); - let paint_filter = paint_metadata.filter(); - match solid_tiles.batches.last() { - Some(TileBatch { color_texture: tile_batch_texture, filter: tile_filter, .. }) if - *tile_batch_texture == paint_tile_batch_texture && - *tile_filter == paint_filter => {} - _ => { - // Batch break. - // - // TODO(pcwalton): We could be more aggressive with batching here, since we - // know there are no overlaps. - solid_tiles.batches.push(TileBatch { - color_texture: paint_tile_batch_texture, - tiles: vec![], - filter: paint_filter, - blend_mode: BlendMode::default(), - tile_page: !0, - }); - } - } - - let batch = solid_tiles.batches.last_mut().unwrap(); - batch.tiles.push(Tile::new_solid_from_paint_id(tile_position, paint_id)); - } - - solid_tiles - } -} - -impl Tile { - pub(crate) fn new_solid_from_paint_id(tile_origin: Vector2I, paint_id: PaintId) -> Tile { - Tile { - tile_x: tile_origin.x() as i16, - tile_y: tile_origin.y() as i16, - mask_0_backdrop: 0, - mask_0_u: 0, - mask_0_v: 0, - ctrl: 0, - pad: 0, - color: paint_id.0, - } - } -} diff --git a/resources/MANIFEST b/resources/MANIFEST index 85928b7a..40de1616 100644 --- a/resources/MANIFEST +++ b/resources/MANIFEST @@ -7,69 +7,88 @@ shaders/gl3/blit.fs.glsl shaders/gl3/blit.vs.glsl shaders/gl3/clear.fs.glsl shaders/gl3/clear.vs.glsl -shaders/gl3/debug_solid.fs.glsl -shaders/gl3/debug_solid.vs.glsl -shaders/gl3/debug_texture.fs.glsl -shaders/gl3/debug_texture.vs.glsl +shaders/gl3/d3d9/fill.fs.glsl +shaders/gl3/d3d9/fill.vs.glsl +shaders/gl3/d3d9/tile.fs.glsl +shaders/gl3/d3d9/tile.vs.glsl +shaders/gl3/d3d9/tile_clip_combine.fs.glsl +shaders/gl3/d3d9/tile_clip_combine.vs.glsl +shaders/gl3/d3d9/tile_clip_copy.fs.glsl +shaders/gl3/d3d9/tile_clip_copy.vs.glsl +shaders/gl3/d3d9/tile_copy.fs.glsl +shaders/gl3/d3d9/tile_copy.vs.glsl +shaders/gl3/debug/solid.fs.glsl +shaders/gl3/debug/solid.vs.glsl +shaders/gl3/debug/texture.fs.glsl 
+shaders/gl3/debug/texture.vs.glsl shaders/gl3/demo_ground.fs.glsl shaders/gl3/demo_ground.vs.glsl -shaders/gl3/fill.fs.glsl -shaders/gl3/fill.vs.glsl shaders/gl3/reproject.fs.glsl shaders/gl3/reproject.vs.glsl shaders/gl3/stencil.fs.glsl shaders/gl3/stencil.vs.glsl -shaders/gl3/tile.fs.glsl -shaders/gl3/tile.vs.glsl -shaders/gl3/tile_clip.fs.glsl -shaders/gl3/tile_clip.vs.glsl -shaders/gl3/tile_copy.fs.glsl -shaders/gl3/tile_copy.vs.glsl shaders/gl4/blit.fs.glsl shaders/gl4/blit.vs.glsl shaders/gl4/clear.fs.glsl shaders/gl4/clear.vs.glsl -shaders/gl4/debug_solid.fs.glsl -shaders/gl4/debug_solid.vs.glsl -shaders/gl4/debug_texture.fs.glsl -shaders/gl4/debug_texture.vs.glsl +shaders/gl4/d3d11/bin.cs.glsl +shaders/gl4/d3d11/bound.cs.glsl +shaders/gl4/d3d11/dice.cs.glsl +shaders/gl4/d3d11/fill.cs.glsl +shaders/gl4/d3d11/propagate.cs.glsl +shaders/gl4/d3d11/sort.cs.glsl +shaders/gl4/d3d11/tile.cs.glsl +shaders/gl4/d3d9/fill.fs.glsl +shaders/gl4/d3d9/fill.vs.glsl +shaders/gl4/d3d9/tile.fs.glsl +shaders/gl4/d3d9/tile.vs.glsl +shaders/gl4/d3d9/tile_clip_combine.fs.glsl +shaders/gl4/d3d9/tile_clip_combine.vs.glsl +shaders/gl4/d3d9/tile_clip_copy.fs.glsl +shaders/gl4/d3d9/tile_clip_copy.vs.glsl +shaders/gl4/d3d9/tile_copy.fs.glsl +shaders/gl4/d3d9/tile_copy.vs.glsl +shaders/gl4/debug/solid.fs.glsl +shaders/gl4/debug/solid.vs.glsl +shaders/gl4/debug/texture.fs.glsl +shaders/gl4/debug/texture.vs.glsl shaders/gl4/demo_ground.fs.glsl shaders/gl4/demo_ground.vs.glsl -shaders/gl4/fill.fs.glsl -shaders/gl4/fill.vs.glsl shaders/gl4/reproject.fs.glsl shaders/gl4/reproject.vs.glsl shaders/gl4/stencil.fs.glsl shaders/gl4/stencil.vs.glsl -shaders/gl4/tile.fs.glsl -shaders/gl4/tile.vs.glsl -shaders/gl4/tile_clip.fs.glsl -shaders/gl4/tile_clip.vs.glsl -shaders/gl4/tile_copy.fs.glsl -shaders/gl4/tile_copy.vs.glsl shaders/metal/blit.fs.metal shaders/metal/blit.vs.metal shaders/metal/clear.fs.metal shaders/metal/clear.vs.metal -shaders/metal/debug_solid.fs.metal 
-shaders/metal/debug_solid.vs.metal -shaders/metal/debug_texture.fs.metal -shaders/metal/debug_texture.vs.metal +shaders/metal/d3d11/bin.cs.metal +shaders/metal/d3d11/bound.cs.metal +shaders/metal/d3d11/dice.cs.metal +shaders/metal/d3d11/fill.cs.metal +shaders/metal/d3d11/propagate.cs.metal +shaders/metal/d3d11/sort.cs.metal +shaders/metal/d3d11/tile.cs.metal +shaders/metal/d3d9/fill.fs.metal +shaders/metal/d3d9/fill.vs.metal +shaders/metal/d3d9/tile.fs.metal +shaders/metal/d3d9/tile.vs.metal +shaders/metal/d3d9/tile_clip_combine.fs.metal +shaders/metal/d3d9/tile_clip_combine.vs.metal +shaders/metal/d3d9/tile_clip_copy.fs.metal +shaders/metal/d3d9/tile_clip_copy.vs.metal +shaders/metal/d3d9/tile_copy.fs.metal +shaders/metal/d3d9/tile_copy.vs.metal +shaders/metal/debug/solid.fs.metal +shaders/metal/debug/solid.vs.metal +shaders/metal/debug/texture.fs.metal +shaders/metal/debug/texture.vs.metal shaders/metal/demo_ground.fs.metal shaders/metal/demo_ground.vs.metal -shaders/metal/fill.cs.metal -shaders/metal/fill.fs.metal -shaders/metal/fill.vs.metal shaders/metal/reproject.fs.metal shaders/metal/reproject.vs.metal shaders/metal/stencil.fs.metal shaders/metal/stencil.vs.metal -shaders/metal/tile.fs.metal -shaders/metal/tile.vs.metal -shaders/metal/tile_clip.fs.metal -shaders/metal/tile_clip.vs.metal -shaders/metal/tile_copy.fs.metal -shaders/metal/tile_copy.vs.metal textures/area-lut.png textures/debug-corner-fill.png textures/debug-corner-outline.png diff --git a/resources/shaders/gl3/blit.fs.glsl b/resources/shaders/gl3/blit.fs.glsl index b32fc956..85092e27 100644 --- a/resources/shaders/gl3/blit.fs.glsl +++ b/resources/shaders/gl3/blit.fs.glsl @@ -26,6 +26,6 @@ out vec4 oFragColor; void main(){ vec4 color = texture(uSrc, vTexCoord); - oFragColor = vec4(color . rgb * color . a, color . 
a); + oFragColor = color; } diff --git a/resources/shaders/gl3/blit.vs.glsl b/resources/shaders/gl3/blit.vs.glsl index 6c9391d5..98909d95 100644 --- a/resources/shaders/gl3/blit.vs.glsl +++ b/resources/shaders/gl3/blit.vs.glsl @@ -18,16 +18,17 @@ precision highp float; +uniform vec4 uDestRect; +uniform vec2 uFramebufferSize; + in ivec2 aPosition; out vec2 vTexCoord; void main(){ + vec2 position = mix(uDestRect . xy, uDestRect . zw, vec2(aPosition))/ uFramebufferSize; vec2 texCoord = vec2(aPosition); - - - vTexCoord = texCoord; - gl_Position = vec4(mix(vec2(- 1.0), vec2(1.0), vec2(aPosition)), 0.0, 1.0); + gl_Position = vec4(mix(vec2(- 1.0), vec2(1.0), position), 0.0, 1.0); } diff --git a/resources/shaders/gl3/fill.fs.glsl b/resources/shaders/gl3/d3d9/fill.fs.glsl similarity index 100% rename from resources/shaders/gl3/fill.fs.glsl rename to resources/shaders/gl3/d3d9/fill.fs.glsl diff --git a/resources/shaders/gl3/d3d9/fill.vs.glsl b/resources/shaders/gl3/d3d9/fill.vs.glsl new file mode 100644 index 00000000..b6e3ad45 --- /dev/null +++ b/resources/shaders/gl3/d3d9/fill.vs.glsl @@ -0,0 +1,86 @@ +#version {{version}} +// Automatically generated from files in pathfinder/shaders/. Do not edit! + + + + + + + + + + + + +#extension GL_GOOGLE_include_directive : enable + +precision highp float; + + + + + +uniform vec2 uFramebufferSize; +uniform vec2 uTileSize; + +in uvec2 aTessCoord; +in uvec4 aLineSegment; +in int aTileIndex; + +out vec2 vFrom; +out vec2 vTo; + +vec2 computeTileOffset(uint tileIndex, float stencilTextureWidth, vec2 tileSize){ + uint tilesPerRow = uint(stencilTextureWidth / tileSize . 
x); + uvec2 tileOffset = uvec2(tileIndex % tilesPerRow, tileIndex / tilesPerRow); + return vec2(tileOffset)* tileSize * vec2(1.0, 0.25); +} + +vec4 computeVertexPosition(uint tileIndex, + uvec2 tessCoord, + uvec4 packedLineSegment, + vec2 tileSize, + vec2 framebufferSize, + out vec2 outFrom, + out vec2 outTo){ + vec2 tileOrigin = computeTileOffset(uint(tileIndex), framebufferSize . x, tileSize); + + vec4 lineSegment = vec4(packedLineSegment)/ 256.0; + vec2 from = lineSegment . xy, to = lineSegment . zw; + + vec2 position; + if(tessCoord . x == 0u) + position . x = floor(min(from . x, to . x)); + else + position . x = ceil(max(from . x, to . x)); + if(tessCoord . y == 0u) + position . y = floor(min(from . y, to . y)); + else + position . y = tileSize . y; + position . y = floor(position . y * 0.25); + + + + + + vec2 offset = vec2(0.0, 1.5)- position * vec2(1.0, 4.0); + outFrom = from + offset; + outTo = to + offset; + + vec2 globalPosition =(tileOrigin + position)/ framebufferSize * 2.0 - 1.0; + + + + return vec4(globalPosition, 0.0, 1.0); +} + +void main(){ + gl_Position = computeVertexPosition(uint(aTileIndex), + aTessCoord, + aLineSegment, + uTileSize, + uFramebufferSize, + vFrom, + vTo); +} + diff --git a/resources/shaders/gl4/tile.fs.glsl b/resources/shaders/gl3/d3d9/tile.fs.glsl similarity index 86% rename from resources/shaders/gl4/tile.fs.glsl rename to resources/shaders/gl3/d3d9/tile.fs.glsl index 33815bd9..ea281c03 100644 --- a/resources/shaders/gl4/tile.fs.glsl +++ b/resources/shaders/gl3/d3d9/tile.fs.glsl @@ -12,27 +12,6 @@ - - - - - - - - - - - - - - - - - - - - - #extension GL_GOOGLE_include_directive : enable precision highp float; @@ -83,24 +62,37 @@ precision highp float; -uniform sampler2D uColorTexture0; -uniform sampler2D uMaskTexture0; -uniform sampler2D uDestTexture; -uniform sampler2D uGammaLUT; -uniform vec2 uColorTextureSize0; -uniform vec2 uMaskTextureSize0; -uniform vec4 uFilterParams0; -uniform vec4 uFilterParams1; -uniform vec4 
uFilterParams2; -uniform vec2 uFramebufferSize; -uniform int uCtrl; -in vec3 vMaskTexCoord0; -in vec2 vColorTexCoord0; -in vec4 vBaseColor; -in float vTileCtrl; -out vec4 oFragColor; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -567,27 +559,42 @@ float sampleMask(float maskAlpha, -void calculateColor(int tileCtrl, int ctrl){ +vec4 calculateColor(vec2 fragCoord, + sampler2D colorTexture0, + sampler2D maskTexture0, + sampler2D destTexture, + sampler2D gammaLUT, + vec2 colorTextureSize0, + vec2 maskTextureSize0, + vec4 filterParams0, + vec4 filterParams1, + vec4 filterParams2, + vec2 framebufferSize, + int ctrl, + vec3 maskTexCoord0, + vec2 colorTexCoord0, + vec4 baseColor, + int tileCtrl){ int maskCtrl0 =(tileCtrl >> 0)& 0x3; float maskAlpha = 1.0; - maskAlpha = sampleMask(maskAlpha, uMaskTexture0, uMaskTextureSize0, vMaskTexCoord0, maskCtrl0); + maskAlpha = sampleMask(maskAlpha, maskTexture0, maskTextureSize0, maskTexCoord0, maskCtrl0); - vec4 color = vBaseColor; + vec4 color = baseColor; int color0Combine =(ctrl >> 6)& 0x3; if(color0Combine != 0){ int color0Filter =(ctrl >> 4)& 0x3; - vec4 color0 = filterColor(vColorTexCoord0, - uColorTexture0, - uGammaLUT, - uColorTextureSize0, - gl_FragCoord . xy, - uFramebufferSize, - uFilterParams0, - uFilterParams1, - uFilterParams2, + vec4 color0 = filterColor(colorTexCoord0, + colorTexture0, + gammaLUT, + colorTextureSize0, + fragCoord, + framebufferSize, + filterParams0, + filterParams1, + filterParams2, color0Filter); color = combineColor0(color, color0, color0Combine); } @@ -597,18 +604,53 @@ void calculateColor(int tileCtrl, int ctrl){ int compositeOp =(ctrl >> 8)& 0xf; - color = composite(color, uDestTexture, uFramebufferSize, gl_FragCoord . xy, compositeOp); + color = composite(color, destTexture, framebufferSize, fragCoord, compositeOp); color . rgb *= color . 
a; - oFragColor = color; + return color; } +uniform sampler2D uColorTexture0; +uniform sampler2D uMaskTexture0; +uniform sampler2D uDestTexture; +uniform sampler2D uGammaLUT; +uniform vec2 uColorTextureSize0; +uniform vec2 uMaskTextureSize0; +uniform vec2 uFramebufferSize; + +in vec3 vMaskTexCoord0; +in vec2 vColorTexCoord0; +in vec4 vBaseColor; +in float vTileCtrl; +in vec4 vFilterParams0; +in vec4 vFilterParams1; +in vec4 vFilterParams2; +in float vCtrl; + +out vec4 oFragColor; + + void main(){ - calculateColor(int(vTileCtrl), uCtrl); + oFragColor = calculateColor(gl_FragCoord . xy, + uColorTexture0, + uMaskTexture0, + uDestTexture, + uGammaLUT, + uColorTextureSize0, + uMaskTextureSize0, + vFilterParams0, + vFilterParams1, + vFilterParams2, + uFramebufferSize, + int(vCtrl), + vMaskTexCoord0, + vColorTexCoord0, + vBaseColor, + int(vTileCtrl)); } diff --git a/resources/shaders/gl3/d3d9/tile.vs.glsl b/resources/shaders/gl3/d3d9/tile.vs.glsl new file mode 100644 index 00000000..8257d318 --- /dev/null +++ b/resources/shaders/gl3/d3d9/tile.vs.glsl @@ -0,0 +1,123 @@ +#version {{version}} +// Automatically generated from files in pathfinder/shaders/. Do not edit! 
+ + + + + + + + + + + + +#extension GL_GOOGLE_include_directive : enable + +precision highp float; + + + + + + + + + + + + + + + + +vec4 fetchUnscaled(sampler2D srcTexture, vec2 scale, vec2 originCoord, int entry){ + return texture(srcTexture,(originCoord + vec2(0.5)+ vec2(entry, 0))* scale); +} + +void computeTileVaryings(vec2 position, + int colorEntry, + sampler2D textureMetadata, + ivec2 textureMetadataSize, + out vec2 outColorTexCoord0, + out vec4 outBaseColor, + out vec4 outFilterParams0, + out vec4 outFilterParams1, + out vec4 outFilterParams2, + out int outCtrl){ + vec2 metadataScale = vec2(1.0)/ vec2(textureMetadataSize); + vec2 metadataEntryCoord = vec2(colorEntry % 128 * 8, colorEntry / 128); + vec4 colorTexMatrix0 = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 0); + vec4 colorTexOffsets = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 1); + vec4 baseColor = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 2); + vec4 filterParams0 = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 3); + vec4 filterParams1 = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 4); + vec4 filterParams2 = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 5); + vec4 extra = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 6); + outColorTexCoord0 = mat2(colorTexMatrix0)* position + colorTexOffsets . xy; + outBaseColor = baseColor; + outFilterParams0 = filterParams0; + outFilterParams1 = filterParams1; + outFilterParams2 = filterParams2; + outCtrl = int(extra . 
x); +} + + +uniform mat4 uTransform; +uniform vec2 uTileSize; +uniform sampler2D uTextureMetadata; +uniform ivec2 uTextureMetadataSize; +uniform sampler2D uZBuffer; +uniform ivec2 uZBufferSize; + +in ivec2 aTileOffset; +in ivec2 aTileOrigin; +in uvec4 aMaskTexCoord0; +in ivec2 aCtrlBackdrop; +in int aPathIndex; +in int aColor; + +out vec3 vMaskTexCoord0; +out vec2 vColorTexCoord0; +out vec4 vBaseColor; +out float vTileCtrl; +out vec4 vFilterParams0; +out vec4 vFilterParams1; +out vec4 vFilterParams2; +out float vCtrl; + +void main(){ + vec2 tileOrigin = vec2(aTileOrigin), tileOffset = vec2(aTileOffset); + vec2 position =(tileOrigin + tileOffset)* uTileSize; + + ivec4 zValue = ivec4(texture(uZBuffer,(tileOrigin + vec2(0.5))/ vec2(uZBufferSize))* 255.0); + if(aPathIndex <(zValue . x |(zValue . y << 8)|(zValue . z << 16)|(zValue . w << 24))){ + gl_Position = vec4(0.0); + return; + } + + uvec2 maskTileCoord = uvec2(aMaskTexCoord0 . x, aMaskTexCoord0 . y + 256u * aMaskTexCoord0 . z); + vec2 maskTexCoord0 =(vec2(maskTileCoord)+ tileOffset)* uTileSize; + if(aCtrlBackdrop . y == 0 && aMaskTexCoord0 . w != 0u){ + gl_Position = vec4(0.0); + return; + } + + int ctrl; + computeTileVaryings(position, + aColor, + uTextureMetadata, + uTextureMetadataSize, + vColorTexCoord0, + vBaseColor, + vFilterParams0, + vFilterParams1, + vFilterParams2, + ctrl); + + vTileCtrl = float(aCtrlBackdrop . x); + vCtrl = float(ctrl); + vMaskTexCoord0 = vec3(maskTexCoord0, float(aCtrlBackdrop . y)); + gl_Position = uTransform * vec4(position, 0.0, 1.0); +} + diff --git a/resources/shaders/gl3/d3d9/tile_clip_combine.fs.glsl b/resources/shaders/gl3/d3d9/tile_clip_combine.fs.glsl new file mode 100644 index 00000000..b097b871 --- /dev/null +++ b/resources/shaders/gl3/d3d9/tile_clip_combine.fs.glsl @@ -0,0 +1,34 @@ +#version {{version}} +// Automatically generated from files in pathfinder/shaders/. Do not edit! 
+ + + + + + + + + + + + +precision highp float; + + + + + +uniform sampler2D uSrc; + +in vec2 vTexCoord0; +in float vBackdrop0; +in vec2 vTexCoord1; +in float vBackdrop1; + +out vec4 oFragColor; + +void main(){ + oFragColor = min(abs(texture(uSrc, vTexCoord0)+ vBackdrop0), + abs(texture(uSrc, vTexCoord1)+ vBackdrop1)); +} + diff --git a/resources/shaders/gl3/d3d9/tile_clip_combine.vs.glsl b/resources/shaders/gl3/d3d9/tile_clip_combine.vs.glsl new file mode 100644 index 00000000..38f85f04 --- /dev/null +++ b/resources/shaders/gl3/d3d9/tile_clip_combine.vs.glsl @@ -0,0 +1,54 @@ +#version {{version}} +// Automatically generated from files in pathfinder/shaders/. Do not edit! + + + + + + + + + + + + +precision highp float; + + + + + +uniform vec2 uFramebufferSize; + +in ivec2 aTileOffset; +in int aDestTileIndex; +in int aDestBackdrop; +in int aSrcTileIndex; +in int aSrcBackdrop; + +out vec2 vTexCoord0; +out float vBackdrop0; +out vec2 vTexCoord1; +out float vBackdrop1; + +void main(){ + vec2 destPosition = vec2(ivec2(aDestTileIndex % 256, aDestTileIndex / 256)+ aTileOffset); + vec2 srcPosition = vec2(ivec2(aSrcTileIndex % 256, aSrcTileIndex / 256)+ aTileOffset); + destPosition *= vec2(16.0, 4.0)/ uFramebufferSize; + srcPosition *= vec2(16.0, 4.0)/ uFramebufferSize; + + vTexCoord0 = destPosition; + vTexCoord1 = srcPosition; + + vBackdrop0 = float(aDestBackdrop); + vBackdrop1 = float(aSrcBackdrop); + + if(aDestTileIndex < 0) + destPosition = vec2(0.0); + + + + + gl_Position = vec4(mix(vec2(- 1.0), vec2(1.0), destPosition), 0.0, 1.0); +} + diff --git a/resources/shaders/gl3/tile_clip.fs.glsl b/resources/shaders/gl3/d3d9/tile_clip_copy.fs.glsl similarity index 69% rename from resources/shaders/gl3/tile_clip.fs.glsl rename to resources/shaders/gl3/d3d9/tile_clip_copy.fs.glsl index 9b8b6cd3..b1d846fe 100644 --- a/resources/shaders/gl3/tile_clip.fs.glsl +++ b/resources/shaders/gl3/d3d9/tile_clip_copy.fs.glsl @@ -21,11 +21,10 @@ precision highp float; uniform sampler2D uSrc; 
in vec2 vTexCoord; -in float vBackdrop; out vec4 oFragColor; void main(){ - oFragColor = clamp(abs(texture(uSrc, vTexCoord)+ vBackdrop), 0.0, 1.0); + oFragColor = texture(uSrc, vTexCoord); } diff --git a/resources/shaders/gl3/d3d9/tile_clip_copy.vs.glsl b/resources/shaders/gl3/d3d9/tile_clip_copy.vs.glsl new file mode 100644 index 00000000..04b2982f --- /dev/null +++ b/resources/shaders/gl3/d3d9/tile_clip_copy.vs.glsl @@ -0,0 +1,42 @@ +#version {{version}} +// Automatically generated from files in pathfinder/shaders/. Do not edit! + + + + + + + + + + + + +precision highp float; + + + + + +uniform vec2 uFramebufferSize; + +in ivec2 aTileOffset; +in int aTileIndex; + +out vec2 vTexCoord; + +void main(){ + vec2 position = vec2(ivec2(aTileIndex % 256, aTileIndex / 256)+ aTileOffset); + position *= vec2(16.0, 4.0)/ uFramebufferSize; + + vTexCoord = position; + + if(aTileIndex < 0) + position = vec2(0.0); + + + + + gl_Position = vec4(mix(vec2(- 1.0), vec2(1.0), position), 0.0, 1.0); +} + diff --git a/resources/shaders/gl3/tile_copy.fs.glsl b/resources/shaders/gl3/d3d9/tile_copy.fs.glsl similarity index 100% rename from resources/shaders/gl3/tile_copy.fs.glsl rename to resources/shaders/gl3/d3d9/tile_copy.fs.glsl diff --git a/resources/shaders/gl3/tile_copy.vs.glsl b/resources/shaders/gl3/d3d9/tile_copy.vs.glsl similarity index 100% rename from resources/shaders/gl3/tile_copy.vs.glsl rename to resources/shaders/gl3/d3d9/tile_copy.vs.glsl diff --git a/resources/shaders/gl3/debug_solid.fs.glsl b/resources/shaders/gl3/debug/solid.fs.glsl similarity index 100% rename from resources/shaders/gl3/debug_solid.fs.glsl rename to resources/shaders/gl3/debug/solid.fs.glsl diff --git a/resources/shaders/gl3/debug_solid.vs.glsl b/resources/shaders/gl3/debug/solid.vs.glsl similarity index 100% rename from resources/shaders/gl3/debug_solid.vs.glsl rename to resources/shaders/gl3/debug/solid.vs.glsl diff --git a/resources/shaders/gl3/debug_texture.fs.glsl 
b/resources/shaders/gl3/debug/texture.fs.glsl similarity index 100% rename from resources/shaders/gl3/debug_texture.fs.glsl rename to resources/shaders/gl3/debug/texture.fs.glsl diff --git a/resources/shaders/gl3/debug_texture.vs.glsl b/resources/shaders/gl3/debug/texture.vs.glsl similarity index 100% rename from resources/shaders/gl3/debug_texture.vs.glsl rename to resources/shaders/gl3/debug/texture.vs.glsl diff --git a/resources/shaders/gl3/fill.vs.glsl b/resources/shaders/gl3/fill.vs.glsl deleted file mode 100644 index 58b0b7f0..00000000 --- a/resources/shaders/gl3/fill.vs.glsl +++ /dev/null @@ -1,71 +0,0 @@ -#version {{version}} -// Automatically generated from files in pathfinder/shaders/. Do not edit! - - - - - - - - - - - - -precision highp float; - - - - - -uniform vec2 uFramebufferSize; -uniform vec2 uTileSize; - -in uvec2 aTessCoord; -in uint aFromPx; -in uint aToPx; -in vec2 aFromSubpx; -in vec2 aToSubpx; -in uint aTileIndex; - -out vec2 vFrom; -out vec2 vTo; - -vec2 computeTileOffset(uint tileIndex, float stencilTextureWidth){ - uint tilesPerRow = uint(stencilTextureWidth / uTileSize . x); - uvec2 tileOffset = uvec2(tileIndex % tilesPerRow, tileIndex / tilesPerRow); - return vec2(tileOffset)* uTileSize * vec2(1.0, 0.25); -} - -void main(){ - vec2 tileOrigin = computeTileOffset(aTileIndex, uFramebufferSize . x); - - vec2 from = vec2(aFromPx & 15u, aFromPx >> 4u)+ aFromSubpx; - vec2 to = vec2(aToPx & 15u, aToPx >> 4u)+ aToSubpx; - - vec2 position; - if(aTessCoord . x == 0u) - position . x = floor(min(from . x, to . x)); - else - position . x = ceil(max(from . x, to . x)); - if(aTessCoord . y == 0u) - position . y = floor(min(from . y, to . y)); - else - position . y = uTileSize . y; - position . y = floor(position . 
y * 0.25); - - - - - - vec2 offset = vec2(0.0, 1.5)- position * vec2(1.0, 4.0); - vFrom = from + offset; - vTo = to + offset; - - vec2 globalPosition =(tileOrigin + position)/ uFramebufferSize * 2.0 - 1.0; - - - - gl_Position = vec4(globalPosition, 0.0, 1.0); -} - diff --git a/resources/shaders/gl3/tile.vs.glsl b/resources/shaders/gl3/tile.vs.glsl deleted file mode 100644 index 6f3819c0..00000000 --- a/resources/shaders/gl3/tile.vs.glsl +++ /dev/null @@ -1,59 +0,0 @@ -#version {{version}} -// Automatically generated from files in pathfinder/shaders/. Do not edit! - - - - - - - - - - - - -precision highp float; - - - - - -uniform mat4 uTransform; -uniform vec2 uTileSize; -uniform sampler2D uTextureMetadata; -uniform ivec2 uTextureMetadataSize; - -in ivec2 aTileOffset; -in ivec2 aTileOrigin; -in uvec2 aMaskTexCoord0; -in ivec2 aMaskBackdrop; -in int aColor; -in int aTileCtrl; - -out vec3 vMaskTexCoord0; -out vec2 vColorTexCoord0; -out vec4 vBaseColor; -out float vTileCtrl; - -void main(){ - vec2 tileOrigin = vec2(aTileOrigin), tileOffset = vec2(aTileOffset); - vec2 position =(tileOrigin + tileOffset)* uTileSize; - - vec2 maskTexCoord0 =(vec2(aMaskTexCoord0)+ tileOffset)* uTileSize; - - vec2 textureMetadataScale = vec2(1.0)/ vec2(uTextureMetadataSize); - vec2 metadataEntryCoord = vec2(aColor % 128 * 4, aColor / 128); - vec2 colorTexMatrix0Coord =(metadataEntryCoord + vec2(0.5, 0.5))* textureMetadataScale; - vec2 colorTexOffsetsCoord =(metadataEntryCoord + vec2(1.5, 0.5))* textureMetadataScale; - vec2 baseColorCoord =(metadataEntryCoord + vec2(2.5, 0.5))* textureMetadataScale; - vec4 colorTexMatrix0 = texture(uTextureMetadata, colorTexMatrix0Coord); - vec4 colorTexOffsets = texture(uTextureMetadata, colorTexOffsetsCoord); - vec4 baseColor = texture(uTextureMetadata, baseColorCoord); - - vColorTexCoord0 = mat2(colorTexMatrix0)* position + colorTexOffsets . xy; - vMaskTexCoord0 = vec3(maskTexCoord0, float(aMaskBackdrop . 
x)); - vBaseColor = baseColor; - vTileCtrl = float(aTileCtrl); - gl_Position = uTransform * vec4(position, 0.0, 1.0); -} - diff --git a/resources/shaders/gl3/tile_clip.vs.glsl b/resources/shaders/gl3/tile_clip.vs.glsl deleted file mode 100644 index 6693ec92..00000000 --- a/resources/shaders/gl3/tile_clip.vs.glsl +++ /dev/null @@ -1,36 +0,0 @@ -#version {{version}} -// Automatically generated from files in pathfinder/shaders/. Do not edit! - - - - - - - - - - - - -precision highp float; - - - - - -in ivec2 aTileOffset; -in ivec2 aDestTileOrigin; -in ivec2 aSrcTileOrigin; -in int aSrcBackdrop; - -out vec2 vTexCoord; -out float vBackdrop; - -void main(){ - vec2 destPosition = vec2(aDestTileOrigin + aTileOffset)/ vec2(256.0); - vec2 srcPosition = vec2(aSrcTileOrigin + aTileOffset)/ vec2(256.0); - vTexCoord = srcPosition; - vBackdrop = float(aSrcBackdrop); - gl_Position = vec4(mix(vec2(- 1.0), vec2(1.0), destPosition), 0.0, 1.0); -} - diff --git a/resources/shaders/gl4/blit.fs.glsl b/resources/shaders/gl4/blit.fs.glsl index b32fc956..85092e27 100644 --- a/resources/shaders/gl4/blit.fs.glsl +++ b/resources/shaders/gl4/blit.fs.glsl @@ -26,6 +26,6 @@ out vec4 oFragColor; void main(){ vec4 color = texture(uSrc, vTexCoord); - oFragColor = vec4(color . rgb * color . a, color . a); + oFragColor = color; } diff --git a/resources/shaders/gl4/blit.vs.glsl b/resources/shaders/gl4/blit.vs.glsl index 6c9391d5..98909d95 100644 --- a/resources/shaders/gl4/blit.vs.glsl +++ b/resources/shaders/gl4/blit.vs.glsl @@ -18,16 +18,17 @@ precision highp float; +uniform vec4 uDestRect; +uniform vec2 uFramebufferSize; + in ivec2 aPosition; out vec2 vTexCoord; void main(){ + vec2 position = mix(uDestRect . xy, uDestRect . 
zw, vec2(aPosition))/ uFramebufferSize; vec2 texCoord = vec2(aPosition); - - - vTexCoord = texCoord; - gl_Position = vec4(mix(vec2(- 1.0), vec2(1.0), vec2(aPosition)), 0.0, 1.0); + gl_Position = vec4(mix(vec2(- 1.0), vec2(1.0), position), 0.0, 1.0); } diff --git a/resources/shaders/gl4/d3d11/bin.cs.glsl b/resources/shaders/gl4/d3d11/bin.cs.glsl new file mode 100644 index 00000000..c185cc15 --- /dev/null +++ b/resources/shaders/gl4/d3d11/bin.cs.glsl @@ -0,0 +1,259 @@ +#version {{version}} +// Automatically generated from files in pathfinder/shaders/. Do not edit! + + + + + + + + + + + + + + +#extension GL_GOOGLE_include_directive : enable + + + + + + + + + + + + +precision highp float; + + + + + +layout(local_size_x = 64)in; + +uniform int uMicrolineCount; + +uniform int uMaxFillCount; + +layout(std430, binding = 0)buffer bMicrolines { + restrict readonly uvec4 iMicrolines[]; +}; + +layout(std430, binding = 1)buffer bMetadata { + + + + + + + restrict readonly ivec4 iMetadata[]; +}; + + + + + + +layout(std430, binding = 2)buffer bIndirectDrawParams { + restrict uint iIndirectDrawParams[]; +}; + +layout(std430, binding = 3)buffer bFills { + restrict writeonly uint iFills[]; +}; + +layout(std430, binding = 4)buffer bTiles { + + + + + restrict uint iTiles[]; +}; + +layout(std430, binding = 5)buffer bBackdrops { + + + + restrict uint iBackdrops[]; +}; + +uint computeTileIndexNoCheck(ivec2 tileCoords, ivec4 pathTileRect, uint pathTileOffset){ + ivec2 offsetCoords = tileCoords - pathTileRect . xy; + return pathTileOffset + offsetCoords . x + offsetCoords . y *(pathTileRect . z - pathTileRect . x); +} + +bvec4 computeTileOutcodes(ivec2 tileCoords, ivec4 pathTileRect){ + return bvec4(lessThan(tileCoords, pathTileRect . xy), + greaterThanEqual(tileCoords, pathTileRect . 
zw)); +} + +bool computeTileIndex(ivec2 tileCoords, + ivec4 pathTileRect, + uint pathTileOffset, + out uint outTileIndex){ + outTileIndex = computeTileIndexNoCheck(tileCoords, pathTileRect, pathTileOffset); + return ! any(computeTileOutcodes(tileCoords, pathTileRect)); +} + +void addFill(vec4 lineSegment, ivec2 tileCoords, ivec4 pathTileRect, uint pathTileOffset){ + + uint tileIndex; + if(! computeTileIndex(tileCoords, pathTileRect, pathTileOffset, tileIndex)){ + return; + } + + + uvec4 scaledLocalLine = uvec4((lineSegment - vec4(tileCoords . xyxy * ivec4(16)))* vec4(256.0)); + if(scaledLocalLine . x == scaledLocalLine . z) + return; + + + uint fillIndex = atomicAdd(iIndirectDrawParams[1], 1); + + + uint fillLink = atomicExchange(iTiles[tileIndex * 4 + 1], + int(fillIndex)); + + + if(fillIndex < uMaxFillCount){ + iFills[fillIndex * 3 + 0]= scaledLocalLine . x |(scaledLocalLine . y << 16); + iFills[fillIndex * 3 + 1]= scaledLocalLine . z |(scaledLocalLine . w << 16); + iFills[fillIndex * 3 + 2]= fillLink; + } +} + +void adjustBackdrop(int backdropDelta, + ivec2 tileCoords, + ivec4 pathTileRect, + uint pathTileOffset, + uint pathBackdropOffset){ + bvec4 outcodes = computeTileOutcodes(tileCoords, pathTileRect); + if(any(outcodes)){ + if(! outcodes . x && outcodes . y && ! outcodes . z){ + uint backdropIndex = pathBackdropOffset + uint(tileCoords . x - pathTileRect . x); + atomicAdd(iBackdrops[backdropIndex * 3], backdropDelta); + } + } else { + uint tileIndex = computeTileIndexNoCheck(tileCoords, pathTileRect, pathTileOffset); + atomicAdd(iTiles[tileIndex * 4 + 2], + uint(backdropDelta)<< 24); + } +} + +vec4 unpackMicroline(uvec4 packedMicroline, out uint outPathIndex){ + outPathIndex = packedMicroline . w; + ivec4 signedMicroline = ivec4(packedMicroline); + return vec4((signedMicroline . x << 16)>> 16, signedMicroline . x >> 16, + (signedMicroline . y << 16)>> 16, signedMicroline . y >> 16)+ + vec4(signedMicroline . z & 0xff,(signedMicroline . 
z >> 8)& 0xff, + (signedMicroline . z >> 16)& 0xff,(signedMicroline . z >> 24)& 0xff)/ 256.0; +} + +void main(){ + uint segmentIndex = gl_GlobalInvocationID . x; + if(segmentIndex >= uMicrolineCount) + return; + + uint pathIndex; + vec4 lineSegment = unpackMicroline(iMicrolines[segmentIndex], pathIndex); + + ivec4 pathTileRect = iMetadata[pathIndex * 3 + 0]; + uint pathTileOffset = uint(iMetadata[pathIndex * 3 + 1]. x); + uint pathBackdropOffset = uint(iMetadata[pathIndex * 3 + 2]. x); + + + + ivec2 tileSize = ivec2(16); + + ivec4 tileLineSegment = ivec4(floor(lineSegment / vec4(tileSize . xyxy))); + ivec2 fromTileCoords = tileLineSegment . xy, toTileCoords = tileLineSegment . zw; + + vec2 vector = lineSegment . zw - lineSegment . xy; + vec2 vectorIsNegative = vec2(vector . x < 0.0 ? - 1.0 : 0.0, vector . y < 0.0 ? - 1.0 : 0.0); + ivec2 tileStep = ivec2(vector . x < 0.0 ? - 1 : 1, vector . y < 0.0 ? - 1 : 1); + + vec2 firstTileCrossing = vec2((fromTileCoords + ivec2(vector . x >= 0.0 ? 1 : 0, + vector . y >= 0.0 ? 1 : 0))* tileSize); + + vec2 tMax =(firstTileCrossing - lineSegment . xy)/ vector; + vec2 tDelta = abs(tileSize / vector); + + vec2 currentPosition = lineSegment . xy; + ivec2 tileCoords = fromTileCoords; + int lastStepDirection = 0; + uint iteration = 0; + + while(iteration < 1024u){ + int nextStepDirection; + if(tMax . x < tMax . y) + nextStepDirection = 1; + else if(tMax . x > tMax . y) + nextStepDirection = 2; + else if(tileStep . x > 0.0) + nextStepDirection = 1; + else + nextStepDirection = 2; + + float nextT = min(nextStepDirection == 1 ? tMax . x : tMax . y, 1.0); + + + if(tileCoords == toTileCoords) + nextStepDirection = 0; + + vec2 nextPosition = mix(lineSegment . xy, lineSegment . zw, nextT); + vec4 clippedLineSegment = vec4(currentPosition, nextPosition); + addFill(clippedLineSegment, tileCoords, pathTileRect, pathTileOffset); + + + vec4 auxiliarySegment; + bool haveAuxiliarySegment = false; + if(tileStep . 
y < 0 && nextStepDirection == 2){ + auxiliarySegment = vec4(clippedLineSegment . zw, vec2(tileCoords * tileSize)); + haveAuxiliarySegment = true; + } else if(tileStep . y > 0 && lastStepDirection == 2){ + auxiliarySegment = vec4(vec2(tileCoords * tileSize), clippedLineSegment . xy); + haveAuxiliarySegment = true; + } + if(haveAuxiliarySegment) + addFill(auxiliarySegment, tileCoords, pathTileRect, pathTileOffset); + + + + + + if(tileStep . x < 0 && lastStepDirection == 1){ + adjustBackdrop(1, + tileCoords, + pathTileRect, + pathTileOffset, + pathBackdropOffset); + } else if(tileStep . x > 0 && nextStepDirection == 1){ + adjustBackdrop(- 1, + tileCoords, + pathTileRect, + pathTileOffset, + pathBackdropOffset); + } + + + if(nextStepDirection == 1){ + tMax . x += tDelta . x; + tileCoords . x += tileStep . x; + } else if(nextStepDirection == 2){ + tMax . y += tDelta . y; + tileCoords . y += tileStep . y; + } else if(nextStepDirection == 0){ + break; + } + + currentPosition = nextPosition; + lastStepDirection = nextStepDirection; + + iteration ++; + } +} + diff --git a/resources/shaders/gl4/d3d11/bound.cs.glsl b/resources/shaders/gl4/d3d11/bound.cs.glsl new file mode 100644 index 00000000..57f07444 --- /dev/null +++ b/resources/shaders/gl4/d3d11/bound.cs.glsl @@ -0,0 +1,87 @@ +#version {{version}} +// Automatically generated from files in pathfinder/shaders/. Do not edit! + + + + + + + + + + + + + + +#extension GL_GOOGLE_include_directive : enable + +precision highp float; + + + + + + + + + + +layout(local_size_x = 64)in; + +uniform int uPathCount; +uniform int uTileCount; + +layout(std430, binding = 0)buffer bTilePathInfo { + + + + + restrict readonly uvec4 iTilePathInfo[]; +}; + +layout(std430, binding = 1)buffer bTiles { + + + + + restrict uint iTiles[]; +}; + +void main(){ + uint tileIndex = gl_GlobalInvocationID . 
x; + if(tileIndex >= uint(uTileCount)) + return; + + uint lowPathIndex = 0, highPathIndex = uint(uPathCount); + int iteration = 0; + while(iteration < 1024 && lowPathIndex + 1 < highPathIndex){ + uint midPathIndex = lowPathIndex +(highPathIndex - lowPathIndex)/ 2; + uint midTileIndex = iTilePathInfo[midPathIndex]. z; + if(tileIndex < midTileIndex){ + highPathIndex = midPathIndex; + } else { + lowPathIndex = midPathIndex; + if(tileIndex == midTileIndex) + break; + } + iteration ++; + } + + uint pathIndex = lowPathIndex; + uvec4 pathInfo = iTilePathInfo[pathIndex]; + + ivec2 packedTileRect = ivec2(pathInfo . xy); + ivec4 tileRect = ivec4((packedTileRect . x << 16)>> 16, packedTileRect . x >> 16, + (packedTileRect . y << 16)>> 16, packedTileRect . y >> 16); + + uint tileOffset = tileIndex - pathInfo . z; + uint tileWidth = uint(tileRect . z - tileRect . x); + ivec2 tileCoords = tileRect . xy + ivec2(tileOffset % tileWidth, tileOffset / tileWidth); + + iTiles[tileIndex * 4 + 0]= ~ 0u; + iTiles[tileIndex * 4 + 1]= ~ 0u; + iTiles[tileIndex * 4 + 2]= 0x00ffffffu; + iTiles[tileIndex * 4 + 3]= pathInfo . w; +} + diff --git a/resources/shaders/gl4/d3d11/dice.cs.glsl b/resources/shaders/gl4/d3d11/dice.cs.glsl new file mode 100644 index 00000000..42549df7 --- /dev/null +++ b/resources/shaders/gl4/d3d11/dice.cs.glsl @@ -0,0 +1,220 @@ +#version {{version}} +// Automatically generated from files in pathfinder/shaders/. Do not edit! 
+ + + + + + + + + + + + + + +#extension GL_GOOGLE_include_directive : enable + + + + + + + + + + + + + +precision highp float; + + + + + +layout(local_size_x = 64)in; + +uniform mat2 uTransform; +uniform vec2 uTranslation; +uniform int uPathCount; +uniform int uLastBatchSegmentIndex; +uniform int uMaxMicrolineCount; + +layout(std430, binding = 0)buffer bComputeIndirectParams { + + + + + restrict uint iComputeIndirectParams[]; +}; + + +layout(std430, binding = 1)buffer bDiceMetadata { + + + + + restrict readonly uvec4 iDiceMetadata[]; +}; + +layout(std430, binding = 2)buffer bPoints { + restrict readonly vec2 iPoints[]; +}; + +layout(std430, binding = 3)buffer bInputIndices { + restrict readonly uvec2 iInputIndices[]; +}; + +layout(std430, binding = 4)buffer bMicrolines { + + + + + restrict uvec4 iMicrolines[]; +}; + +void emitMicroline(vec4 microlineSegment, uint pathIndex, uint outputMicrolineIndex){ + if(outputMicrolineIndex >= uMaxMicrolineCount) + return; + + ivec4 microlineSubpixels = ivec4(round(clamp(microlineSegment, - 32768.0, 32767.0)* 256.0)); + ivec4 microlinePixels = ivec4(floor(vec4(microlineSubpixels)/ 256.0)); + ivec4 microlineFractPixels = microlineSubpixels - microlinePixels * 256; + + iMicrolines[outputMicrolineIndex]= + uvec4((uint(microlinePixels . x)& 0xffff)|(uint(microlinePixels . y)<< 16), + (uint(microlinePixels . z)& 0xffff)|(uint(microlinePixels . w)<< 16), + uint(microlineFractPixels . x)|(uint(microlineFractPixels . y)<< 8)| + (uint(microlineFractPixels . z)<< 16)|(uint(microlineFractPixels . w)<< 24), + pathIndex); +} + + +bool curveIsFlat(vec4 baseline, vec4 ctrl){ + vec4 uv = vec4(3.0)* ctrl - vec4(2.0)* baseline - baseline . zwxy; + uv *= uv; + uv = max(uv, uv . zwxy); + return uv . x + uv . y <= 16.0 * 0.25 * 0.25; +} + +void subdivideCurve(vec4 baseline, + vec4 ctrl, + float t, + out vec4 prevBaseline, + out vec4 prevCtrl, + out vec4 nextBaseline, + out vec4 nextCtrl){ + vec2 p0 = baseline . xy, p1 = ctrl . xy, p2 = ctrl . 
zw, p3 = baseline . zw; + vec2 p0p1 = mix(p0, p1, t), p1p2 = mix(p1, p2, t), p2p3 = mix(p2, p3, t); + vec2 p0p1p2 = mix(p0p1, p1p2, t), p1p2p3 = mix(p1p2, p2p3, t); + vec2 p0p1p2p3 = mix(p0p1p2, p1p2p3, t); + prevBaseline = vec4(p0, p0p1p2p3); + prevCtrl = vec4(p0p1, p0p1p2); + nextBaseline = vec4(p0p1p2p3, p3); + nextCtrl = vec4(p1p2p3, p2p3); +} + +vec2 sampleCurve(vec4 baseline, vec4 ctrl, float t){ + vec2 p0 = baseline . xy, p1 = ctrl . xy, p2 = ctrl . zw, p3 = baseline . zw; + vec2 p0p1 = mix(p0, p1, t), p1p2 = mix(p1, p2, t), p2p3 = mix(p2, p3, t); + vec2 p0p1p2 = mix(p0p1, p1p2, t), p1p2p3 = mix(p1p2, p2p3, t); + return mix(p0p1p2, p1p2p3, t); +} + +vec2 sampleLine(vec4 line, float t){ + return mix(line . xy, line . zw, t); +} + +vec2 getPoint(uint pointIndex){ + return uTransform * iPoints[pointIndex]+ uTranslation; +} + +void main(){ + uint batchSegmentIndex = gl_GlobalInvocationID . x; + if(batchSegmentIndex >= uLastBatchSegmentIndex) + return; + + + uint lowPathIndex = 0, highPathIndex = uint(uPathCount); + int iteration = 0; + while(iteration < 1024 && lowPathIndex + 1 < highPathIndex){ + uint midPathIndex = lowPathIndex +(highPathIndex - lowPathIndex)/ 2; + uint midBatchSegmentIndex = iDiceMetadata[midPathIndex]. z; + if(batchSegmentIndex < midBatchSegmentIndex){ + highPathIndex = midPathIndex; + } else { + lowPathIndex = midPathIndex; + if(batchSegmentIndex == midBatchSegmentIndex) + break; + } + iteration ++; + } + + uint batchPathIndex = lowPathIndex; + uvec4 diceMetadata = iDiceMetadata[batchPathIndex]; + uint firstGlobalSegmentIndexInPath = diceMetadata . y; + uint firstBatchSegmentIndexInPath = diceMetadata . z; + uint globalSegmentIndex = batchSegmentIndex - firstBatchSegmentIndexInPath + + firstGlobalSegmentIndexInPath; + + uvec2 inputIndices = iInputIndices[globalSegmentIndex]; + uint fromPointIndex = inputIndices . x, flagsPathIndex = inputIndices . 
y; + + uint toPointIndex = fromPointIndex; + if((flagsPathIndex & 0x40000000u)!= 0u) + toPointIndex += 3; + else if((flagsPathIndex & 0x80000000u)!= 0u) + toPointIndex += 2; + else + toPointIndex += 1; + + vec4 baseline = vec4(getPoint(fromPointIndex), getPoint(toPointIndex)); + + + + + + vec4 ctrl = vec4(0.0); + float segmentCountF; + bool isCurve =(flagsPathIndex &(0x40000000u | + 0x80000000u))!= 0; + if(isCurve){ + vec2 ctrl0 = getPoint(fromPointIndex + 1); + if((flagsPathIndex & 0x80000000u)!= 0){ + vec2 ctrl0_2 = ctrl0 * vec2(2.0); + ctrl =(baseline +(ctrl0 * vec2(2.0)). xyxy)* vec4(1.0 / 3.0); + } else { + ctrl = vec4(ctrl0, getPoint(fromPointIndex + 2)); + } + vec2 bound = vec2(6.0)* max(abs(ctrl . zw - 2.0 * ctrl . xy + baseline . xy), + abs(baseline . zw - 2.0 * ctrl . zw + ctrl . xy)); + segmentCountF = sqrt(length(bound)/(8.0 * 0.25)); + } else { + segmentCountF = length(baseline . zw - baseline . xy)/ 16.0; + } + + + int segmentCount = max(int(ceil(segmentCountF)), 1); + uint firstOutputMicrolineIndex = + atomicAdd(iComputeIndirectParams[3], + segmentCount); + + float prevT = 0.0; + vec2 prevPoint = baseline . xy; + for(int segmentIndex = 0;segmentIndex < segmentCount;segmentIndex ++){ + float nextT = float(segmentIndex + 1)/ float(segmentCount); + vec2 nextPoint; + if(isCurve) + nextPoint = sampleCurve(baseline, ctrl, nextT); + else + nextPoint = sampleLine(baseline, nextT); + emitMicroline(vec4(prevPoint, nextPoint), + batchPathIndex, + firstOutputMicrolineIndex + segmentIndex); + prevT = nextT; + prevPoint = nextPoint; + } +} + diff --git a/resources/shaders/gl4/d3d11/fill.cs.glsl b/resources/shaders/gl4/d3d11/fill.cs.glsl new file mode 100644 index 00000000..ccc459d9 --- /dev/null +++ b/resources/shaders/gl4/d3d11/fill.cs.glsl @@ -0,0 +1,145 @@ +#version {{version}} +// Automatically generated from files in pathfinder/shaders/. Do not edit! 
+ + + + + + + + + + + + +#extension GL_GOOGLE_include_directive : enable + +precision highp float; + + + + + + + + + + + + + + + + +vec4 computeCoverage(vec2 from, vec2 to, sampler2D areaLUT){ + + vec2 left = from . x < to . x ? from : to, right = from . x < to . x ? to : from; + + + vec2 window = clamp(vec2(from . x, to . x), - 0.5, 0.5); + float offset = mix(window . x, window . y, 0.5)- left . x; + float t = offset /(right . x - left . x); + + + float y = mix(left . y, right . y, t); + float d =(right . y - left . y)/(right . x - left . x); + + + float dX = window . x - window . y; + return texture(areaLUT, vec2(y + 8.0, abs(d * dX))/ 16.0)* dX; +} + + +layout(local_size_x = 16, local_size_y = 4)in; + + + + + + +layout(rgba8)uniform image2D uDest; +uniform sampler2D uAreaLUT; +uniform ivec2 uAlphaTileRange; + +layout(std430, binding = 0)buffer bFills { + restrict readonly uint iFills[]; +}; + +layout(std430, binding = 1)buffer bTiles { + + + + + + restrict uint iTiles[]; +}; + +layout(std430, binding = 2)buffer bAlphaTiles { + + + restrict readonly uint iAlphaTiles[]; +}; + + + + + + + + + + + + +vec4 accumulateCoverageForFillList(int fillIndex, ivec2 tileSubCoord){ + vec2 tileFragCoord = vec2(tileSubCoord)+ vec2(0.5); + vec4 coverages = vec4(0.0); + int iteration = 0; + do { + uint fillFrom = iFills[fillIndex * 3 + 0], fillTo = iFills[fillIndex * 3 + 1]; + vec4 lineSegment = vec4(fillFrom & 0xffff, fillFrom >> 16, + fillTo & 0xffff, fillTo >> 16)/ 256.0; + lineSegment -= tileFragCoord . xyxy; + coverages += computeCoverage(lineSegment . xy, lineSegment . zw, uAreaLUT); + fillIndex = int(iFills[fillIndex * 3 + 2]); + iteration ++; + } while(fillIndex >= 0 && iteration < 1024); + return coverages; +} + + +ivec2 computeTileCoord(uint alphaTileIndex){ + uint x = alphaTileIndex & 0xff; + uint y =(alphaTileIndex >> 8u)& 0xff +(((alphaTileIndex >> 16u)& 0xff)<< 8u); + return ivec2(16, 4)* ivec2(x, y)+ ivec2(gl_LocalInvocationID . 
xy); +} + +void main(){ + ivec2 tileSubCoord = ivec2(gl_LocalInvocationID . xy)* ivec2(1, 4); + + + uint batchAlphaTileIndex =(gl_WorkGroupID . x |(gl_WorkGroupID . y << 15)); + uint alphaTileIndex = batchAlphaTileIndex + uint(uAlphaTileRange . x); + if(alphaTileIndex >= uint(uAlphaTileRange . y)) + return; + + uint tileIndex = iAlphaTiles[batchAlphaTileIndex * 2 + 0]; + if((int(iTiles[tileIndex * 4 + 2]<< 8)>> 8)< 0) + return; + + int fillIndex = int(iTiles[tileIndex * 4 + 1]); + int backdrop = int(iTiles[tileIndex * 4 + 3])>> 24; + + + vec4 coverages = vec4(backdrop); + coverages += accumulateCoverageForFillList(fillIndex, tileSubCoord); + coverages = clamp(abs(coverages), 0.0, 1.0); + + + int clipTileIndex = int(iAlphaTiles[batchAlphaTileIndex * 2 + 1]); + if(clipTileIndex >= 0) + coverages = min(coverages, imageLoad(uDest, computeTileCoord(clipTileIndex))); + + imageStore(uDest, computeTileCoord(alphaTileIndex), coverages); +} + diff --git a/resources/shaders/gl4/d3d11/propagate.cs.glsl b/resources/shaders/gl4/d3d11/propagate.cs.glsl new file mode 100644 index 00000000..a4d9d182 --- /dev/null +++ b/resources/shaders/gl4/d3d11/propagate.cs.glsl @@ -0,0 +1,227 @@ +#version {{version}} +// Automatically generated from files in pathfinder/shaders/. Do not edit! 
+ + + + + + + + + + + + + + +#extension GL_GOOGLE_include_directive : enable + +precision highp float; + + + + + +layout(local_size_x = 64)in; + + + + + + +uniform ivec2 uFramebufferTileSize; +uniform int uColumnCount; +uniform int uFirstAlphaTileIndex; + +layout(std430, binding = 0)buffer bDrawMetadata { + + + + + + + restrict readonly uvec4 iDrawMetadata[]; +}; + +layout(std430, binding = 1)buffer bClipMetadata { + + + + + + restrict readonly uvec4 iClipMetadata[]; +}; + +layout(std430, binding = 2)buffer bBackdrops { + + + + restrict readonly int iBackdrops[]; +}; + +layout(std430, binding = 3)buffer bDrawTiles { + + + + + restrict uint iDrawTiles[]; +}; + +layout(std430, binding = 4)buffer bClipTiles { + + + + + restrict uint iClipTiles[]; +}; + +layout(std430, binding = 5)buffer bZBuffer { + restrict int iZBuffer[]; +}; + +layout(std430, binding = 6)buffer bFirstTileMap { + restrict int iFirstTileMap[]; +}; + +layout(std430, binding = 7)buffer bIndirectDrawParams { + + + + + + restrict uint iIndirectDrawParams[]; +}; + +layout(std430, binding = 8)buffer bAlphaTiles { + + + restrict uint iAlphaTiles[]; +}; + +uint calculateTileIndex(uint bufferOffset, uvec4 tileRect, uvec2 tileCoord){ + return bufferOffset + tileCoord . y *(tileRect . z - tileRect . x)+ tileCoord . x; +} + +void main(){ + uint columnIndex = gl_GlobalInvocationID . x; + if(int(columnIndex)>= uColumnCount) + return; + + int currentBackdrop = iBackdrops[columnIndex * 3 + 0]; + int tileX = iBackdrops[columnIndex * 3 + 1]; + uint drawPathIndex = uint(iBackdrops[columnIndex * 3 + 2]); + + uvec4 drawTileRect = iDrawMetadata[drawPathIndex * 3 + 0]; + uvec4 drawOffsets = iDrawMetadata[drawPathIndex * 3 + 1]; + uvec2 drawTileSize = drawTileRect . zw - drawTileRect . xy; + uint drawTileBufferOffset = drawOffsets . x; + bool zWrite = drawOffsets . z != 0; + + int clipPathIndex = int(drawOffsets . 
w); + uvec4 clipTileRect = uvec4(0u), clipOffsets = uvec4(0u); + if(clipPathIndex >= 0){ + clipTileRect = iClipMetadata[clipPathIndex * 2 + 0]; + clipOffsets = iClipMetadata[clipPathIndex * 2 + 1]; + } + uint clipTileBufferOffset = clipOffsets . x, clipBackdropOffset = clipOffsets . y; + + for(uint tileY = 0;tileY < drawTileSize . y;tileY ++){ + uvec2 drawTileCoord = uvec2(tileX, tileY); + uint drawTileIndex = calculateTileIndex(drawTileBufferOffset, drawTileRect, drawTileCoord); + + int drawAlphaTileIndex = - 1; + int clipAlphaTileIndex = - 1; + int drawFirstFillIndex = int(iDrawTiles[drawTileIndex * 4 + 1]); + int drawBackdropDelta = + int(iDrawTiles[drawTileIndex * 4 + 2])>> 24; + uint drawTileWord = iDrawTiles[drawTileIndex * 4 + 3]& 0x00ffffff; + + int drawTileBackdrop = currentBackdrop; + bool haveDrawAlphaMask = drawFirstFillIndex >= 0; + bool needNewAlphaTile = haveDrawAlphaMask; + + + if(clipPathIndex >= 0){ + uvec2 tileCoord = drawTileCoord + drawTileRect . xy; + if(all(bvec4(greaterThanEqual(tileCoord, clipTileRect . xy), + lessThan(tileCoord, clipTileRect . zw)))){ + uvec2 clipTileCoord = tileCoord - clipTileRect . 
xy; + uint clipTileIndex = calculateTileIndex(clipTileBufferOffset, + clipTileRect, + clipTileCoord); + + + + + + + int thisClipAlphaTileIndex = + int(iClipTiles[clipTileIndex * 4 + + 2]<< 8)>> 8; + + uint clipTileWord = iClipTiles[clipTileIndex * 4 + 3]; + int clipTileBackdrop = int(clipTileWord)>> 24; + + if(thisClipAlphaTileIndex >= 0){ + if(haveDrawAlphaMask){ + clipAlphaTileIndex = thisClipAlphaTileIndex; + needNewAlphaTile = true; + } else { + if(drawTileBackdrop != 0){ + + + drawAlphaTileIndex = thisClipAlphaTileIndex; + clipAlphaTileIndex = - 1; + needNewAlphaTile = false; + } else { + + drawAlphaTileIndex = - 1; + clipAlphaTileIndex = - 1; + needNewAlphaTile = false; + } + } + } else { + + if(clipTileBackdrop == 0){ + + drawTileBackdrop = 0; + needNewAlphaTile = false; + } else { + needNewAlphaTile = true; + } + } + } else { + + drawTileBackdrop = 0; + needNewAlphaTile = false; + } + } + + if(needNewAlphaTile){ + uint drawBatchAlphaTileIndex = atomicAdd(iIndirectDrawParams[4], 1); + iAlphaTiles[drawBatchAlphaTileIndex * 2 + 0]= drawTileIndex; + iAlphaTiles[drawBatchAlphaTileIndex * 2 + 1]= clipAlphaTileIndex; + drawAlphaTileIndex = int(drawBatchAlphaTileIndex)+ uFirstAlphaTileIndex; + } + + iDrawTiles[drawTileIndex * 4 + 2]= + (uint(drawAlphaTileIndex)& 0x00ffffffu)|(uint(drawBackdropDelta)<< 24); + iDrawTiles[drawTileIndex * 4 + 3]= + drawTileWord |(uint(drawTileBackdrop)<< 24); + + + ivec2 tileCoord = ivec2(tileX, tileY)+ ivec2(drawTileRect . xy); + int tileMapIndex = tileCoord . y * uFramebufferTileSize . x + tileCoord . 
x; + if(zWrite && drawTileBackdrop != 0 && drawAlphaTileIndex < 0) + atomicMax(iZBuffer[tileMapIndex], int(drawTileIndex)); + + + if(drawTileBackdrop != 0 || drawAlphaTileIndex >= 0){ + int nextTileIndex = atomicExchange(iFirstTileMap[tileMapIndex], int(drawTileIndex)); + iDrawTiles[drawTileIndex * 4 + 0]= nextTileIndex; + } + + currentBackdrop += drawBackdropDelta; + } +} + diff --git a/resources/shaders/gl4/d3d11/sort.cs.glsl b/resources/shaders/gl4/d3d11/sort.cs.glsl new file mode 100644 index 00000000..42de2d39 --- /dev/null +++ b/resources/shaders/gl4/d3d11/sort.cs.glsl @@ -0,0 +1,96 @@ +#version {{version}} +// Automatically generated from files in pathfinder/shaders/. Do not edit! + + + + + + + + + + + + +#extension GL_GOOGLE_include_directive : enable + +precision highp float; + + + + + + + + + + +uniform int uTileCount; + +layout(std430, binding = 0)buffer bTiles { + + + + + restrict uint iTiles[]; +}; + +layout(std430, binding = 1)buffer bFirstTileMap { + restrict int iFirstTileMap[]; +}; + +layout(std430, binding = 2)buffer bZBuffer { + restrict readonly int iZBuffer[]; +}; + +layout(local_size_x = 64)in; + +int getFirst(uint globalTileIndex){ + return iFirstTileMap[globalTileIndex]; +} + +int getNextTile(int tileIndex){ + return int(iTiles[tileIndex * 4 + 0]); +} + +void setNextTile(int tileIndex, int newNextTileIndex){ + iTiles[tileIndex * 4 + 0]= uint(newNextTileIndex); +} + +void main(){ + uint globalTileIndex = gl_GlobalInvocationID . 
x; + if(globalTileIndex >= uint(uTileCount)) + return; + + int zValue = iZBuffer[globalTileIndex]; + + int unsortedFirstTileIndex = getFirst(globalTileIndex); + int sortedFirstTileIndex = - 1; + + while(unsortedFirstTileIndex >= 0){ + int currentTileIndex = unsortedFirstTileIndex; + unsortedFirstTileIndex = getNextTile(currentTileIndex); + + if(currentTileIndex >= zValue){ + int prevTrialTileIndex = - 1; + int trialTileIndex = sortedFirstTileIndex; + while(true){ + if(trialTileIndex < 0 || currentTileIndex < trialTileIndex){ + if(prevTrialTileIndex < 0){ + setNextTile(currentTileIndex, sortedFirstTileIndex); + sortedFirstTileIndex = currentTileIndex; + } else { + setNextTile(currentTileIndex, trialTileIndex); + setNextTile(prevTrialTileIndex, currentTileIndex); + } + break; + } + prevTrialTileIndex = trialTileIndex; + trialTileIndex = getNextTile(trialTileIndex); + } + } + } + + iFirstTileMap[globalTileIndex]= sortedFirstTileIndex; +} + diff --git a/resources/shaders/gl4/tile_fill.cs.glsl b/resources/shaders/gl4/d3d11/tile.cs.glsl similarity index 66% rename from resources/shaders/gl4/tile_fill.cs.glsl rename to resources/shaders/gl4/d3d11/tile.cs.glsl index 025f2e2b..c7470675 100644 --- a/resources/shaders/gl4/tile_fill.cs.glsl +++ b/resources/shaders/gl4/d3d11/tile.cs.glsl @@ -15,48 +15,13 @@ #extension GL_GOOGLE_include_directive : enable precision highp float; -precision highp sampler2D; + + + + layout(local_size_x = 16, local_size_y = 4)in; -layout(rgba8)uniform image2D uDestImage; -uniform sampler2D uTextureMetadata; -uniform ivec2 uTextureMetadataSize; -uniform sampler2D uColorTexture0; -uniform sampler2D uMaskTexture0; -uniform sampler2D uGammaLUT; -uniform vec2 uTileSize; -uniform vec4 uFilterParams0; -uniform vec4 uFilterParams1; -uniform vec4 uFilterParams2; -uniform vec2 uFramebufferSize; -uniform vec2 uColorTextureSize0; -uniform int uCtrl; -uniform sampler2D uAreaLUT; - -layout(std430, binding = 0)buffer bFills { - restrict readonly uvec2 iFills[]; 
-}; - -layout(std430, binding = 1)buffer bNextFills { - restrict readonly int iNextFills[]; -}; - -layout(std430, binding = 2)buffer bFillTileMap { - restrict readonly int iFillTileMap[]; -}; - -layout(std430, binding = 3)buffer bTiles { - restrict readonly uint iTiles[]; -}; - -layout(std430, binding = 4)buffer bNextTiles { - restrict readonly int iNextTiles[]; -}; - -layout(std430, binding = 5)buffer bFirstTiles { - restrict readonly int iFirstTiles[]; -}; @@ -69,23 +34,6 @@ layout(std430, binding = 5)buffer bFirstTiles { -vec4 computeCoverage(vec2 from, vec2 to, sampler2D areaLUT){ - - vec2 left = from . x < to . x ? from : to, right = from . x < to . x ? to : from; - - - vec2 window = clamp(vec2(from . x, to . x), - 0.5, 0.5); - float offset = mix(window . x, window . y, 0.5)- left . x; - float t = offset /(right . x - left . x); - - - float y = mix(left . y, right . y, t); - float d =(right . y - left . y)/(right . x - left . x); - - - float dX = window . x - window . y; - return texture(areaLUT, vec2(y + 8.0, abs(d * dX))/ 16.0)* dX; -} @@ -98,41 +46,6 @@ vec4 computeCoverage(vec2 from, vec2 to, sampler2D areaLUT){ -vec4 computeCoverage(vec2 from, vec2 to, sampler2D areaLUT); - -ivec2 calculateTileOrigin(uint tileIndex){ - return ivec2(tileIndex & 0xff,(tileIndex >> 8u)& 0xff)* 16; -} - -vec4 calculateFillAlpha(ivec2 tileSubCoord, uint tileIndex){ - int fillIndex = iFillTileMap[tileIndex]; - if(fillIndex < 0) - return vec4(0.0); - - vec4 coverages = vec4(0.0); - do { - uvec2 fill = iFills[fillIndex]; - vec2 from = vec2(fill . y & 0xf,(fill . y >> 4u)& 0xf)+ - vec2(fill . x & 0xff,(fill . x >> 8u)& 0xff)/ 256.0; - vec2 to = vec2((fill . y >> 8u)& 0xf,(fill . y >> 12u)& 0xf)+ - vec2((fill . x >> 16u)& 0xff,(fill . 
x >> 24u)& 0xff)/ 256.0; - - coverages += computeCoverage(from -(vec2(tileSubCoord)+ vec2(0.5)), - to -(vec2(tileSubCoord)+ vec2(0.5)), - uAreaLUT); - - fillIndex = iNextFills[fillIndex]; - } while(fillIndex >= 0); - - return coverages; -} - - - - - - - @@ -648,11 +561,27 @@ float sampleMask(float maskAlpha, -vec4 calculateColorWithMaskAlpha(float maskAlpha, - vec4 baseColor, - vec2 colorTexCoord0, - vec2 fragCoord, - int ctrl){ +vec4 calculateColor(vec2 fragCoord, + sampler2D colorTexture0, + sampler2D maskTexture0, + sampler2D destTexture, + sampler2D gammaLUT, + vec2 colorTextureSize0, + vec2 maskTextureSize0, + vec4 filterParams0, + vec4 filterParams1, + vec4 filterParams2, + vec2 framebufferSize, + int ctrl, + vec3 maskTexCoord0, + vec2 colorTexCoord0, + vec4 baseColor, + int tileCtrl){ + + int maskCtrl0 =(tileCtrl >> 0)& 0x3; + float maskAlpha = 1.0; + maskAlpha = sampleMask(maskAlpha, maskTexture0, maskTextureSize0, maskTexCoord0, maskCtrl0); + vec4 color = baseColor; int color0Combine =(ctrl >> 6)& @@ -660,14 +589,14 @@ vec4 calculateColorWithMaskAlpha(float maskAlpha, if(color0Combine != 0){ int color0Filter =(ctrl >> 4)& 0x3; vec4 color0 = filterColor(colorTexCoord0, - uColorTexture0, - uGammaLUT, - uColorTextureSize0, + colorTexture0, + gammaLUT, + colorTextureSize0, fragCoord, - uFramebufferSize, - uFilterParams0, - uFilterParams1, - uFilterParams2, + framebufferSize, + filterParams0, + filterParams1, + filterParams2, color0Filter); color = combineColor0(color, color0, color0Combine); } @@ -676,11 +605,8 @@ vec4 calculateColorWithMaskAlpha(float maskAlpha, color . a *= maskAlpha; - - - - - + int compositeOp =(ctrl >> 8)& 0xf; + color = composite(color, destTexture, framebufferSize, fragCoord, compositeOp); color . rgb *= color . 
a; @@ -698,85 +624,168 @@ vec4 calculateColorWithMaskAlpha(float maskAlpha, -void lookupTextureMetadata(int color, - out mat2 outColorTexMatrix0, - out vec4 outColorTexOffsets, - out vec4 outBaseColor){ - vec2 textureMetadataScale = vec2(1.0)/ vec2(uTextureMetadataSize); - vec2 metadataEntryCoord = vec2(color % 128 * 4, color / 128); - vec2 colorTexMatrix0Coord =(metadataEntryCoord + vec2(0.5, 0.5))* textureMetadataScale; - vec2 colorTexOffsetsCoord =(metadataEntryCoord + vec2(1.5, 0.5))* textureMetadataScale; - vec2 baseColorCoord =(metadataEntryCoord + vec2(2.5, 0.5))* textureMetadataScale; - outColorTexMatrix0 = mat2(texture(uTextureMetadata, colorTexMatrix0Coord)); - outColorTexOffsets = texture(uTextureMetadata, colorTexOffsetsCoord); - outBaseColor = texture(uTextureMetadata, baseColorCoord); +vec4 fetchUnscaled(sampler2D srcTexture, vec2 scale, vec2 originCoord, int entry){ + return texture(srcTexture,(originCoord + vec2(0.5)+ vec2(entry, 0))* scale); +} + +void computeTileVaryings(vec2 position, + int colorEntry, + sampler2D textureMetadata, + ivec2 textureMetadataSize, + out vec2 outColorTexCoord0, + out vec4 outBaseColor, + out vec4 outFilterParams0, + out vec4 outFilterParams1, + out vec4 outFilterParams2, + out int outCtrl){ + vec2 metadataScale = vec2(1.0)/ vec2(textureMetadataSize); + vec2 metadataEntryCoord = vec2(colorEntry % 128 * 8, colorEntry / 128); + vec4 colorTexMatrix0 = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 0); + vec4 colorTexOffsets = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 1); + vec4 baseColor = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 2); + vec4 filterParams0 = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 3); + vec4 filterParams1 = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 4); + vec4 filterParams2 = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 5); + vec4 extra = fetchUnscaled(textureMetadata, 
metadataScale, metadataEntryCoord, 6); + outColorTexCoord0 = mat2(colorTexMatrix0)* position + colorTexOffsets . xy; + outBaseColor = baseColor; + outFilterParams0 = filterParams0; + outFilterParams1 = filterParams1; + outFilterParams2 = filterParams2; + outCtrl = int(extra . x); } + + + + + + +uniform int uLoadAction; +uniform vec4 uClearColor; +uniform vec2 uTileSize; +uniform sampler2D uTextureMetadata; +uniform ivec2 uTextureMetadataSize; +uniform sampler2D uZBuffer; +uniform ivec2 uZBufferSize; +uniform sampler2D uColorTexture0; +uniform sampler2D uMaskTexture0; +uniform sampler2D uDestTexture; +uniform sampler2D uGammaLUT; +uniform vec2 uColorTextureSize0; +uniform vec2 uMaskTextureSize0; +uniform vec2 uFramebufferSize; +uniform ivec2 uFramebufferTileSize; +layout(rgba8)uniform image2D uDestImage; + +layout(std430, binding = 0)buffer bTiles { + + + + + + restrict readonly uint iTiles[]; +}; + +layout(std430, binding = 1)buffer bFirstTileMap { + restrict readonly int iFirstTileMap[]; +}; + +uint calculateTileIndex(uint bufferOffset, uvec4 tileRect, uvec2 tileCoord){ + return bufferOffset + tileCoord . y *(tileRect . z - tileRect . x)+ tileCoord . x; +} + +ivec2 toImageCoords(ivec2 coords){ + return ivec2(coords . x, uFramebufferSize . y - coords . y); +} + void main(){ - int maskCtrl0 =(uCtrl >> 0)& 0x1; + ivec2 tileCoord = ivec2(gl_WorkGroupID . xy); + ivec2 firstTileSubCoord = ivec2(gl_LocalInvocationID . xy)* ivec2(1, 4); + ivec2 firstFragCoord = tileCoord * ivec2(uTileSize)+ firstTileSubCoord; - vec4 colors[4]= { vec4(0.0), vec4(0.0), vec4(0.0), vec4(0.0)}; - ivec2 tileSubCoord = ivec2(gl_LocalInvocationID . xy)* ivec2(1, 4); - ivec2 tileOrigin = ivec2(0); - int tileIndex = iFirstTiles[gl_WorkGroupID . z]; - int overlapCount = 0; + int tileIndex = iFirstTileMap[tileCoord . x + uFramebufferTileSize . x * tileCoord . 
y]; + if(tileIndex < 0 && uLoadAction != 0) + return; + + mat4 destColors; + for(int subY = 0;subY < 4;subY ++){ + if(uLoadAction == 0){ + destColors[subY]= uClearColor; + } else { + ivec2 imageCoords = toImageCoords(firstFragCoord + ivec2(0, subY)); + destColors[subY]= imageLoad(uDestImage, imageCoords); + } + } while(tileIndex >= 0){ - overlapCount ++; + for(int subY = 0;subY < 4;subY ++){ + ivec2 tileSubCoord = firstTileSubCoord + ivec2(0, subY); + vec2 fragCoord = vec2(firstFragCoord + ivec2(0, subY))+ vec2(0.5); - uint tileCoord = iTiles[tileIndex * 3 + 0]; - uint maskTexCoord = iTiles[tileIndex * 3 + 1]; - uint colorCtrl = iTiles[tileIndex * 3 + 2]; + int alphaTileIndex = + int(iTiles[tileIndex * 4 + 2]<< 8)>> 8; + uint tileControlWord = iTiles[tileIndex * 4 + 3]; + uint colorEntry = tileControlWord & 0xffff; + int tileCtrl = int((tileControlWord >> 16)& 0xff); - tileOrigin = ivec2(int(tileCoord & 0xffff), int(tileCoord >> 16)); + int backdrop; + uvec2 maskTileCoord; + if(alphaTileIndex >= 0){ + backdrop = 0; + maskTileCoord = uvec2(alphaTileIndex & 0xff, alphaTileIndex >> 8)* + uvec2(uTileSize); + } else { - int ctrl = int(uCtrl); - int tileColor = int(colorCtrl & 0xffff); - int tileCtrl = int(colorCtrl >> 16); + backdrop = int(tileControlWord)>> 24; + maskTileCoord = uvec2(0u); + tileCtrl &= ~(0x3 << 0); + } - mat2 colorTexMatrix0; - vec4 colorTexOffsets; - vec4 baseColor; - lookupTextureMetadata(tileColor, colorTexMatrix0, colorTexOffsets, baseColor); + vec3 maskTexCoord0 = vec3(vec2(ivec2(maskTileCoord)+ tileSubCoord), backdrop); - int maskTileCtrl0 =(tileCtrl >> 0)& 0x3; + vec2 colorTexCoord0; + vec4 baseColor, filterParams0, filterParams1, filterParams2; + int ctrl; + computeTileVaryings(fragCoord, + int(colorEntry), + uTextureMetadata, + uTextureMetadataSize, + colorTexCoord0, + baseColor, + filterParams0, + filterParams1, + filterParams2, + ctrl); - vec4 maskAlphas = vec4(1.0); - if(maskCtrl0 != 0 && maskTileCtrl0 != 0){ - uint maskTileIndex0 = 
maskTexCoord & 0xffff; - int maskTileBackdrop0 = int(maskTexCoord << 8)>> 24; - maskAlphas = clamp(abs(calculateFillAlpha(tileSubCoord, maskTileIndex0)+ - float(maskTileBackdrop0)), 0.0, 1.0); + vec4 srcColor = calculateColor(fragCoord, + uColorTexture0, + uMaskTexture0, + uDestTexture, + uGammaLUT, + uColorTextureSize0, + uMaskTextureSize0, + filterParams0, + filterParams1, + filterParams2, + uFramebufferSize, + ctrl, + maskTexCoord0, + colorTexCoord0, + baseColor, + tileCtrl); + + destColors[subY]= destColors[subY]*(1.0 - srcColor . a)+ srcColor; } - for(int yOffset = 0;yOffset < 4;yOffset ++){ - - ivec2 fragCoordI = tileOrigin * ivec2(uTileSize)+ tileSubCoord + ivec2(0, yOffset); - vec2 fragCoord = vec2(fragCoordI)+ vec2(0.5); - vec2 colorTexCoord0 = colorTexMatrix0 * fragCoord + colorTexOffsets . xy; - vec4 color = calculateColorWithMaskAlpha(maskAlphas[yOffset], - baseColor, - colorTexCoord0, - fragCoord, - ctrl); - colors[yOffset]= colors[yOffset]*(1.0 - color . a)+ color; - } - - tileIndex = iNextTiles[tileIndex]; + tileIndex = int(iTiles[tileIndex * 4 + 0]); } - for(int yOffset = 0;yOffset < 4;yOffset ++){ - ivec2 fragCoord = tileOrigin * ivec2(uTileSize)+ tileSubCoord + ivec2(0, yOffset); - - - vec4 color = colors[yOffset]; - if(color . a < 1.0) - color = imageLoad(uDestImage, fragCoord)*(1.0 - color . 
a)+ color; - imageStore(uDestImage, fragCoord, color); - } + for(int subY = 0;subY < 4;subY ++) + imageStore(uDestImage, toImageCoords(firstFragCoord + ivec2(0, subY)), destColors[subY]); } diff --git a/resources/shaders/gl4/fill.fs.glsl b/resources/shaders/gl4/d3d9/fill.fs.glsl similarity index 100% rename from resources/shaders/gl4/fill.fs.glsl rename to resources/shaders/gl4/d3d9/fill.fs.glsl diff --git a/resources/shaders/gl4/d3d9/fill.vs.glsl b/resources/shaders/gl4/d3d9/fill.vs.glsl new file mode 100644 index 00000000..b6e3ad45 --- /dev/null +++ b/resources/shaders/gl4/d3d9/fill.vs.glsl @@ -0,0 +1,86 @@ +#version {{version}} +// Automatically generated from files in pathfinder/shaders/. Do not edit! + + + + + + + + + + + + +#extension GL_GOOGLE_include_directive : enable + +precision highp float; + + + + + +uniform vec2 uFramebufferSize; +uniform vec2 uTileSize; + +in uvec2 aTessCoord; +in uvec4 aLineSegment; +in int aTileIndex; + +out vec2 vFrom; +out vec2 vTo; + +vec2 computeTileOffset(uint tileIndex, float stencilTextureWidth, vec2 tileSize){ + uint tilesPerRow = uint(stencilTextureWidth / tileSize . x); + uvec2 tileOffset = uvec2(tileIndex % tilesPerRow, tileIndex / tilesPerRow); + return vec2(tileOffset)* tileSize * vec2(1.0, 0.25); +} + +vec4 computeVertexPosition(uint tileIndex, + uvec2 tessCoord, + uvec4 packedLineSegment, + vec2 tileSize, + vec2 framebufferSize, + out vec2 outFrom, + out vec2 outTo){ + vec2 tileOrigin = computeTileOffset(uint(tileIndex), framebufferSize . x, tileSize); + + vec4 lineSegment = vec4(packedLineSegment)/ 256.0; + vec2 from = lineSegment . xy, to = lineSegment . zw; + + vec2 position; + if(tessCoord . x == 0u) + position . x = floor(min(from . x, to . x)); + else + position . x = ceil(max(from . x, to . x)); + if(tessCoord . y == 0u) + position . y = floor(min(from . y, to . y)); + else + position . y = tileSize . y; + position . y = floor(position . 
y * 0.25); + + + + + + vec2 offset = vec2(0.0, 1.5)- position * vec2(1.0, 4.0); + outFrom = from + offset; + outTo = to + offset; + + vec2 globalPosition =(tileOrigin + position)/ framebufferSize * 2.0 - 1.0; + + + + return vec4(globalPosition, 0.0, 1.0); +} + +void main(){ + gl_Position = computeVertexPosition(uint(aTileIndex), + aTessCoord, + aLineSegment, + uTileSize, + uFramebufferSize, + vFrom, + vTo); +} + diff --git a/resources/shaders/gl3/tile.fs.glsl b/resources/shaders/gl4/d3d9/tile.fs.glsl similarity index 86% rename from resources/shaders/gl3/tile.fs.glsl rename to resources/shaders/gl4/d3d9/tile.fs.glsl index 33815bd9..ea281c03 100644 --- a/resources/shaders/gl3/tile.fs.glsl +++ b/resources/shaders/gl4/d3d9/tile.fs.glsl @@ -12,27 +12,6 @@ - - - - - - - - - - - - - - - - - - - - - #extension GL_GOOGLE_include_directive : enable precision highp float; @@ -83,24 +62,37 @@ precision highp float; -uniform sampler2D uColorTexture0; -uniform sampler2D uMaskTexture0; -uniform sampler2D uDestTexture; -uniform sampler2D uGammaLUT; -uniform vec2 uColorTextureSize0; -uniform vec2 uMaskTextureSize0; -uniform vec4 uFilterParams0; -uniform vec4 uFilterParams1; -uniform vec4 uFilterParams2; -uniform vec2 uFramebufferSize; -uniform int uCtrl; -in vec3 vMaskTexCoord0; -in vec2 vColorTexCoord0; -in vec4 vBaseColor; -in float vTileCtrl; -out vec4 oFragColor; + + + + + + + + + + + + + + + + + + + + + + + + + + + + + @@ -567,27 +559,42 @@ float sampleMask(float maskAlpha, -void calculateColor(int tileCtrl, int ctrl){ +vec4 calculateColor(vec2 fragCoord, + sampler2D colorTexture0, + sampler2D maskTexture0, + sampler2D destTexture, + sampler2D gammaLUT, + vec2 colorTextureSize0, + vec2 maskTextureSize0, + vec4 filterParams0, + vec4 filterParams1, + vec4 filterParams2, + vec2 framebufferSize, + int ctrl, + vec3 maskTexCoord0, + vec2 colorTexCoord0, + vec4 baseColor, + int tileCtrl){ int maskCtrl0 =(tileCtrl >> 0)& 0x3; float maskAlpha = 1.0; - maskAlpha = sampleMask(maskAlpha, 
uMaskTexture0, uMaskTextureSize0, vMaskTexCoord0, maskCtrl0); + maskAlpha = sampleMask(maskAlpha, maskTexture0, maskTextureSize0, maskTexCoord0, maskCtrl0); - vec4 color = vBaseColor; + vec4 color = baseColor; int color0Combine =(ctrl >> 6)& 0x3; if(color0Combine != 0){ int color0Filter =(ctrl >> 4)& 0x3; - vec4 color0 = filterColor(vColorTexCoord0, - uColorTexture0, - uGammaLUT, - uColorTextureSize0, - gl_FragCoord . xy, - uFramebufferSize, - uFilterParams0, - uFilterParams1, - uFilterParams2, + vec4 color0 = filterColor(colorTexCoord0, + colorTexture0, + gammaLUT, + colorTextureSize0, + fragCoord, + framebufferSize, + filterParams0, + filterParams1, + filterParams2, color0Filter); color = combineColor0(color, color0, color0Combine); } @@ -597,18 +604,53 @@ void calculateColor(int tileCtrl, int ctrl){ int compositeOp =(ctrl >> 8)& 0xf; - color = composite(color, uDestTexture, uFramebufferSize, gl_FragCoord . xy, compositeOp); + color = composite(color, destTexture, framebufferSize, fragCoord, compositeOp); color . rgb *= color . a; - oFragColor = color; + return color; } +uniform sampler2D uColorTexture0; +uniform sampler2D uMaskTexture0; +uniform sampler2D uDestTexture; +uniform sampler2D uGammaLUT; +uniform vec2 uColorTextureSize0; +uniform vec2 uMaskTextureSize0; +uniform vec2 uFramebufferSize; + +in vec3 vMaskTexCoord0; +in vec2 vColorTexCoord0; +in vec4 vBaseColor; +in float vTileCtrl; +in vec4 vFilterParams0; +in vec4 vFilterParams1; +in vec4 vFilterParams2; +in float vCtrl; + +out vec4 oFragColor; + + void main(){ - calculateColor(int(vTileCtrl), uCtrl); + oFragColor = calculateColor(gl_FragCoord . 
xy, + uColorTexture0, + uMaskTexture0, + uDestTexture, + uGammaLUT, + uColorTextureSize0, + uMaskTextureSize0, + vFilterParams0, + vFilterParams1, + vFilterParams2, + uFramebufferSize, + int(vCtrl), + vMaskTexCoord0, + vColorTexCoord0, + vBaseColor, + int(vTileCtrl)); } diff --git a/resources/shaders/gl4/d3d9/tile.vs.glsl b/resources/shaders/gl4/d3d9/tile.vs.glsl new file mode 100644 index 00000000..8257d318 --- /dev/null +++ b/resources/shaders/gl4/d3d9/tile.vs.glsl @@ -0,0 +1,123 @@ +#version {{version}} +// Automatically generated from files in pathfinder/shaders/. Do not edit! + + + + + + + + + + + + +#extension GL_GOOGLE_include_directive : enable + +precision highp float; + + + + + + + + + + + + + + + + +vec4 fetchUnscaled(sampler2D srcTexture, vec2 scale, vec2 originCoord, int entry){ + return texture(srcTexture,(originCoord + vec2(0.5)+ vec2(entry, 0))* scale); +} + +void computeTileVaryings(vec2 position, + int colorEntry, + sampler2D textureMetadata, + ivec2 textureMetadataSize, + out vec2 outColorTexCoord0, + out vec4 outBaseColor, + out vec4 outFilterParams0, + out vec4 outFilterParams1, + out vec4 outFilterParams2, + out int outCtrl){ + vec2 metadataScale = vec2(1.0)/ vec2(textureMetadataSize); + vec2 metadataEntryCoord = vec2(colorEntry % 128 * 8, colorEntry / 128); + vec4 colorTexMatrix0 = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 0); + vec4 colorTexOffsets = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 1); + vec4 baseColor = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 2); + vec4 filterParams0 = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 3); + vec4 filterParams1 = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 4); + vec4 filterParams2 = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 5); + vec4 extra = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 6); + outColorTexCoord0 = mat2(colorTexMatrix0)* position + 
colorTexOffsets . xy; + outBaseColor = baseColor; + outFilterParams0 = filterParams0; + outFilterParams1 = filterParams1; + outFilterParams2 = filterParams2; + outCtrl = int(extra . x); +} + + +uniform mat4 uTransform; +uniform vec2 uTileSize; +uniform sampler2D uTextureMetadata; +uniform ivec2 uTextureMetadataSize; +uniform sampler2D uZBuffer; +uniform ivec2 uZBufferSize; + +in ivec2 aTileOffset; +in ivec2 aTileOrigin; +in uvec4 aMaskTexCoord0; +in ivec2 aCtrlBackdrop; +in int aPathIndex; +in int aColor; + +out vec3 vMaskTexCoord0; +out vec2 vColorTexCoord0; +out vec4 vBaseColor; +out float vTileCtrl; +out vec4 vFilterParams0; +out vec4 vFilterParams1; +out vec4 vFilterParams2; +out float vCtrl; + +void main(){ + vec2 tileOrigin = vec2(aTileOrigin), tileOffset = vec2(aTileOffset); + vec2 position =(tileOrigin + tileOffset)* uTileSize; + + ivec4 zValue = ivec4(texture(uZBuffer,(tileOrigin + vec2(0.5))/ vec2(uZBufferSize))* 255.0); + if(aPathIndex <(zValue . x |(zValue . y << 8)|(zValue . z << 16)|(zValue . w << 24))){ + gl_Position = vec4(0.0); + return; + } + + uvec2 maskTileCoord = uvec2(aMaskTexCoord0 . x, aMaskTexCoord0 . y + 256u * aMaskTexCoord0 . z); + vec2 maskTexCoord0 =(vec2(maskTileCoord)+ tileOffset)* uTileSize; + if(aCtrlBackdrop . y == 0 && aMaskTexCoord0 . w != 0u){ + gl_Position = vec4(0.0); + return; + } + + int ctrl; + computeTileVaryings(position, + aColor, + uTextureMetadata, + uTextureMetadataSize, + vColorTexCoord0, + vBaseColor, + vFilterParams0, + vFilterParams1, + vFilterParams2, + ctrl); + + vTileCtrl = float(aCtrlBackdrop . x); + vCtrl = float(ctrl); + vMaskTexCoord0 = vec3(maskTexCoord0, float(aCtrlBackdrop . 
y)); + gl_Position = uTransform * vec4(position, 0.0, 1.0); +} + diff --git a/resources/shaders/gl4/d3d9/tile_clip_combine.fs.glsl b/resources/shaders/gl4/d3d9/tile_clip_combine.fs.glsl new file mode 100644 index 00000000..b097b871 --- /dev/null +++ b/resources/shaders/gl4/d3d9/tile_clip_combine.fs.glsl @@ -0,0 +1,34 @@ +#version {{version}} +// Automatically generated from files in pathfinder/shaders/. Do not edit! + + + + + + + + + + + + +precision highp float; + + + + + +uniform sampler2D uSrc; + +in vec2 vTexCoord0; +in float vBackdrop0; +in vec2 vTexCoord1; +in float vBackdrop1; + +out vec4 oFragColor; + +void main(){ + oFragColor = min(abs(texture(uSrc, vTexCoord0)+ vBackdrop0), + abs(texture(uSrc, vTexCoord1)+ vBackdrop1)); +} + diff --git a/resources/shaders/gl4/d3d9/tile_clip_combine.vs.glsl b/resources/shaders/gl4/d3d9/tile_clip_combine.vs.glsl new file mode 100644 index 00000000..38f85f04 --- /dev/null +++ b/resources/shaders/gl4/d3d9/tile_clip_combine.vs.glsl @@ -0,0 +1,54 @@ +#version {{version}} +// Automatically generated from files in pathfinder/shaders/. Do not edit! 
+ + + + + + + + + + + + +precision highp float; + + + + + +uniform vec2 uFramebufferSize; + +in ivec2 aTileOffset; +in int aDestTileIndex; +in int aDestBackdrop; +in int aSrcTileIndex; +in int aSrcBackdrop; + +out vec2 vTexCoord0; +out float vBackdrop0; +out vec2 vTexCoord1; +out float vBackdrop1; + +void main(){ + vec2 destPosition = vec2(ivec2(aDestTileIndex % 256, aDestTileIndex / 256)+ aTileOffset); + vec2 srcPosition = vec2(ivec2(aSrcTileIndex % 256, aSrcTileIndex / 256)+ aTileOffset); + destPosition *= vec2(16.0, 4.0)/ uFramebufferSize; + srcPosition *= vec2(16.0, 4.0)/ uFramebufferSize; + + vTexCoord0 = destPosition; + vTexCoord1 = srcPosition; + + vBackdrop0 = float(aDestBackdrop); + vBackdrop1 = float(aSrcBackdrop); + + if(aDestTileIndex < 0) + destPosition = vec2(0.0); + + + + + gl_Position = vec4(mix(vec2(- 1.0), vec2(1.0), destPosition), 0.0, 1.0); +} + diff --git a/resources/shaders/gl4/tile_clip.fs.glsl b/resources/shaders/gl4/d3d9/tile_clip_copy.fs.glsl similarity index 69% rename from resources/shaders/gl4/tile_clip.fs.glsl rename to resources/shaders/gl4/d3d9/tile_clip_copy.fs.glsl index 9b8b6cd3..b1d846fe 100644 --- a/resources/shaders/gl4/tile_clip.fs.glsl +++ b/resources/shaders/gl4/d3d9/tile_clip_copy.fs.glsl @@ -21,11 +21,10 @@ precision highp float; uniform sampler2D uSrc; in vec2 vTexCoord; -in float vBackdrop; out vec4 oFragColor; void main(){ - oFragColor = clamp(abs(texture(uSrc, vTexCoord)+ vBackdrop), 0.0, 1.0); + oFragColor = texture(uSrc, vTexCoord); } diff --git a/resources/shaders/gl4/d3d9/tile_clip_copy.vs.glsl b/resources/shaders/gl4/d3d9/tile_clip_copy.vs.glsl new file mode 100644 index 00000000..04b2982f --- /dev/null +++ b/resources/shaders/gl4/d3d9/tile_clip_copy.vs.glsl @@ -0,0 +1,42 @@ +#version {{version}} +// Automatically generated from files in pathfinder/shaders/. Do not edit! 
+ + + + + + + + + + + + +precision highp float; + + + + + +uniform vec2 uFramebufferSize; + +in ivec2 aTileOffset; +in int aTileIndex; + +out vec2 vTexCoord; + +void main(){ + vec2 position = vec2(ivec2(aTileIndex % 256, aTileIndex / 256)+ aTileOffset); + position *= vec2(16.0, 4.0)/ uFramebufferSize; + + vTexCoord = position; + + if(aTileIndex < 0) + position = vec2(0.0); + + + + + gl_Position = vec4(mix(vec2(- 1.0), vec2(1.0), position), 0.0, 1.0); +} + diff --git a/resources/shaders/gl4/tile_copy.fs.glsl b/resources/shaders/gl4/d3d9/tile_copy.fs.glsl similarity index 100% rename from resources/shaders/gl4/tile_copy.fs.glsl rename to resources/shaders/gl4/d3d9/tile_copy.fs.glsl diff --git a/resources/shaders/gl4/tile_copy.vs.glsl b/resources/shaders/gl4/d3d9/tile_copy.vs.glsl similarity index 100% rename from resources/shaders/gl4/tile_copy.vs.glsl rename to resources/shaders/gl4/d3d9/tile_copy.vs.glsl diff --git a/resources/shaders/gl4/debug_solid.fs.glsl b/resources/shaders/gl4/debug/solid.fs.glsl similarity index 100% rename from resources/shaders/gl4/debug_solid.fs.glsl rename to resources/shaders/gl4/debug/solid.fs.glsl diff --git a/resources/shaders/gl4/debug_solid.vs.glsl b/resources/shaders/gl4/debug/solid.vs.glsl similarity index 100% rename from resources/shaders/gl4/debug_solid.vs.glsl rename to resources/shaders/gl4/debug/solid.vs.glsl diff --git a/resources/shaders/gl4/debug_texture.fs.glsl b/resources/shaders/gl4/debug/texture.fs.glsl similarity index 100% rename from resources/shaders/gl4/debug_texture.fs.glsl rename to resources/shaders/gl4/debug/texture.fs.glsl diff --git a/resources/shaders/gl4/debug_texture.vs.glsl b/resources/shaders/gl4/debug/texture.vs.glsl similarity index 100% rename from resources/shaders/gl4/debug_texture.vs.glsl rename to resources/shaders/gl4/debug/texture.vs.glsl diff --git a/resources/shaders/gl4/fill.cs.glsl b/resources/shaders/gl4/fill.cs.glsl deleted file mode 100644 index 3a528df5..00000000 --- 
a/resources/shaders/gl4/fill.cs.glsl +++ /dev/null @@ -1,100 +0,0 @@ -#version {{version}} -// Automatically generated from files in pathfinder/shaders/. Do not edit! - - - - - - - - - - - - -#extension GL_GOOGLE_include_directive : enable - -precision highp float; - - - - - - - - - - - - - - - - -vec4 computeCoverage(vec2 from, vec2 to, sampler2D areaLUT){ - - vec2 left = from . x < to . x ? from : to, right = from . x < to . x ? to : from; - - - vec2 window = clamp(vec2(from . x, to . x), - 0.5, 0.5); - float offset = mix(window . x, window . y, 0.5)- left . x; - float t = offset /(right . x - left . x); - - - float y = mix(left . y, right . y, t); - float d =(right . y - left . y)/(right . x - left . x); - - - float dX = window . x - window . y; - return texture(areaLUT, vec2(y + 8.0, abs(d * dX))/ 16.0)* dX; -} - - -layout(local_size_x = 16, local_size_y = 4)in; - -uniform writeonly image2D uDest; -uniform sampler2D uAreaLUT; -uniform int uFirstTileIndex; - -layout(std430, binding = 0)buffer bFills { - restrict readonly uvec2 iFills[]; -}; - -layout(std430, binding = 1)buffer bNextFills { - restrict readonly int iNextFills[]; -}; - -layout(std430, binding = 2)buffer bFillTileMap { - restrict readonly int iFillTileMap[]; -}; - -void main(){ - ivec2 tileSubCoord = ivec2(gl_LocalInvocationID . xy)* ivec2(1, 4); - uint tileIndexOffset = gl_WorkGroupID . z; - - uint tileIndex = tileIndexOffset + uint(uFirstTileIndex); - - int fillIndex = iFillTileMap[tileIndex]; - if(fillIndex < 0) - return; - - vec4 coverages = vec4(0.0); - do { - uvec2 fill = iFills[fillIndex]; - vec2 from = vec2(fill . y & 0xf,(fill . y >> 4u)& 0xf)+ - vec2(fill . x & 0xff,(fill . x >> 8u)& 0xff)/ 256.0; - vec2 to = vec2((fill . y >> 8u)& 0xf,(fill . y >> 12u)& 0xf)+ - vec2((fill . x >> 16u)& 0xff,(fill . 
x >> 24u)& 0xff)/ 256.0; - - coverages += computeCoverage(from -(vec2(tileSubCoord)+ vec2(0.5)), - to -(vec2(tileSubCoord)+ vec2(0.5)), - uAreaLUT); - - fillIndex = iNextFills[fillIndex]; - } while(fillIndex >= 0); - - ivec2 tileOrigin = ivec2(tileIndex & 0xff,(tileIndex >> 8u)& 0xff)* ivec2(16, 4); - ivec2 destCoord = tileOrigin + ivec2(gl_LocalInvocationID . xy); - imageStore(uDest, destCoord, coverages); -} - diff --git a/resources/shaders/gl4/fill.vs.glsl b/resources/shaders/gl4/fill.vs.glsl deleted file mode 100644 index 58b0b7f0..00000000 --- a/resources/shaders/gl4/fill.vs.glsl +++ /dev/null @@ -1,71 +0,0 @@ -#version {{version}} -// Automatically generated from files in pathfinder/shaders/. Do not edit! - - - - - - - - - - - - -precision highp float; - - - - - -uniform vec2 uFramebufferSize; -uniform vec2 uTileSize; - -in uvec2 aTessCoord; -in uint aFromPx; -in uint aToPx; -in vec2 aFromSubpx; -in vec2 aToSubpx; -in uint aTileIndex; - -out vec2 vFrom; -out vec2 vTo; - -vec2 computeTileOffset(uint tileIndex, float stencilTextureWidth){ - uint tilesPerRow = uint(stencilTextureWidth / uTileSize . x); - uvec2 tileOffset = uvec2(tileIndex % tilesPerRow, tileIndex / tilesPerRow); - return vec2(tileOffset)* uTileSize * vec2(1.0, 0.25); -} - -void main(){ - vec2 tileOrigin = computeTileOffset(aTileIndex, uFramebufferSize . x); - - vec2 from = vec2(aFromPx & 15u, aFromPx >> 4u)+ aFromSubpx; - vec2 to = vec2(aToPx & 15u, aToPx >> 4u)+ aToSubpx; - - vec2 position; - if(aTessCoord . x == 0u) - position . x = floor(min(from . x, to . x)); - else - position . x = ceil(max(from . x, to . x)); - if(aTessCoord . y == 0u) - position . y = floor(min(from . y, to . y)); - else - position . y = uTileSize . y; - position . y = floor(position . 
y * 0.25); - - - - - - vec2 offset = vec2(0.0, 1.5)- position * vec2(1.0, 4.0); - vFrom = from + offset; - vTo = to + offset; - - vec2 globalPosition =(tileOrigin + position)/ uFramebufferSize * 2.0 - 1.0; - - - - gl_Position = vec4(globalPosition, 0.0, 1.0); -} - diff --git a/resources/shaders/gl4/tile.vs.glsl b/resources/shaders/gl4/tile.vs.glsl deleted file mode 100644 index 6f3819c0..00000000 --- a/resources/shaders/gl4/tile.vs.glsl +++ /dev/null @@ -1,59 +0,0 @@ -#version {{version}} -// Automatically generated from files in pathfinder/shaders/. Do not edit! - - - - - - - - - - - - -precision highp float; - - - - - -uniform mat4 uTransform; -uniform vec2 uTileSize; -uniform sampler2D uTextureMetadata; -uniform ivec2 uTextureMetadataSize; - -in ivec2 aTileOffset; -in ivec2 aTileOrigin; -in uvec2 aMaskTexCoord0; -in ivec2 aMaskBackdrop; -in int aColor; -in int aTileCtrl; - -out vec3 vMaskTexCoord0; -out vec2 vColorTexCoord0; -out vec4 vBaseColor; -out float vTileCtrl; - -void main(){ - vec2 tileOrigin = vec2(aTileOrigin), tileOffset = vec2(aTileOffset); - vec2 position =(tileOrigin + tileOffset)* uTileSize; - - vec2 maskTexCoord0 =(vec2(aMaskTexCoord0)+ tileOffset)* uTileSize; - - vec2 textureMetadataScale = vec2(1.0)/ vec2(uTextureMetadataSize); - vec2 metadataEntryCoord = vec2(aColor % 128 * 4, aColor / 128); - vec2 colorTexMatrix0Coord =(metadataEntryCoord + vec2(0.5, 0.5))* textureMetadataScale; - vec2 colorTexOffsetsCoord =(metadataEntryCoord + vec2(1.5, 0.5))* textureMetadataScale; - vec2 baseColorCoord =(metadataEntryCoord + vec2(2.5, 0.5))* textureMetadataScale; - vec4 colorTexMatrix0 = texture(uTextureMetadata, colorTexMatrix0Coord); - vec4 colorTexOffsets = texture(uTextureMetadata, colorTexOffsetsCoord); - vec4 baseColor = texture(uTextureMetadata, baseColorCoord); - - vColorTexCoord0 = mat2(colorTexMatrix0)* position + colorTexOffsets . xy; - vMaskTexCoord0 = vec3(maskTexCoord0, float(aMaskBackdrop . 
x)); - vBaseColor = baseColor; - vTileCtrl = float(aTileCtrl); - gl_Position = uTransform * vec4(position, 0.0, 1.0); -} - diff --git a/resources/shaders/gl4/tile_clip.vs.glsl b/resources/shaders/gl4/tile_clip.vs.glsl deleted file mode 100644 index 6693ec92..00000000 --- a/resources/shaders/gl4/tile_clip.vs.glsl +++ /dev/null @@ -1,36 +0,0 @@ -#version {{version}} -// Automatically generated from files in pathfinder/shaders/. Do not edit! - - - - - - - - - - - - -precision highp float; - - - - - -in ivec2 aTileOffset; -in ivec2 aDestTileOrigin; -in ivec2 aSrcTileOrigin; -in int aSrcBackdrop; - -out vec2 vTexCoord; -out float vBackdrop; - -void main(){ - vec2 destPosition = vec2(aDestTileOrigin + aTileOffset)/ vec2(256.0); - vec2 srcPosition = vec2(aSrcTileOrigin + aTileOffset)/ vec2(256.0); - vTexCoord = srcPosition; - vBackdrop = float(aSrcBackdrop); - gl_Position = vec4(mix(vec2(- 1.0), vec2(1.0), destPosition), 0.0, 1.0); -} - diff --git a/resources/shaders/gl4/tile_fill.fs.glsl b/resources/shaders/gl4/tile_fill.fs.glsl deleted file mode 100644 index 88b566ff..00000000 --- a/resources/shaders/gl4/tile_fill.fs.glsl +++ /dev/null @@ -1,708 +0,0 @@ -#version {{version}} -// Automatically generated from files in pathfinder/shaders/. Do not edit! 
- - - - - - - - - - - - -#extension GL_GOOGLE_include_directive : enable - -precision highp float; -precision highp sampler2D; - -uniform sampler2D uColorTexture0; -uniform sampler2D uMaskTexture0; -uniform sampler2D uDestTexture; -uniform sampler2D uGammaLUT; -uniform vec4 uFilterParams0; -uniform vec4 uFilterParams1; -uniform vec4 uFilterParams2; -uniform vec2 uFramebufferSize; -uniform vec2 uColorTextureSize0; -uniform int uCtrl; -uniform sampler2D uAreaLUT; - -layout(std430, binding = 0)buffer bFills { - restrict readonly uvec2 iFills[]; -}; - -layout(std430, binding = 1)buffer bNextFills { - restrict readonly int iNextFills[]; -}; - -layout(std430, binding = 2)buffer bFillTileMap { - restrict readonly int iFillTileMap[]; -}; - -in vec2 vTileSubCoord; -flat in uint vMaskTileIndex0; -flat in int vMaskTileBackdrop0; -in vec2 vColorTexCoord0; -in vec4 vBaseColor; -in float vTileCtrl; - -out vec4 oFragColor; - - - - - - - - - - - - -vec4 computeCoverage(vec2 from, vec2 to, sampler2D areaLUT){ - - vec2 left = from . x < to . x ? from : to, right = from . x < to . x ? to : from; - - - vec2 window = clamp(vec2(from . x, to . x), - 0.5, 0.5); - float offset = mix(window . x, window . y, 0.5)- left . x; - float t = offset /(right . x - left . x); - - - float y = mix(left . y, right . y, t); - float d =(right . y - left . y)/(right . x - left . x); - - - float dX = window . x - window . y; - return texture(areaLUT, vec2(y + 8.0, abs(d * dX))/ 16.0)* dX; -} - - - - - - - - - - - - -vec4 computeCoverage(vec2 from, vec2 to, sampler2D areaLUT); - -ivec2 calculateTileOrigin(uint tileIndex){ - return ivec2(tileIndex & 0xff,(tileIndex >> 8u)& 0xff)* 16; -} - -vec4 calculateFillAlpha(ivec2 tileSubCoord, uint tileIndex){ - int fillIndex = iFillTileMap[tileIndex]; - if(fillIndex < 0) - return vec4(0.0); - - vec4 coverages = vec4(0.0); - do { - uvec2 fill = iFills[fillIndex]; - vec2 from = vec2(fill . y & 0xf,(fill . y >> 4u)& 0xf)+ - vec2(fill . x & 0xff,(fill . 
x >> 8u)& 0xff)/ 256.0; - vec2 to = vec2((fill . y >> 8u)& 0xf,(fill . y >> 12u)& 0xf)+ - vec2((fill . x >> 16u)& 0xff,(fill . x >> 24u)& 0xff)/ 256.0; - - coverages += computeCoverage(from -(vec2(tileSubCoord)+ vec2(0.5)), - to -(vec2(tileSubCoord)+ vec2(0.5)), - uAreaLUT); - - fillIndex = iNextFills[fillIndex]; - } while(fillIndex >= 0); - - return coverages; -} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -vec4 sampleColor(sampler2D colorTexture, vec2 colorTexCoord){ - return texture(colorTexture, colorTexCoord); -} - - - -vec4 combineColor0(vec4 destColor, vec4 srcColor, int op){ - switch(op){ - case 0x1 : - return vec4(srcColor . rgb, srcColor . a * destColor . a); - case 0x2 : - return vec4(destColor . rgb, srcColor . a * destColor . a); - } - return destColor; -} - - - -float filterTextSample1Tap(float offset, sampler2D colorTexture, vec2 colorTexCoord){ - return texture(colorTexture, colorTexCoord + vec2(offset, 0.0)). r; -} - - -void filterTextSample9Tap(out vec4 outAlphaLeft, - out float outAlphaCenter, - out vec4 outAlphaRight, - sampler2D colorTexture, - vec2 colorTexCoord, - vec4 kernel, - float onePixel){ - bool wide = kernel . x > 0.0; - outAlphaLeft = - vec4(wide ? filterTextSample1Tap(- 4.0 * onePixel, colorTexture, colorTexCoord): 0.0, - filterTextSample1Tap(- 3.0 * onePixel, colorTexture, colorTexCoord), - filterTextSample1Tap(- 2.0 * onePixel, colorTexture, colorTexCoord), - filterTextSample1Tap(- 1.0 * onePixel, colorTexture, colorTexCoord)); - outAlphaCenter = filterTextSample1Tap(0.0, colorTexture, colorTexCoord); - outAlphaRight = - vec4(filterTextSample1Tap(1.0 * onePixel, colorTexture, colorTexCoord), - filterTextSample1Tap(2.0 * onePixel, colorTexture, colorTexCoord), - filterTextSample1Tap(3.0 * onePixel, colorTexture, colorTexCoord), - wide ? 
filterTextSample1Tap(4.0 * onePixel, colorTexture, colorTexCoord): 0.0); -} - -float filterTextConvolve7Tap(vec4 alpha0, vec3 alpha1, vec4 kernel){ - return dot(alpha0, kernel)+ dot(alpha1, kernel . zyx); -} - -float filterTextGammaCorrectChannel(float bgColor, float fgColor, sampler2D gammaLUT){ - return texture(gammaLUT, vec2(fgColor, 1.0 - bgColor)). r; -} - - -vec3 filterTextGammaCorrect(vec3 bgColor, vec3 fgColor, sampler2D gammaLUT){ - return vec3(filterTextGammaCorrectChannel(bgColor . r, fgColor . r, gammaLUT), - filterTextGammaCorrectChannel(bgColor . g, fgColor . g, gammaLUT), - filterTextGammaCorrectChannel(bgColor . b, fgColor . b, gammaLUT)); -} - - - - - - -vec4 filterText(vec2 colorTexCoord, - sampler2D colorTexture, - sampler2D gammaLUT, - vec2 colorTextureSize, - vec4 filterParams0, - vec4 filterParams1, - vec4 filterParams2){ - - vec4 kernel = filterParams0; - vec3 bgColor = filterParams1 . rgb; - vec3 fgColor = filterParams2 . rgb; - bool gammaCorrectionEnabled = filterParams2 . a != 0.0; - - - vec3 alpha; - if(kernel . w == 0.0){ - alpha = texture(colorTexture, colorTexCoord). rrr; - } else { - vec4 alphaLeft, alphaRight; - float alphaCenter; - filterTextSample9Tap(alphaLeft, - alphaCenter, - alphaRight, - colorTexture, - colorTexCoord, - kernel, - 1.0 / colorTextureSize . x); - - float r = filterTextConvolve7Tap(alphaLeft, vec3(alphaCenter, alphaRight . xy), kernel); - float g = filterTextConvolve7Tap(vec4(alphaLeft . yzw, alphaCenter), alphaRight . xyz, kernel); - float b = filterTextConvolve7Tap(vec4(alphaLeft . zw, alphaCenter, alphaRight . x), - alphaRight . 
yzw, - kernel); - - alpha = vec3(r, g, b); - } - - - if(gammaCorrectionEnabled) - alpha = filterTextGammaCorrect(bgColor, alpha, gammaLUT); - - - return vec4(mix(bgColor, fgColor, alpha), 1.0); -} - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -vec4 filterRadialGradient(vec2 colorTexCoord, - sampler2D colorTexture, - vec2 colorTextureSize, - vec2 fragCoord, - vec2 framebufferSize, - vec4 filterParams0, - vec4 filterParams1){ - vec2 lineFrom = filterParams0 . xy, lineVector = filterParams0 . zw; - vec2 radii = filterParams1 . xy, uvOrigin = filterParams1 . zw; - - vec2 dP = colorTexCoord - lineFrom, dC = lineVector; - float dR = radii . y - radii . x; - - float a = dot(dC, dC)- dR * dR; - float b = dot(dP, dC)+ radii . x * dR; - float c = dot(dP, dP)- radii . x * radii . x; - float discrim = b * b - a * c; - - vec4 color = vec4(0.0); - if(abs(discrim)>= 0.00001){ - vec2 ts = vec2(sqrt(discrim)* vec2(1.0, - 1.0)+ vec2(b))/ vec2(a); - if(ts . x > ts . y) - ts = ts . yx; - float t = ts . x >= 0.0 ? ts . x : ts . y; - color = texture(colorTexture, uvOrigin + vec2(clamp(t, 0.0, 1.0), 0.0)); - } - - return color; -} - - - - - - -vec4 filterBlur(vec2 colorTexCoord, - sampler2D colorTexture, - vec2 colorTextureSize, - vec4 filterParams0, - vec4 filterParams1){ - - vec2 srcOffsetScale = filterParams0 . xy / colorTextureSize; - int support = int(filterParams0 . z); - vec3 gaussCoeff = filterParams1 . xyz; - - - float gaussSum = gaussCoeff . x; - vec4 color = texture(colorTexture, colorTexCoord)* gaussCoeff . x; - gaussCoeff . xy *= gaussCoeff . yz; - - - - - - - - - - for(int i = 1;i <= support;i += 2){ - float gaussPartialSum = gaussCoeff . x; - gaussCoeff . xy *= gaussCoeff . yz; - gaussPartialSum += gaussCoeff . x; - - vec2 srcOffset = srcOffsetScale *(float(i)+ gaussCoeff . 
x / gaussPartialSum); - color +=(texture(colorTexture, colorTexCoord - srcOffset)+ - texture(colorTexture, colorTexCoord + srcOffset))* gaussPartialSum; - - gaussSum += 2.0 * gaussPartialSum; - gaussCoeff . xy *= gaussCoeff . yz; - } - - - return color / gaussSum; -} - -vec4 filterNone(vec2 colorTexCoord, sampler2D colorTexture){ - return sampleColor(colorTexture, colorTexCoord); -} - -vec4 filterColor(vec2 colorTexCoord, - sampler2D colorTexture, - sampler2D gammaLUT, - vec2 colorTextureSize, - vec2 fragCoord, - vec2 framebufferSize, - vec4 filterParams0, - vec4 filterParams1, - vec4 filterParams2, - int colorFilter){ - switch(colorFilter){ - case 0x1 : - return filterRadialGradient(colorTexCoord, - colorTexture, - colorTextureSize, - fragCoord, - framebufferSize, - filterParams0, - filterParams1); - case 0x3 : - return filterBlur(colorTexCoord, - colorTexture, - colorTextureSize, - filterParams0, - filterParams1); - case 0x2 : - return filterText(colorTexCoord, - colorTexture, - gammaLUT, - colorTextureSize, - filterParams0, - filterParams1, - filterParams2); - } - return filterNone(colorTexCoord, colorTexture); -} - - - -vec3 compositeSelect(bvec3 cond, vec3 ifTrue, vec3 ifFalse){ - return vec3(cond . x ? ifTrue . x : ifFalse . x, - cond . y ? ifTrue . y : ifFalse . y, - cond . z ? ifTrue . z : ifFalse . z); -} - -float compositeDivide(float num, float denom){ - return denom != 0.0 ? num / denom : 0.0; -} - -vec3 compositeColorDodge(vec3 destColor, vec3 srcColor){ - bvec3 destZero = equal(destColor, vec3(0.0)), srcOne = equal(srcColor, vec3(1.0)); - return compositeSelect(destZero, - vec3(0.0), - compositeSelect(srcOne, vec3(1.0), destColor /(vec3(1.0)- srcColor))); -} - - -vec3 compositeHSLToRGB(vec3 hsl){ - float a = hsl . y * min(hsl . z, 1.0 - hsl . z); - vec3 ks = mod(vec3(0.0, 8.0, 4.0)+ vec3(hsl . x * 1.9098593171027443), 12.0); - return hsl . 
zzz - clamp(min(ks - vec3(3.0), vec3(9.0)- ks), - 1.0, 1.0)* a; -} - - -vec3 compositeRGBToHSL(vec3 rgb){ - float v = max(max(rgb . r, rgb . g), rgb . b), xMin = min(min(rgb . r, rgb . g), rgb . b); - float c = v - xMin, l = mix(xMin, v, 0.5); - vec3 terms = rgb . r == v ? vec3(0.0, rgb . gb): - rgb . g == v ? vec3(2.0, rgb . br): - vec3(4.0, rgb . rg); - float h = 1.0471975511965976 * compositeDivide(terms . x * c + terms . y - terms . z, c); - float s = compositeDivide(c, v); - return vec3(h, s, l); -} - -vec3 compositeScreen(vec3 destColor, vec3 srcColor){ - return destColor + srcColor - destColor * srcColor; -} - -vec3 compositeHardLight(vec3 destColor, vec3 srcColor){ - return compositeSelect(lessThanEqual(srcColor, vec3(0.5)), - destColor * vec3(2.0)* srcColor, - compositeScreen(destColor, vec3(2.0)* srcColor - vec3(1.0))); -} - -vec3 compositeSoftLight(vec3 destColor, vec3 srcColor){ - vec3 darkenedDestColor = - compositeSelect(lessThanEqual(destColor, vec3(0.25)), - ((vec3(16.0)* destColor - 12.0)* destColor + 4.0)* destColor, - sqrt(destColor)); - vec3 factor = compositeSelect(lessThanEqual(srcColor, vec3(0.5)), - destColor *(vec3(1.0)- destColor), - darkenedDestColor - destColor); - return destColor +(srcColor * 2.0 - 1.0)* factor; -} - -vec3 compositeHSL(vec3 destColor, vec3 srcColor, int op){ - switch(op){ - case 0xc : - return vec3(srcColor . x, destColor . y, destColor . z); - case 0xd : - return vec3(destColor . x, srcColor . y, destColor . z); - case 0xe : - return vec3(srcColor . x, srcColor . y, destColor . z); - default : - return vec3(destColor . x, destColor . y, srcColor . 
z); - } -} - -vec3 compositeRGB(vec3 destColor, vec3 srcColor, int op){ - switch(op){ - case 0x1 : - return destColor * srcColor; - case 0x2 : - return compositeScreen(destColor, srcColor); - case 0x3 : - return compositeHardLight(srcColor, destColor); - case 0x4 : - return min(destColor, srcColor); - case 0x5 : - return max(destColor, srcColor); - case 0x6 : - return compositeColorDodge(destColor, srcColor); - case 0x7 : - return vec3(1.0)- compositeColorDodge(vec3(1.0)- destColor, vec3(1.0)- srcColor); - case 0x8 : - return compositeHardLight(destColor, srcColor); - case 0x9 : - return compositeSoftLight(destColor, srcColor); - case 0xa : - return abs(destColor - srcColor); - case 0xb : - return destColor + srcColor - vec3(2.0)* destColor * srcColor; - case 0xc : - case 0xd : - case 0xe : - case 0xf : - return compositeHSLToRGB(compositeHSL(compositeRGBToHSL(destColor), - compositeRGBToHSL(srcColor), - op)); - } - return srcColor; -} - -vec4 composite(vec4 srcColor, - sampler2D destTexture, - vec2 destTextureSize, - vec2 fragCoord, - int op){ - if(op == 0x0) - return srcColor; - - - vec2 destTexCoord = fragCoord / destTextureSize; - vec4 destColor = texture(destTexture, destTexCoord); - vec3 blendedRGB = compositeRGB(destColor . rgb, srcColor . rgb, op); - return vec4(srcColor . a *(1.0 - destColor . a)* srcColor . rgb + - srcColor . a * destColor . a * blendedRGB + - (1.0 - srcColor . a)* destColor . rgb, - 1.0); -} - - - -float sampleMask(float maskAlpha, - sampler2D maskTexture, - vec2 maskTextureSize, - vec3 maskTexCoord, - int maskCtrl){ - if(maskCtrl == 0) - return maskAlpha; - - ivec2 maskTexCoordI = ivec2(floor(maskTexCoord . xy)); - vec4 texel = texture(maskTexture,(vec2(maskTexCoordI / ivec2(1, 4))+ 0.5)/ maskTextureSize); - float coverage = texel[maskTexCoordI . y % 4]+ maskTexCoord . 
z; - - if((maskCtrl & 0x1)!= 0) - coverage = abs(coverage); - else - coverage = 1.0 - abs(1.0 - mod(coverage, 2.0)); - return min(maskAlpha, coverage); -} - - - -vec4 calculateColorWithMaskAlpha(float maskAlpha, - vec4 baseColor, - vec2 colorTexCoord0, - vec2 fragCoord, - int ctrl){ - - vec4 color = baseColor; - int color0Combine =(ctrl >> 6)& - 0x3; - if(color0Combine != 0){ - int color0Filter =(ctrl >> 4)& 0x3; - vec4 color0 = filterColor(colorTexCoord0, - uColorTexture0, - uGammaLUT, - uColorTextureSize0, - fragCoord, - uFramebufferSize, - uFilterParams0, - uFilterParams1, - uFilterParams2, - color0Filter); - color = combineColor0(color, color0, color0Combine); - } - - - color . a *= maskAlpha; - - - - - - - - - - color . rgb *= color . a; - return color; -} - - -vec4 calculateColor(int tileCtrl, int ctrl){ - float maskAlpha = 1.0; - int maskCtrl0 =(ctrl >> 0)& 0x1; - int maskTileCtrl0 =(tileCtrl >> 0)& 0x3; - uint maskTileIndex0 = vMaskTileIndex0; - if(maskCtrl0 != 0 && maskTileCtrl0 != 0){ - ivec2 tileSubCoord = ivec2(floor(vTileSubCoord)); - vec4 alphas = calculateFillAlpha(tileSubCoord, maskTileIndex0)+ float(vMaskTileBackdrop0); - maskAlpha = alphas . x; - } - return calculateColorWithMaskAlpha(maskAlpha, - vBaseColor, - vColorTexCoord0, - gl_FragCoord . xy, - ctrl); -} - - - - - -void main(){ - oFragColor = calculateColor(int(vTileCtrl), uCtrl); - -} - diff --git a/resources/shaders/gl4/tile_fill.vs.glsl b/resources/shaders/gl4/tile_fill.vs.glsl deleted file mode 100644 index 1c089c09..00000000 --- a/resources/shaders/gl4/tile_fill.vs.glsl +++ /dev/null @@ -1,58 +0,0 @@ -#version {{version}} -// Automatically generated from files in pathfinder/shaders/. Do not edit! 
- - - - - - - - - - - - -precision highp float; -precision highp sampler2D; - -uniform mat4 uTransform; -uniform vec2 uTileSize; -uniform sampler2D uTextureMetadata; -uniform ivec2 uTextureMetadataSize; - -in ivec2 aTileOffset; -in ivec2 aTileOrigin; -in uint aMaskTileIndex0; -in ivec2 aMaskBackdrop; -in int aColor; -in int aTileCtrl; - -out vec2 vTileSubCoord; -flat out uint vMaskTileIndex0; -flat out int vMaskTileBackdrop0; -out vec2 vColorTexCoord0; -out vec4 vBaseColor; -out float vTileCtrl; - -void main(){ - vec2 tileOrigin = vec2(aTileOrigin), tileOffset = vec2(aTileOffset); - vec2 position =(tileOrigin + tileOffset)* uTileSize; - - vec2 textureMetadataScale = vec2(1.0)/ vec2(uTextureMetadataSize); - vec2 metadataEntryCoord = vec2(aColor % 128 * 4, aColor / 128); - vec2 colorTexMatrix0Coord =(metadataEntryCoord + vec2(0.5, 0.5))* textureMetadataScale; - vec2 colorTexOffsetsCoord =(metadataEntryCoord + vec2(1.5, 0.5))* textureMetadataScale; - vec2 baseColorCoord =(metadataEntryCoord + vec2(2.5, 0.5))* textureMetadataScale; - vec4 colorTexMatrix0 = texture(uTextureMetadata, colorTexMatrix0Coord); - vec4 colorTexOffsets = texture(uTextureMetadata, colorTexOffsetsCoord); - vec4 baseColor = texture(uTextureMetadata, baseColorCoord); - - vTileSubCoord = tileOffset * vec2(16.0); - vColorTexCoord0 = mat2(colorTexMatrix0)* position + colorTexOffsets . xy; - vMaskTileIndex0 = aMaskTileIndex0; - vMaskTileBackdrop0 = aMaskBackdrop . 
x; - vBaseColor = baseColor; - vTileCtrl = float(aTileCtrl); - gl_Position = uTransform * vec4(position, 0.0, 1.0); -} - diff --git a/resources/shaders/metal/blit.fs.metal b/resources/shaders/metal/blit.fs.metal index cb0ab32a..d7a4afb2 100644 --- a/resources/shaders/metal/blit.fs.metal +++ b/resources/shaders/metal/blit.fs.metal @@ -18,7 +18,7 @@ fragment main0_out main0(main0_in in [[stage_in]], texture2d uSrc [[textu { main0_out out = {}; float4 color = uSrc.sample(uSrcSmplr, in.vTexCoord); - out.oFragColor = float4(color.xyz * color.w, color.w); + out.oFragColor = color; return out; } diff --git a/resources/shaders/metal/blit.vs.metal b/resources/shaders/metal/blit.vs.metal index 701d97f0..35de802e 100644 --- a/resources/shaders/metal/blit.vs.metal +++ b/resources/shaders/metal/blit.vs.metal @@ -15,13 +15,13 @@ struct main0_in int2 aPosition [[attribute(0)]]; }; -vertex main0_out main0(main0_in in [[stage_in]]) +vertex main0_out main0(main0_in in [[stage_in]], constant float4& uDestRect [[buffer(0)]], constant float2& uFramebufferSize [[buffer(1)]]) { main0_out out = {}; + float2 position = mix(uDestRect.xy, uDestRect.zw, float2(in.aPosition)) / uFramebufferSize; float2 texCoord = float2(in.aPosition); - texCoord.y = 1.0 - texCoord.y; out.vTexCoord = texCoord; - out.gl_Position = float4(mix(float2(-1.0), float2(1.0), float2(in.aPosition)), 0.0, 1.0); + out.gl_Position = float4(mix(float2(-1.0), float2(1.0), position), 0.0, 1.0); return out; } diff --git a/resources/shaders/metal/d3d11/bin.cs.metal b/resources/shaders/metal/d3d11/bin.cs.metal new file mode 100644 index 00000000..7008d475 --- /dev/null +++ b/resources/shaders/metal/d3d11/bin.cs.metal @@ -0,0 +1,284 @@ +// Automatically generated from files in pathfinder/shaders/. Do not edit! 
+#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct bIndirectDrawParams +{ + uint iIndirectDrawParams[1]; +}; + +struct bTiles +{ + uint iTiles[1]; +}; + +struct bFills +{ + uint iFills[1]; +}; + +struct bBackdrops +{ + uint iBackdrops[1]; +}; + +struct bMicrolines +{ + uint4 iMicrolines[1]; +}; + +struct bMetadata +{ + int4 iMetadata[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u); + +static inline __attribute__((always_inline)) +float4 unpackMicroline(thread const uint4& packedMicroline, thread uint& outPathIndex) +{ + outPathIndex = packedMicroline.w; + int4 signedMicroline = int4(packedMicroline); + return float4(float((signedMicroline.x << 16) >> 16), float(signedMicroline.x >> 16), float((signedMicroline.y << 16) >> 16), float(signedMicroline.y >> 16)) + (float4(float(signedMicroline.z & 255), float((signedMicroline.z >> 8) & 255), float((signedMicroline.z >> 16) & 255), float((signedMicroline.z >> 24) & 255)) / float4(256.0)); +} + +static inline __attribute__((always_inline)) +uint computeTileIndexNoCheck(thread const int2& tileCoords, thread const int4& pathTileRect, thread const uint& pathTileOffset) +{ + int2 offsetCoords = tileCoords - pathTileRect.xy; + return (pathTileOffset + uint(offsetCoords.x)) + uint(offsetCoords.y * (pathTileRect.z - pathTileRect.x)); +} + +static inline __attribute__((always_inline)) +bool4 computeTileOutcodes(thread const int2& tileCoords, thread const int4& pathTileRect) +{ + return bool4(tileCoords < pathTileRect.xy, tileCoords >= pathTileRect.zw); +} + +static inline __attribute__((always_inline)) +bool computeTileIndex(thread const int2& tileCoords, thread const int4& pathTileRect, thread const uint& pathTileOffset, thread uint& outTileIndex) +{ + int2 param = tileCoords; + int4 param_1 = pathTileRect; + uint param_2 = pathTileOffset; + outTileIndex = 
computeTileIndexNoCheck(param, param_1, param_2); + int2 param_3 = tileCoords; + int4 param_4 = pathTileRect; + return !any(computeTileOutcodes(param_3, param_4)); +} + +static inline __attribute__((always_inline)) +void addFill(thread const float4& lineSegment, thread const int2& tileCoords, thread const int4& pathTileRect, thread const uint& pathTileOffset, device bIndirectDrawParams& v_155, device bTiles& v_165, thread int uMaxFillCount, device bFills& v_186) +{ + int2 param = tileCoords; + int4 param_1 = pathTileRect; + uint param_2 = pathTileOffset; + uint param_3; + bool _124 = computeTileIndex(param, param_1, param_2, param_3); + uint tileIndex = param_3; + if (!_124) + { + return; + } + uint4 scaledLocalLine = uint4((lineSegment - float4(tileCoords.xyxy * int4(16))) * float4(256.0)); + if (scaledLocalLine.x == scaledLocalLine.z) + { + return; + } + uint _160 = atomic_fetch_add_explicit((device atomic_uint*)&v_155.iIndirectDrawParams[1], 1u, memory_order_relaxed); + uint fillIndex = _160; + uint _174 = atomic_exchange_explicit((device atomic_uint*)&v_165.iTiles[(tileIndex * 4u) + 1u], uint(int(fillIndex)), memory_order_relaxed); + uint fillLink = _174; + if (fillIndex < uint(uMaxFillCount)) + { + v_186.iFills[(fillIndex * 3u) + 0u] = scaledLocalLine.x | (scaledLocalLine.y << uint(16)); + v_186.iFills[(fillIndex * 3u) + 1u] = scaledLocalLine.z | (scaledLocalLine.w << uint(16)); + v_186.iFills[(fillIndex * 3u) + 2u] = fillLink; + } +} + +static inline __attribute__((always_inline)) +void adjustBackdrop(thread const int& backdropDelta, thread const int2& tileCoords, thread const int4& pathTileRect, thread const uint& pathTileOffset, thread const uint& pathBackdropOffset, device bTiles& v_165, device bBackdrops& v_251) +{ + int2 param = tileCoords; + int4 param_1 = pathTileRect; + bool4 outcodes = computeTileOutcodes(param, param_1); + if (any(outcodes)) + { + bool _230 = (!outcodes.x) && outcodes.y; + bool _236; + if (_230) + { + _236 = !outcodes.z; + } + else 
+ { + _236 = _230; + } + if (_236) + { + uint backdropIndex = pathBackdropOffset + uint(tileCoords.x - pathTileRect.x); + uint _257 = atomic_fetch_add_explicit((device atomic_uint*)&v_251.iBackdrops[backdropIndex * 3u], uint(backdropDelta), memory_order_relaxed); + } + } + else + { + int2 param_2 = tileCoords; + int4 param_3 = pathTileRect; + uint param_4 = pathTileOffset; + uint tileIndex = computeTileIndexNoCheck(param_2, param_3, param_4); + uint _275 = atomic_fetch_add_explicit((device atomic_uint*)&v_165.iTiles[(tileIndex * 4u) + 2u], uint(backdropDelta) << uint(24), memory_order_relaxed); + } +} + +kernel void main0(constant int& uMaxFillCount [[buffer(2)]], constant int& uMicrolineCount [[buffer(5)]], device bIndirectDrawParams& v_155 [[buffer(0)]], device bTiles& v_165 [[buffer(1)]], device bFills& v_186 [[buffer(3)]], device bBackdrops& v_251 [[buffer(4)]], const device bMicrolines& _346 [[buffer(6)]], const device bMetadata& _360 [[buffer(7)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + uint segmentIndex = gl_GlobalInvocationID.x; + if (segmentIndex >= uint(uMicrolineCount)) + { + return; + } + uint4 param = _346.iMicrolines[segmentIndex]; + uint param_1; + float4 _354 = unpackMicroline(param, param_1); + uint pathIndex = param_1; + float4 lineSegment = _354; + int4 pathTileRect = _360.iMetadata[(pathIndex * 3u) + 0u]; + uint pathTileOffset = uint(_360.iMetadata[(pathIndex * 3u) + 1u].x); + uint pathBackdropOffset = uint(_360.iMetadata[(pathIndex * 3u) + 2u].x); + int2 tileSize = int2(16); + int4 tileLineSegment = int4(floor(lineSegment / float4(tileSize.xyxy))); + int2 fromTileCoords = tileLineSegment.xy; + int2 toTileCoords = tileLineSegment.zw; + float2 vector = lineSegment.zw - lineSegment.xy; + float2 vectorIsNegative = float2((vector.x < 0.0) ? (-1.0) : 0.0, (vector.y < 0.0) ? (-1.0) : 0.0); + int2 tileStep = int2((vector.x < 0.0) ? (-1) : 1, (vector.y < 0.0) ? 
(-1) : 1); + float2 firstTileCrossing = float2((fromTileCoords + int2(int(vector.x >= 0.0), int(vector.y >= 0.0))) * tileSize); + float2 tMax = (firstTileCrossing - lineSegment.xy) / vector; + float2 tDelta = abs(float2(tileSize) / vector); + float2 currentPosition = lineSegment.xy; + int2 tileCoords = fromTileCoords; + int lastStepDirection = 0; + uint iteration = 0u; + int nextStepDirection; + float _501; + float4 auxiliarySegment; + while (iteration < 1024u) + { + if (tMax.x < tMax.y) + { + nextStepDirection = 1; + } + else + { + if (tMax.x > tMax.y) + { + nextStepDirection = 2; + } + else + { + if (float(tileStep.x) > 0.0) + { + nextStepDirection = 1; + } + else + { + nextStepDirection = 2; + } + } + } + if (nextStepDirection == 1) + { + _501 = tMax.x; + } + else + { + _501 = tMax.y; + } + float nextT = fast::min(_501, 1.0); + if (all(tileCoords == toTileCoords)) + { + nextStepDirection = 0; + } + float2 nextPosition = mix(lineSegment.xy, lineSegment.zw, float2(nextT)); + float4 clippedLineSegment = float4(currentPosition, nextPosition); + float4 param_2 = clippedLineSegment; + int2 param_3 = tileCoords; + int4 param_4 = pathTileRect; + uint param_5 = pathTileOffset; + addFill(param_2, param_3, param_4, param_5, v_155, v_165, uMaxFillCount, v_186); + bool haveAuxiliarySegment = false; + if ((tileStep.y < 0) && (nextStepDirection == 2)) + { + auxiliarySegment = float4(clippedLineSegment.zw, float2(tileCoords * tileSize)); + haveAuxiliarySegment = true; + } + else + { + if ((tileStep.y > 0) && (lastStepDirection == 2)) + { + auxiliarySegment = float4(float2(tileCoords * tileSize), clippedLineSegment.xy); + haveAuxiliarySegment = true; + } + } + if (haveAuxiliarySegment) + { + float4 param_6 = auxiliarySegment; + int2 param_7 = tileCoords; + int4 param_8 = pathTileRect; + uint param_9 = pathTileOffset; + addFill(param_6, param_7, param_8, param_9, v_155, v_165, uMaxFillCount, v_186); + } + if ((tileStep.x < 0) && (lastStepDirection == 1)) + { + int param_10 = 1; + 
int2 param_11 = tileCoords; + int4 param_12 = pathTileRect; + uint param_13 = pathTileOffset; + uint param_14 = pathBackdropOffset; + adjustBackdrop(param_10, param_11, param_12, param_13, param_14, v_165, v_251); + } + else + { + if ((tileStep.x > 0) && (nextStepDirection == 1)) + { + int param_15 = -1; + int2 param_16 = tileCoords; + int4 param_17 = pathTileRect; + uint param_18 = pathTileOffset; + uint param_19 = pathBackdropOffset; + adjustBackdrop(param_15, param_16, param_17, param_18, param_19, v_165, v_251); + } + } + if (nextStepDirection == 1) + { + tMax.x += tDelta.x; + tileCoords.x += tileStep.x; + } + else + { + if (nextStepDirection == 2) + { + tMax.y += tDelta.y; + tileCoords.y += tileStep.y; + } + else + { + if (nextStepDirection == 0) + { + break; + } + } + } + currentPosition = nextPosition; + lastStepDirection = nextStepDirection; + iteration++; + } +} + diff --git a/resources/shaders/metal/d3d11/bound.cs.metal b/resources/shaders/metal/d3d11/bound.cs.metal new file mode 100644 index 00000000..b1a4bd01 --- /dev/null +++ b/resources/shaders/metal/d3d11/bound.cs.metal @@ -0,0 +1,77 @@ +// Automatically generated from files in pathfinder/shaders/. Do not edit! 
+#include +#include + +using namespace metal; + +struct bTilePathInfo +{ + uint4 iTilePathInfo[1]; +}; + +struct bTiles +{ + uint iTiles[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u); + +kernel void main0(constant int& uTileCount [[buffer(0)]], constant int& uPathCount [[buffer(1)]], const device bTilePathInfo& _64 [[buffer(2)]], device bTiles& _148 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + uint tileIndex = gl_GlobalInvocationID.x; + if (tileIndex >= uint(uTileCount)) + { + return; + } + uint lowPathIndex = 0u; + uint highPathIndex = uint(uPathCount); + int iteration = 0; + for (;;) + { + bool _42 = iteration < 1024; + bool _50; + if (_42) + { + _50 = (lowPathIndex + 1u) < highPathIndex; + } + else + { + _50 = _42; + } + if (_50) + { + uint midPathIndex = lowPathIndex + ((highPathIndex - lowPathIndex) / 2u); + uint midTileIndex = _64.iTilePathInfo[midPathIndex].z; + if (tileIndex < midTileIndex) + { + highPathIndex = midPathIndex; + } + else + { + lowPathIndex = midPathIndex; + if (tileIndex == midTileIndex) + { + break; + } + } + iteration++; + continue; + } + else + { + break; + } + } + uint pathIndex = lowPathIndex; + uint4 pathInfo = _64.iTilePathInfo[pathIndex]; + int2 packedTileRect = int2(pathInfo.xy); + int4 tileRect = int4((packedTileRect.x << 16) >> 16, packedTileRect.x >> 16, (packedTileRect.y << 16) >> 16, packedTileRect.y >> 16); + uint tileOffset = tileIndex - pathInfo.z; + uint tileWidth = uint(tileRect.z - tileRect.x); + int2 tileCoords = tileRect.xy + int2(int(tileOffset % tileWidth), int(tileOffset / tileWidth)); + _148.iTiles[(tileIndex * 4u) + 0u] = 4294967295u; + _148.iTiles[(tileIndex * 4u) + 1u] = 4294967295u; + _148.iTiles[(tileIndex * 4u) + 2u] = 16777215u; + _148.iTiles[(tileIndex * 4u) + 3u] = pathInfo.w; +} + diff --git a/resources/shaders/metal/d3d11/dice.cs.metal b/resources/shaders/metal/d3d11/dice.cs.metal new file mode 100644 index 00000000..54913e55 --- /dev/null 
+++ b/resources/shaders/metal/d3d11/dice.cs.metal @@ -0,0 +1,205 @@ +// Automatically generated from files in pathfinder/shaders/. Do not edit! +#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct bMicrolines +{ + uint4 iMicrolines[1]; +}; + +struct bPoints +{ + float2 iPoints[1]; +}; + +struct bDiceMetadata +{ + uint4 iDiceMetadata[1]; +}; + +struct bInputIndices +{ + uint2 iInputIndices[1]; +}; + +struct bComputeIndirectParams +{ + uint iComputeIndirectParams[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u); + +static inline __attribute__((always_inline)) +float2 getPoint(thread const uint& pointIndex, thread float2x2 uTransform, const device bPoints& v_194, thread float2 uTranslation) +{ + return (uTransform * v_194.iPoints[pointIndex]) + uTranslation; +} + +static inline __attribute__((always_inline)) +float2 sampleCurve(thread const float4& baseline, thread const float4& ctrl, thread const float& t) +{ + float2 p0 = baseline.xy; + float2 p1 = ctrl.xy; + float2 p2 = ctrl.zw; + float2 p3 = baseline.zw; + float2 p0p1 = mix(p0, p1, float2(t)); + float2 p1p2 = mix(p1, p2, float2(t)); + float2 p2p3 = mix(p2, p3, float2(t)); + float2 p0p1p2 = mix(p0p1, p1p2, float2(t)); + float2 p1p2p3 = mix(p1p2, p2p3, float2(t)); + return mix(p0p1p2, p1p2p3, float2(t)); +} + +static inline __attribute__((always_inline)) +float2 sampleLine(thread const float4& line, thread const float& t) +{ + return mix(line.xy, line.zw, float2(t)); +} + +static inline __attribute__((always_inline)) +void emitMicroline(thread const float4& microlineSegment, thread const uint& pathIndex, thread const uint& outputMicrolineIndex, thread int uMaxMicrolineCount, device bMicrolines& v_76) +{ + if (outputMicrolineIndex >= uint(uMaxMicrolineCount)) + { + return; + } + int4 microlineSubpixels = int4(round(fast::clamp(microlineSegment, float4(-32768.0), 
float4(32767.0)) * 256.0)); + int4 microlinePixels = int4(floor(float4(microlineSubpixels) / float4(256.0))); + int4 microlineFractPixels = microlineSubpixels - (microlinePixels * int4(256)); + v_76.iMicrolines[outputMicrolineIndex] = uint4((uint(microlinePixels.x) & 65535u) | (uint(microlinePixels.y) << uint(16)), (uint(microlinePixels.z) & 65535u) | (uint(microlinePixels.w) << uint(16)), ((uint(microlineFractPixels.x) | (uint(microlineFractPixels.y) << uint(8))) | (uint(microlineFractPixels.z) << uint(16))) | (uint(microlineFractPixels.w) << uint(24)), pathIndex); +} + +kernel void main0(constant int& uMaxMicrolineCount [[buffer(0)]], constant int& uLastBatchSegmentIndex [[buffer(5)]], constant int& uPathCount [[buffer(6)]], constant float2x2& uTransform [[buffer(2)]], constant float2& uTranslation [[buffer(4)]], device bMicrolines& v_76 [[buffer(1)]], const device bPoints& v_194 [[buffer(3)]], const device bDiceMetadata& _253 [[buffer(7)]], const device bInputIndices& _300 [[buffer(8)]], device bComputeIndirectParams& _439 [[buffer(9)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + uint batchSegmentIndex = gl_GlobalInvocationID.x; + if (batchSegmentIndex >= uint(uLastBatchSegmentIndex)) + { + return; + } + uint lowPathIndex = 0u; + uint highPathIndex = uint(uPathCount); + int iteration = 0; + for (;;) + { + bool _234 = iteration < 1024; + bool _241; + if (_234) + { + _241 = (lowPathIndex + 1u) < highPathIndex; + } + else + { + _241 = _234; + } + if (_241) + { + uint midPathIndex = lowPathIndex + ((highPathIndex - lowPathIndex) / 2u); + uint midBatchSegmentIndex = _253.iDiceMetadata[midPathIndex].z; + if (batchSegmentIndex < midBatchSegmentIndex) + { + highPathIndex = midPathIndex; + } + else + { + lowPathIndex = midPathIndex; + if (batchSegmentIndex == midBatchSegmentIndex) + { + break; + } + } + iteration++; + continue; + } + else + { + break; + } + } + uint batchPathIndex = lowPathIndex; + uint4 diceMetadata = 
_253.iDiceMetadata[batchPathIndex]; + uint firstGlobalSegmentIndexInPath = diceMetadata.y; + uint firstBatchSegmentIndexInPath = diceMetadata.z; + uint globalSegmentIndex = (batchSegmentIndex - firstBatchSegmentIndexInPath) + firstGlobalSegmentIndexInPath; + uint2 inputIndices = _300.iInputIndices[globalSegmentIndex]; + uint fromPointIndex = inputIndices.x; + uint flagsPathIndex = inputIndices.y; + uint toPointIndex = fromPointIndex; + if ((flagsPathIndex & 1073741824u) != 0u) + { + toPointIndex += 3u; + } + else + { + if ((flagsPathIndex & 2147483648u) != 0u) + { + toPointIndex += 2u; + } + else + { + toPointIndex++; + } + } + uint param = fromPointIndex; + uint param_1 = toPointIndex; + float4 baseline = float4(getPoint(param, uTransform, v_194, uTranslation), getPoint(param_1, uTransform, v_194, uTranslation)); + float4 ctrl = float4(0.0); + bool isCurve = (flagsPathIndex & 3221225472u) != 0u; + float segmentCountF; + if (isCurve) + { + uint param_2 = fromPointIndex + 1u; + float2 ctrl0 = getPoint(param_2, uTransform, v_194, uTranslation); + if ((flagsPathIndex & 2147483648u) != 0u) + { + float2 ctrl0_2 = ctrl0 * float2(2.0); + ctrl = (baseline + (ctrl0 * float2(2.0)).xyxy) * float4(0.3333333432674407958984375); + } + else + { + uint param_3 = fromPointIndex + 2u; + ctrl = float4(ctrl0, getPoint(param_3, uTransform, v_194, uTranslation)); + } + float2 bound = float2(6.0) * fast::max(abs((ctrl.zw - (ctrl.xy * 2.0)) + baseline.xy), abs((baseline.zw - (ctrl.zw * 2.0)) + ctrl.xy)); + segmentCountF = sqrt(length(bound) / 2.0); + } + else + { + segmentCountF = length(baseline.zw - baseline.xy) / 16.0; + } + int segmentCount = max(int(ceil(segmentCountF)), 1); + uint _444 = atomic_fetch_add_explicit((device atomic_uint*)&_439.iComputeIndirectParams[3], uint(segmentCount), memory_order_relaxed); + uint firstOutputMicrolineIndex = _444; + float prevT = 0.0; + float2 prevPoint = baseline.xy; + float2 nextPoint; + for (int segmentIndex = 0; segmentIndex < segmentCount; 
segmentIndex++) + { + float nextT = float(segmentIndex + 1) / float(segmentCount); + if (isCurve) + { + float4 param_4 = baseline; + float4 param_5 = ctrl; + float param_6 = nextT; + nextPoint = sampleCurve(param_4, param_5, param_6); + } + else + { + float4 param_7 = baseline; + float param_8 = nextT; + nextPoint = sampleLine(param_7, param_8); + } + float4 param_9 = float4(prevPoint, nextPoint); + uint param_10 = batchPathIndex; + uint param_11 = firstOutputMicrolineIndex + uint(segmentIndex); + emitMicroline(param_9, param_10, param_11, uMaxMicrolineCount, v_76); + prevT = nextT; + prevPoint = nextPoint; + } +} + diff --git a/resources/shaders/metal/d3d11/fill.cs.metal b/resources/shaders/metal/d3d11/fill.cs.metal new file mode 100644 index 00000000..c3f47df7 --- /dev/null +++ b/resources/shaders/metal/d3d11/fill.cs.metal @@ -0,0 +1,100 @@ +// Automatically generated from files in pathfinder/shaders/. Do not edit! +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct bFills +{ + uint iFills[1]; +}; + +struct bAlphaTiles +{ + uint iAlphaTiles[1]; +}; + +struct bTiles +{ + uint iTiles[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(16u, 4u, 1u); + +static inline __attribute__((always_inline)) +float4 computeCoverage(thread const float2& from, thread const float2& to, thread const texture2d areaLUT, thread const sampler areaLUTSmplr) +{ + float2 left = select(to, from, bool2(from.x < to.x)); + float2 right = select(from, to, bool2(from.x < to.x)); + float2 window = fast::clamp(float2(from.x, to.x), float2(-0.5), float2(0.5)); + float offset = mix(window.x, window.y, 0.5) - left.x; + float t = offset / (right.x - left.x); + float y = mix(left.y, right.y, t); + float d = (right.y - left.y) / (right.x - left.x); + float dX = window.x - window.y; + return areaLUT.sample(areaLUTSmplr, (float2(y + 8.0, abs(d * dX)) / float2(16.0)), level(0.0)) * dX; +} + +static inline 
__attribute__((always_inline)) +float4 accumulateCoverageForFillList(thread int& fillIndex, thread const int2& tileSubCoord, const device bFills& v_148, thread texture2d uAreaLUT, thread const sampler uAreaLUTSmplr) +{ + float2 tileFragCoord = float2(tileSubCoord) + float2(0.5); + float4 coverages = float4(0.0); + int iteration = 0; + do + { + uint fillFrom = v_148.iFills[(fillIndex * 3) + 0]; + uint fillTo = v_148.iFills[(fillIndex * 3) + 1]; + float4 lineSegment = float4(float(fillFrom & 65535u), float(fillFrom >> uint(16)), float(fillTo & 65535u), float(fillTo >> uint(16))) / float4(256.0); + lineSegment -= tileFragCoord.xyxy; + float2 param = lineSegment.xy; + float2 param_1 = lineSegment.zw; + coverages += computeCoverage(param, param_1, uAreaLUT, uAreaLUTSmplr); + fillIndex = int(v_148.iFills[(fillIndex * 3) + 2]); + iteration++; + } while ((fillIndex >= 0) && (iteration < 1024)); + return coverages; +} + +static inline __attribute__((always_inline)) +int2 computeTileCoord(thread const uint& alphaTileIndex, thread uint3& gl_LocalInvocationID) +{ + uint x = alphaTileIndex & 255u; + uint y = (alphaTileIndex >> 8u) & (255u + (((alphaTileIndex >> 16u) & 255u) << 8u)); + return (int2(16, 4) * int2(int(x), int(y))) + int2(gl_LocalInvocationID.xy); +} + +kernel void main0(constant int2& uAlphaTileRange [[buffer(1)]], const device bFills& v_148 [[buffer(0)]], const device bAlphaTiles& _284 [[buffer(2)]], device bTiles& _294 [[buffer(3)]], texture2d uAreaLUT [[texture(0)]], texture2d uDest [[texture(1)]], sampler uAreaLUTSmplr [[sampler(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) +{ + int2 tileSubCoord = int2(gl_LocalInvocationID.xy) * int2(1, 4); + uint batchAlphaTileIndex = gl_WorkGroupID.x | (gl_WorkGroupID.y << uint(15)); + uint alphaTileIndex = batchAlphaTileIndex + uint(uAlphaTileRange.x); + if (alphaTileIndex >= uint(uAlphaTileRange.y)) + { + return; + } + uint tileIndex = 
_284.iAlphaTiles[(batchAlphaTileIndex * 2u) + 0u]; + if ((int(_294.iTiles[(tileIndex * 4u) + 2u] << uint(8)) >> 8) < 0) + { + return; + } + int fillIndex = int(_294.iTiles[(tileIndex * 4u) + 1u]); + int backdrop = int(_294.iTiles[(tileIndex * 4u) + 3u]) >> 24; + float4 coverages = float4(float(backdrop)); + int param = fillIndex; + int2 param_1 = tileSubCoord; + float4 _334 = accumulateCoverageForFillList(param, param_1, v_148, uAreaLUT, uAreaLUTSmplr); + coverages += _334; + coverages = fast::clamp(abs(coverages), float4(0.0), float4(1.0)); + int clipTileIndex = int(_284.iAlphaTiles[(batchAlphaTileIndex * 2u) + 1u]); + if (clipTileIndex >= 0) + { + uint param_2 = uint(clipTileIndex); + coverages = fast::min(coverages, uDest.read(uint2(computeTileCoord(param_2, gl_LocalInvocationID)))); + } + uint param_3 = alphaTileIndex; + uDest.write(coverages, uint2(computeTileCoord(param_3, gl_LocalInvocationID))); +} + diff --git a/resources/shaders/metal/d3d11/propagate.cs.metal b/resources/shaders/metal/d3d11/propagate.cs.metal new file mode 100644 index 00000000..9cb008c6 --- /dev/null +++ b/resources/shaders/metal/d3d11/propagate.cs.metal @@ -0,0 +1,184 @@ +// Automatically generated from files in pathfinder/shaders/. Do not edit! 
+#pragma clang diagnostic ignored "-Wmissing-prototypes" +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct bBackdrops +{ + int iBackdrops[1]; +}; + +struct bDrawMetadata +{ + uint4 iDrawMetadata[1]; +}; + +struct bClipMetadata +{ + uint4 iClipMetadata[1]; +}; + +struct bDrawTiles +{ + uint iDrawTiles[1]; +}; + +struct bClipTiles +{ + uint iClipTiles[1]; +}; + +struct bIndirectDrawParams +{ + uint iIndirectDrawParams[1]; +}; + +struct bAlphaTiles +{ + uint iAlphaTiles[1]; +}; + +struct bZBuffer +{ + int iZBuffer[1]; +}; + +struct bFirstTileMap +{ + int iFirstTileMap[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u); + +static inline __attribute__((always_inline)) +uint calculateTileIndex(thread const uint& bufferOffset, thread const uint4& tileRect, thread const uint2& tileCoord) +{ + return (bufferOffset + (tileCoord.y * (tileRect.z - tileRect.x))) + tileCoord.x; +} + +kernel void main0(constant int& uColumnCount [[buffer(0)]], constant int& uFirstAlphaTileIndex [[buffer(8)]], constant int2& uFramebufferTileSize [[buffer(9)]], const device bBackdrops& _59 [[buffer(1)]], const device bDrawMetadata& _85 [[buffer(2)]], const device bClipMetadata& _126 [[buffer(3)]], device bDrawTiles& _175 [[buffer(4)]], device bClipTiles& _252 [[buffer(5)]], device bIndirectDrawParams& _303 [[buffer(6)]], device bAlphaTiles& _310 [[buffer(7)]], device bZBuffer& _381 [[buffer(10)]], device bFirstTileMap& _398 [[buffer(11)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + uint columnIndex = gl_GlobalInvocationID.x; + if (int(columnIndex) >= uColumnCount) + { + return; + } + int currentBackdrop = _59.iBackdrops[(columnIndex * 3u) + 0u]; + int tileX = _59.iBackdrops[(columnIndex * 3u) + 1u]; + uint drawPathIndex = uint(_59.iBackdrops[(columnIndex * 3u) + 2u]); + uint4 drawTileRect = _85.iDrawMetadata[(drawPathIndex * 3u) + 0u]; + uint4 drawOffsets = 
_85.iDrawMetadata[(drawPathIndex * 3u) + 1u]; + uint2 drawTileSize = drawTileRect.zw - drawTileRect.xy; + uint drawTileBufferOffset = drawOffsets.x; + bool zWrite = drawOffsets.z != 0u; + int clipPathIndex = int(drawOffsets.w); + uint4 clipTileRect = uint4(0u); + uint4 clipOffsets = uint4(0u); + if (clipPathIndex >= 0) + { + clipTileRect = _126.iClipMetadata[(clipPathIndex * 2) + 0]; + clipOffsets = _126.iClipMetadata[(clipPathIndex * 2) + 1]; + } + uint clipTileBufferOffset = clipOffsets.x; + uint clipBackdropOffset = clipOffsets.y; + for (uint tileY = 0u; tileY < drawTileSize.y; tileY++) + { + uint2 drawTileCoord = uint2(uint(tileX), tileY); + uint param = drawTileBufferOffset; + uint4 param_1 = drawTileRect; + uint2 param_2 = drawTileCoord; + uint drawTileIndex = calculateTileIndex(param, param_1, param_2); + int drawAlphaTileIndex = -1; + int clipAlphaTileIndex = -1; + int drawFirstFillIndex = int(_175.iDrawTiles[(drawTileIndex * 4u) + 1u]); + int drawBackdropDelta = int(_175.iDrawTiles[(drawTileIndex * 4u) + 2u]) >> 24; + uint drawTileWord = _175.iDrawTiles[(drawTileIndex * 4u) + 3u] & 16777215u; + int drawTileBackdrop = currentBackdrop; + bool haveDrawAlphaMask = drawFirstFillIndex >= 0; + bool needNewAlphaTile = haveDrawAlphaMask; + if (clipPathIndex >= 0) + { + uint2 tileCoord = drawTileCoord + drawTileRect.xy; + if (all(bool4(tileCoord >= clipTileRect.xy, tileCoord < clipTileRect.zw))) + { + uint2 clipTileCoord = tileCoord - clipTileRect.xy; + uint param_3 = clipTileBufferOffset; + uint4 param_4 = clipTileRect; + uint2 param_5 = clipTileCoord; + uint clipTileIndex = calculateTileIndex(param_3, param_4, param_5); + int thisClipAlphaTileIndex = int(_252.iClipTiles[(clipTileIndex * 4u) + 2u] << uint(8)) >> 8; + uint clipTileWord = _252.iClipTiles[(clipTileIndex * 4u) + 3u]; + int clipTileBackdrop = int(clipTileWord) >> 24; + if (thisClipAlphaTileIndex >= 0) + { + if (haveDrawAlphaMask) + { + clipAlphaTileIndex = thisClipAlphaTileIndex; + needNewAlphaTile = 
true; + } + else + { + if (drawTileBackdrop != 0) + { + drawAlphaTileIndex = thisClipAlphaTileIndex; + clipAlphaTileIndex = -1; + needNewAlphaTile = false; + } + else + { + drawAlphaTileIndex = -1; + clipAlphaTileIndex = -1; + needNewAlphaTile = false; + } + } + } + else + { + if (clipTileBackdrop == 0) + { + drawTileBackdrop = 0; + needNewAlphaTile = false; + } + else + { + needNewAlphaTile = true; + } + } + } + else + { + drawTileBackdrop = 0; + needNewAlphaTile = false; + } + } + if (needNewAlphaTile) + { + uint _306 = atomic_fetch_add_explicit((device atomic_uint*)&_303.iIndirectDrawParams[4], 1u, memory_order_relaxed); + uint drawBatchAlphaTileIndex = _306; + _310.iAlphaTiles[(drawBatchAlphaTileIndex * 2u) + 0u] = drawTileIndex; + _310.iAlphaTiles[(drawBatchAlphaTileIndex * 2u) + 1u] = uint(clipAlphaTileIndex); + drawAlphaTileIndex = int(drawBatchAlphaTileIndex) + uFirstAlphaTileIndex; + } + _175.iDrawTiles[(drawTileIndex * 4u) + 2u] = (uint(drawAlphaTileIndex) & 16777215u) | (uint(drawBackdropDelta) << uint(24)); + _175.iDrawTiles[(drawTileIndex * 4u) + 3u] = drawTileWord | (uint(drawTileBackdrop) << uint(24)); + int2 tileCoord_1 = int2(tileX, int(tileY)) + int2(drawTileRect.xy); + int tileMapIndex = (tileCoord_1.y * uFramebufferTileSize.x) + tileCoord_1.x; + if ((zWrite && (drawTileBackdrop != 0)) && (drawAlphaTileIndex < 0)) + { + int _386 = atomic_fetch_max_explicit((device atomic_int*)&_381.iZBuffer[tileMapIndex], int(drawTileIndex), memory_order_relaxed); + } + if ((drawTileBackdrop != 0) || (drawAlphaTileIndex >= 0)) + { + int _403 = atomic_exchange_explicit((device atomic_int*)&_398.iFirstTileMap[tileMapIndex], int(drawTileIndex), memory_order_relaxed); + int nextTileIndex = _403; + _175.iDrawTiles[(drawTileIndex * 4u) + 0u] = uint(nextTileIndex); + } + currentBackdrop += drawBackdropDelta; + } +} + diff --git a/resources/shaders/metal/d3d11/sort.cs.metal b/resources/shaders/metal/d3d11/sort.cs.metal new file mode 100644 index 00000000..ae01b505 --- 
/dev/null +++ b/resources/shaders/metal/d3d11/sort.cs.metal @@ -0,0 +1,94 @@ +// Automatically generated from files in pathfinder/shaders/. Do not edit! +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct bFirstTileMap +{ + int iFirstTileMap[1]; +}; + +struct bTiles +{ + uint iTiles[1]; +}; + +struct bZBuffer +{ + int iZBuffer[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u); + +static inline __attribute__((always_inline)) +int getFirst(thread const uint& globalTileIndex, device bFirstTileMap& v_26) +{ + return v_26.iFirstTileMap[globalTileIndex]; +} + +static inline __attribute__((always_inline)) +int getNextTile(thread const int& tileIndex, device bTiles& v_37) +{ + return int(v_37.iTiles[(tileIndex * 4) + 0]); +} + +static inline __attribute__((always_inline)) +void setNextTile(thread const int& tileIndex, thread const int& newNextTileIndex, device bTiles& v_37) +{ + v_37.iTiles[(tileIndex * 4) + 0] = uint(newNextTileIndex); +} + +kernel void main0(constant int& uTileCount [[buffer(2)]], device bFirstTileMap& v_26 [[buffer(0)]], device bTiles& v_37 [[buffer(1)]], const device bZBuffer& _76 [[buffer(3)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + uint globalTileIndex = gl_GlobalInvocationID.x; + if (globalTileIndex >= uint(uTileCount)) + { + return; + } + int zValue = _76.iZBuffer[globalTileIndex]; + uint param = globalTileIndex; + int unsortedFirstTileIndex = getFirst(param, v_26); + int sortedFirstTileIndex = -1; + while (unsortedFirstTileIndex >= 0) + { + int currentTileIndex = unsortedFirstTileIndex; + int param_1 = currentTileIndex; + unsortedFirstTileIndex = getNextTile(param_1, v_37); + if (currentTileIndex >= zValue) + { + int prevTrialTileIndex = -1; + int trialTileIndex = sortedFirstTileIndex; + while (true) + { + if ((trialTileIndex < 0) || (currentTileIndex < trialTileIndex)) + { + if (prevTrialTileIndex < 0) + { + int param_2 = 
currentTileIndex; + int param_3 = sortedFirstTileIndex; + setNextTile(param_2, param_3, v_37); + sortedFirstTileIndex = currentTileIndex; + } + else + { + int param_4 = currentTileIndex; + int param_5 = trialTileIndex; + setNextTile(param_4, param_5, v_37); + int param_6 = prevTrialTileIndex; + int param_7 = currentTileIndex; + setNextTile(param_6, param_7, v_37); + } + break; + } + prevTrialTileIndex = trialTileIndex; + int param_8 = trialTileIndex; + trialTileIndex = getNextTile(param_8, v_37); + } + } + } + v_26.iFirstTileMap[globalTileIndex] = sortedFirstTileIndex; +} + diff --git a/resources/shaders/metal/d3d11/tile.cs.metal b/resources/shaders/metal/d3d11/tile.cs.metal new file mode 100644 index 00000000..27f98375 --- /dev/null +++ b/resources/shaders/metal/d3d11/tile.cs.metal @@ -0,0 +1,737 @@ +// Automatically generated from files in pathfinder/shaders/. Do not edit! +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct bFirstTileMap +{ + int iFirstTileMap[1]; +}; + +struct bTiles +{ + uint iTiles[1]; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(16u, 4u, 1u); + +constant float3 _1082 = {}; + +// Implementation of the GLSL mod() function, which is slightly different than Metal fmod() +template +inline Tx mod(Tx x, Ty y) +{ + return x - y * floor(x / y); +} + +static inline __attribute__((always_inline)) +int2 toImageCoords(thread const int2& coords, thread float2 uFramebufferSize) +{ + return int2(coords.x, int(uFramebufferSize.y - float(coords.y))); +} + +static inline __attribute__((always_inline)) +float4 fetchUnscaled(thread const texture2d srcTexture, thread const sampler srcTextureSmplr, thread const float2& scale, thread const float2& originCoord, thread const int& entry) +{ + return srcTexture.sample(srcTextureSmplr, (((originCoord + float2(0.5)) + float2(float(entry), 0.0)) * scale), level(0.0)); +} + +static inline __attribute__((always_inline)) +void 
computeTileVaryings(thread const float2& position, thread const int& colorEntry, thread const texture2d textureMetadata, thread const sampler textureMetadataSmplr, thread const int2& textureMetadataSize, thread float2& outColorTexCoord0, thread float4& outBaseColor, thread float4& outFilterParams0, thread float4& outFilterParams1, thread float4& outFilterParams2, thread int& outCtrl) +{ + float2 metadataScale = float2(1.0) / float2(textureMetadataSize); + float2 metadataEntryCoord = float2(float((colorEntry % 128) * 8), float(colorEntry / 128)); + float2 param = metadataScale; + float2 param_1 = metadataEntryCoord; + int param_2 = 0; + float4 colorTexMatrix0 = fetchUnscaled(textureMetadata, textureMetadataSmplr, param, param_1, param_2); + float2 param_3 = metadataScale; + float2 param_4 = metadataEntryCoord; + int param_5 = 1; + float4 colorTexOffsets = fetchUnscaled(textureMetadata, textureMetadataSmplr, param_3, param_4, param_5); + float2 param_6 = metadataScale; + float2 param_7 = metadataEntryCoord; + int param_8 = 2; + float4 baseColor = fetchUnscaled(textureMetadata, textureMetadataSmplr, param_6, param_7, param_8); + float2 param_9 = metadataScale; + float2 param_10 = metadataEntryCoord; + int param_11 = 3; + float4 filterParams0 = fetchUnscaled(textureMetadata, textureMetadataSmplr, param_9, param_10, param_11); + float2 param_12 = metadataScale; + float2 param_13 = metadataEntryCoord; + int param_14 = 4; + float4 filterParams1 = fetchUnscaled(textureMetadata, textureMetadataSmplr, param_12, param_13, param_14); + float2 param_15 = metadataScale; + float2 param_16 = metadataEntryCoord; + int param_17 = 5; + float4 filterParams2 = fetchUnscaled(textureMetadata, textureMetadataSmplr, param_15, param_16, param_17); + float2 param_18 = metadataScale; + float2 param_19 = metadataEntryCoord; + int param_20 = 6; + float4 extra = fetchUnscaled(textureMetadata, textureMetadataSmplr, param_18, param_19, param_20); + outColorTexCoord0 = 
(float2x2(float2(colorTexMatrix0.xy), float2(colorTexMatrix0.zw)) * position) + colorTexOffsets.xy; + outBaseColor = baseColor; + outFilterParams0 = filterParams0; + outFilterParams1 = filterParams1; + outFilterParams2 = filterParams2; + outCtrl = int(extra.x); +} + +static inline __attribute__((always_inline)) +float sampleMask(thread const float& maskAlpha, thread const texture2d maskTexture, thread const sampler maskTextureSmplr, thread const float2& maskTextureSize, thread const float3& maskTexCoord, thread const int& maskCtrl) +{ + if (maskCtrl == 0) + { + return maskAlpha; + } + int2 maskTexCoordI = int2(floor(maskTexCoord.xy)); + float4 texel = maskTexture.sample(maskTextureSmplr, ((float2(maskTexCoordI / int2(1, 4)) + float2(0.5)) / maskTextureSize), level(0.0)); + float coverage = texel[maskTexCoordI.y % 4] + maskTexCoord.z; + if ((maskCtrl & 1) != 0) + { + coverage = abs(coverage); + } + else + { + coverage = 1.0 - abs(1.0 - mod(coverage, 2.0)); + } + return fast::min(maskAlpha, coverage); +} + +static inline __attribute__((always_inline)) +float4 filterRadialGradient(thread const float2& colorTexCoord, thread const texture2d colorTexture, thread const sampler colorTextureSmplr, thread const float2& colorTextureSize, thread const float2& fragCoord, thread const float2& framebufferSize, thread const float4& filterParams0, thread const float4& filterParams1) +{ + float2 lineFrom = filterParams0.xy; + float2 lineVector = filterParams0.zw; + float2 radii = filterParams1.xy; + float2 uvOrigin = filterParams1.zw; + float2 dP = colorTexCoord - lineFrom; + float2 dC = lineVector; + float dR = radii.y - radii.x; + float a = dot(dC, dC) - (dR * dR); + float b = dot(dP, dC) + (radii.x * dR); + float c = dot(dP, dP) - (radii.x * radii.x); + float discrim = (b * b) - (a * c); + float4 color = float4(0.0); + if (abs(discrim) >= 9.9999997473787516355514526367188e-06) + { + float2 ts = float2((float2(1.0, -1.0) * sqrt(discrim)) + float2(b)) / float2(a); + if (ts.x > 
ts.y) + { + ts = ts.yx; + } + float _595; + if (ts.x >= 0.0) + { + _595 = ts.x; + } + else + { + _595 = ts.y; + } + float t = _595; + color = colorTexture.sample(colorTextureSmplr, (uvOrigin + float2(fast::clamp(t, 0.0, 1.0), 0.0)), level(0.0)); + } + return color; +} + +static inline __attribute__((always_inline)) +float4 filterBlur(thread const float2& colorTexCoord, thread const texture2d colorTexture, thread const sampler colorTextureSmplr, thread const float2& colorTextureSize, thread const float4& filterParams0, thread const float4& filterParams1) +{ + float2 srcOffsetScale = filterParams0.xy / colorTextureSize; + int support = int(filterParams0.z); + float3 gaussCoeff = filterParams1.xyz; + float gaussSum = gaussCoeff.x; + float4 color = colorTexture.sample(colorTextureSmplr, colorTexCoord, level(0.0)) * gaussCoeff.x; + float2 _640 = gaussCoeff.xy * gaussCoeff.yz; + gaussCoeff = float3(_640.x, _640.y, gaussCoeff.z); + for (int i = 1; i <= support; i += 2) + { + float gaussPartialSum = gaussCoeff.x; + float2 _660 = gaussCoeff.xy * gaussCoeff.yz; + gaussCoeff = float3(_660.x, _660.y, gaussCoeff.z); + gaussPartialSum += gaussCoeff.x; + float2 srcOffset = srcOffsetScale * (float(i) + (gaussCoeff.x / gaussPartialSum)); + color += ((colorTexture.sample(colorTextureSmplr, (colorTexCoord - srcOffset), level(0.0)) + colorTexture.sample(colorTextureSmplr, (colorTexCoord + srcOffset), level(0.0))) * gaussPartialSum); + gaussSum += (2.0 * gaussPartialSum); + float2 _700 = gaussCoeff.xy * gaussCoeff.yz; + gaussCoeff = float3(_700.x, _700.y, gaussCoeff.z); + } + return color / float4(gaussSum); +} + +static inline __attribute__((always_inline)) +float filterTextSample1Tap(thread const float& offset, thread const texture2d colorTexture, thread const sampler colorTextureSmplr, thread const float2& colorTexCoord) +{ + return colorTexture.sample(colorTextureSmplr, (colorTexCoord + float2(offset, 0.0)), level(0.0)).x; +} + +static inline __attribute__((always_inline)) +void 
filterTextSample9Tap(thread float4& outAlphaLeft, thread float& outAlphaCenter, thread float4& outAlphaRight, thread const texture2d colorTexture, thread const sampler colorTextureSmplr, thread const float2& colorTexCoord, thread const float4& kernel0, thread const float& onePixel) +{ + bool wide = kernel0.x > 0.0; + float _276; + if (wide) + { + float param = (-4.0) * onePixel; + float2 param_1 = colorTexCoord; + _276 = filterTextSample1Tap(param, colorTexture, colorTextureSmplr, param_1); + } + else + { + _276 = 0.0; + } + float param_2 = (-3.0) * onePixel; + float2 param_3 = colorTexCoord; + float param_4 = (-2.0) * onePixel; + float2 param_5 = colorTexCoord; + float param_6 = (-1.0) * onePixel; + float2 param_7 = colorTexCoord; + outAlphaLeft = float4(_276, filterTextSample1Tap(param_2, colorTexture, colorTextureSmplr, param_3), filterTextSample1Tap(param_4, colorTexture, colorTextureSmplr, param_5), filterTextSample1Tap(param_6, colorTexture, colorTextureSmplr, param_7)); + float param_8 = 0.0; + float2 param_9 = colorTexCoord; + outAlphaCenter = filterTextSample1Tap(param_8, colorTexture, colorTextureSmplr, param_9); + float param_10 = 1.0 * onePixel; + float2 param_11 = colorTexCoord; + float param_12 = 2.0 * onePixel; + float2 param_13 = colorTexCoord; + float param_14 = 3.0 * onePixel; + float2 param_15 = colorTexCoord; + float _336; + if (wide) + { + float param_16 = 4.0 * onePixel; + float2 param_17 = colorTexCoord; + _336 = filterTextSample1Tap(param_16, colorTexture, colorTextureSmplr, param_17); + } + else + { + _336 = 0.0; + } + outAlphaRight = float4(filterTextSample1Tap(param_10, colorTexture, colorTextureSmplr, param_11), filterTextSample1Tap(param_12, colorTexture, colorTextureSmplr, param_13), filterTextSample1Tap(param_14, colorTexture, colorTextureSmplr, param_15), _336); +} + +static inline __attribute__((always_inline)) +float filterTextConvolve7Tap(thread const float4& alpha0, thread const float3& alpha1, thread const float4& kernel0) +{ + 
return dot(alpha0, kernel0) + dot(alpha1, kernel0.zyx); +} + +static inline __attribute__((always_inline)) +float filterTextGammaCorrectChannel(thread const float& bgColor, thread const float& fgColor, thread const texture2d gammaLUT, thread const sampler gammaLUTSmplr) +{ + return gammaLUT.sample(gammaLUTSmplr, float2(fgColor, 1.0 - bgColor), level(0.0)).x; +} + +static inline __attribute__((always_inline)) +float3 filterTextGammaCorrect(thread const float3& bgColor, thread const float3& fgColor, thread const texture2d gammaLUT, thread const sampler gammaLUTSmplr) +{ + float param = bgColor.x; + float param_1 = fgColor.x; + float param_2 = bgColor.y; + float param_3 = fgColor.y; + float param_4 = bgColor.z; + float param_5 = fgColor.z; + return float3(filterTextGammaCorrectChannel(param, param_1, gammaLUT, gammaLUTSmplr), filterTextGammaCorrectChannel(param_2, param_3, gammaLUT, gammaLUTSmplr), filterTextGammaCorrectChannel(param_4, param_5, gammaLUT, gammaLUTSmplr)); +} + +static inline __attribute__((always_inline)) +float4 filterText(thread const float2& colorTexCoord, thread const texture2d colorTexture, thread const sampler colorTextureSmplr, thread const texture2d gammaLUT, thread const sampler gammaLUTSmplr, thread const float2& colorTextureSize, thread const float4& filterParams0, thread const float4& filterParams1, thread const float4& filterParams2) +{ + float4 kernel0 = filterParams0; + float3 bgColor = filterParams1.xyz; + float3 fgColor = filterParams2.xyz; + bool gammaCorrectionEnabled = filterParams2.w != 0.0; + float3 alpha; + if (kernel0.w == 0.0) + { + alpha = colorTexture.sample(colorTextureSmplr, colorTexCoord, level(0.0)).xxx; + } + else + { + float2 param_3 = colorTexCoord; + float4 param_4 = kernel0; + float param_5 = 1.0 / colorTextureSize.x; + float4 param; + float param_1; + float4 param_2; + filterTextSample9Tap(param, param_1, param_2, colorTexture, colorTextureSmplr, param_3, param_4, param_5); + float4 alphaLeft = param; + float 
alphaCenter = param_1; + float4 alphaRight = param_2; + float4 param_6 = alphaLeft; + float3 param_7 = float3(alphaCenter, alphaRight.xy); + float4 param_8 = kernel0; + float r = filterTextConvolve7Tap(param_6, param_7, param_8); + float4 param_9 = float4(alphaLeft.yzw, alphaCenter); + float3 param_10 = alphaRight.xyz; + float4 param_11 = kernel0; + float g = filterTextConvolve7Tap(param_9, param_10, param_11); + float4 param_12 = float4(alphaLeft.zw, alphaCenter, alphaRight.x); + float3 param_13 = alphaRight.yzw; + float4 param_14 = kernel0; + float b = filterTextConvolve7Tap(param_12, param_13, param_14); + alpha = float3(r, g, b); + } + if (gammaCorrectionEnabled) + { + float3 param_15 = bgColor; + float3 param_16 = alpha; + alpha = filterTextGammaCorrect(param_15, param_16, gammaLUT, gammaLUTSmplr); + } + return float4(mix(bgColor, fgColor, alpha), 1.0); +} + +static inline __attribute__((always_inline)) +float4 sampleColor(thread const texture2d colorTexture, thread const sampler colorTextureSmplr, thread const float2& colorTexCoord) +{ + return colorTexture.sample(colorTextureSmplr, colorTexCoord, level(0.0)); +} + +static inline __attribute__((always_inline)) +float4 filterNone(thread const float2& colorTexCoord, thread const texture2d colorTexture, thread const sampler colorTextureSmplr) +{ + float2 param = colorTexCoord; + return sampleColor(colorTexture, colorTextureSmplr, param); +} + +static inline __attribute__((always_inline)) +float4 filterColor(thread const float2& colorTexCoord, thread const texture2d colorTexture, thread const sampler colorTextureSmplr, thread const texture2d gammaLUT, thread const sampler gammaLUTSmplr, thread const float2& colorTextureSize, thread const float2& fragCoord, thread const float2& framebufferSize, thread const float4& filterParams0, thread const float4& filterParams1, thread const float4& filterParams2, thread const int& colorFilter) +{ + switch (colorFilter) + { + case 1: + { + float2 param = colorTexCoord; + float2 
param_1 = colorTextureSize; + float2 param_2 = fragCoord; + float2 param_3 = framebufferSize; + float4 param_4 = filterParams0; + float4 param_5 = filterParams1; + return filterRadialGradient(param, colorTexture, colorTextureSmplr, param_1, param_2, param_3, param_4, param_5); + } + case 3: + { + float2 param_6 = colorTexCoord; + float2 param_7 = colorTextureSize; + float4 param_8 = filterParams0; + float4 param_9 = filterParams1; + return filterBlur(param_6, colorTexture, colorTextureSmplr, param_7, param_8, param_9); + } + case 2: + { + float2 param_10 = colorTexCoord; + float2 param_11 = colorTextureSize; + float4 param_12 = filterParams0; + float4 param_13 = filterParams1; + float4 param_14 = filterParams2; + return filterText(param_10, colorTexture, colorTextureSmplr, gammaLUT, gammaLUTSmplr, param_11, param_12, param_13, param_14); + } + } + float2 param_15 = colorTexCoord; + return filterNone(param_15, colorTexture, colorTextureSmplr); +} + +static inline __attribute__((always_inline)) +float4 combineColor0(thread const float4& destColor, thread const float4& srcColor, thread const int& op) +{ + switch (op) + { + case 1: + { + return float4(srcColor.xyz, srcColor.w * destColor.w); + } + case 2: + { + return float4(destColor.xyz, srcColor.w * destColor.w); + } + } + return destColor; +} + +static inline __attribute__((always_inline)) +float3 compositeScreen(thread const float3& destColor, thread const float3& srcColor) +{ + return (destColor + srcColor) - (destColor * srcColor); +} + +static inline __attribute__((always_inline)) +float3 compositeSelect(thread const bool3& cond, thread const float3& ifTrue, thread const float3& ifFalse) +{ + float _766; + if (cond.x) + { + _766 = ifTrue.x; + } + else + { + _766 = ifFalse.x; + } + float _777; + if (cond.y) + { + _777 = ifTrue.y; + } + else + { + _777 = ifFalse.y; + } + float _788; + if (cond.z) + { + _788 = ifTrue.z; + } + else + { + _788 = ifFalse.z; + } + return float3(_766, _777, _788); +} + +static inline 
__attribute__((always_inline)) +float3 compositeHardLight(thread const float3& destColor, thread const float3& srcColor) +{ + float3 param = destColor; + float3 param_1 = (float3(2.0) * srcColor) - float3(1.0); + bool3 param_2 = srcColor <= float3(0.5); + float3 param_3 = (destColor * float3(2.0)) * srcColor; + float3 param_4 = compositeScreen(param, param_1); + return compositeSelect(param_2, param_3, param_4); +} + +static inline __attribute__((always_inline)) +float3 compositeColorDodge(thread const float3& destColor, thread const float3& srcColor) +{ + bool3 destZero = destColor == float3(0.0); + bool3 srcOne = srcColor == float3(1.0); + bool3 param = srcOne; + float3 param_1 = float3(1.0); + float3 param_2 = destColor / (float3(1.0) - srcColor); + bool3 param_3 = destZero; + float3 param_4 = float3(0.0); + float3 param_5 = compositeSelect(param, param_1, param_2); + return compositeSelect(param_3, param_4, param_5); +} + +static inline __attribute__((always_inline)) +float3 compositeSoftLight(thread const float3& destColor, thread const float3& srcColor) +{ + bool3 param = destColor <= float3(0.25); + float3 param_1 = ((((float3(16.0) * destColor) - float3(12.0)) * destColor) + float3(4.0)) * destColor; + float3 param_2 = sqrt(destColor); + float3 darkenedDestColor = compositeSelect(param, param_1, param_2); + bool3 param_3 = srcColor <= float3(0.5); + float3 param_4 = destColor * (float3(1.0) - destColor); + float3 param_5 = darkenedDestColor - destColor; + float3 factor = compositeSelect(param_3, param_4, param_5); + return destColor + (((srcColor * 2.0) - float3(1.0)) * factor); +} + +static inline __attribute__((always_inline)) +float compositeDivide(thread const float& num, thread const float& denom) +{ + float _802; + if (denom != 0.0) + { + _802 = num / denom; + } + else + { + _802 = 0.0; + } + return _802; +} + +static inline __attribute__((always_inline)) +float3 compositeRGBToHSL(thread const float3& rgb) +{ + float v = fast::max(fast::max(rgb.x, 
rgb.y), rgb.z); + float xMin = fast::min(fast::min(rgb.x, rgb.y), rgb.z); + float c = v - xMin; + float l = mix(xMin, v, 0.5); + float3 _908; + if (rgb.x == v) + { + _908 = float3(0.0, rgb.yz); + } + else + { + float3 _921; + if (rgb.y == v) + { + _921 = float3(2.0, rgb.zx); + } + else + { + _921 = float3(4.0, rgb.xy); + } + _908 = _921; + } + float3 terms = _908; + float param = ((terms.x * c) + terms.y) - terms.z; + float param_1 = c; + float h = 1.0471975803375244140625 * compositeDivide(param, param_1); + float param_2 = c; + float param_3 = v; + float s = compositeDivide(param_2, param_3); + return float3(h, s, l); +} + +static inline __attribute__((always_inline)) +float3 compositeHSL(thread const float3& destColor, thread const float3& srcColor, thread const int& op) +{ + switch (op) + { + case 12: + { + return float3(srcColor.x, destColor.y, destColor.z); + } + case 13: + { + return float3(destColor.x, srcColor.y, destColor.z); + } + case 14: + { + return float3(srcColor.x, srcColor.y, destColor.z); + } + default: + { + return float3(destColor.x, destColor.y, srcColor.z); + } + } +} + +static inline __attribute__((always_inline)) +float3 compositeHSLToRGB(thread const float3& hsl) +{ + float a = hsl.y * fast::min(hsl.z, 1.0 - hsl.z); + float3 ks = mod(float3(0.0, 8.0, 4.0) + float3(hsl.x * 1.90985929965972900390625), float3(12.0)); + return hsl.zzz - (fast::clamp(fast::min(ks - float3(3.0), float3(9.0) - ks), float3(-1.0), float3(1.0)) * a); +} + +static inline __attribute__((always_inline)) +float3 compositeRGB(thread const float3& destColor, thread const float3& srcColor, thread const int& op) +{ + switch (op) + { + case 1: + { + return destColor * srcColor; + } + case 2: + { + float3 param = destColor; + float3 param_1 = srcColor; + return compositeScreen(param, param_1); + } + case 3: + { + float3 param_2 = srcColor; + float3 param_3 = destColor; + return compositeHardLight(param_2, param_3); + } + case 4: + { + return fast::min(destColor, srcColor); + 
} + case 5: + { + return fast::max(destColor, srcColor); + } + case 6: + { + float3 param_4 = destColor; + float3 param_5 = srcColor; + return compositeColorDodge(param_4, param_5); + } + case 7: + { + float3 param_6 = float3(1.0) - destColor; + float3 param_7 = float3(1.0) - srcColor; + return float3(1.0) - compositeColorDodge(param_6, param_7); + } + case 8: + { + float3 param_8 = destColor; + float3 param_9 = srcColor; + return compositeHardLight(param_8, param_9); + } + case 9: + { + float3 param_10 = destColor; + float3 param_11 = srcColor; + return compositeSoftLight(param_10, param_11); + } + case 10: + { + return abs(destColor - srcColor); + } + case 11: + { + return (destColor + srcColor) - ((float3(2.0) * destColor) * srcColor); + } + case 12: + case 13: + case 14: + case 15: + { + float3 param_12 = destColor; + float3 param_13 = srcColor; + float3 param_14 = compositeRGBToHSL(param_12); + float3 param_15 = compositeRGBToHSL(param_13); + int param_16 = op; + float3 param_17 = compositeHSL(param_14, param_15, param_16); + return compositeHSLToRGB(param_17); + } + } + return srcColor; +} + +static inline __attribute__((always_inline)) +float4 composite(thread const float4& srcColor, thread const texture2d destTexture, thread const sampler destTextureSmplr, thread const float2& destTextureSize, thread const float2& fragCoord, thread const int& op) +{ + if (op == 0) + { + return srcColor; + } + float2 destTexCoord = fragCoord / destTextureSize; + float4 destColor = destTexture.sample(destTextureSmplr, destTexCoord, level(0.0)); + float3 param = destColor.xyz; + float3 param_1 = srcColor.xyz; + int param_2 = op; + float3 blendedRGB = compositeRGB(param, param_1, param_2); + return float4(((srcColor.xyz * (srcColor.w * (1.0 - destColor.w))) + (blendedRGB * (srcColor.w * destColor.w))) + (destColor.xyz * (1.0 - srcColor.w)), 1.0); +} + +static inline __attribute__((always_inline)) +float4 calculateColor(thread const float2& fragCoord, thread const texture2d 
colorTexture0, thread const sampler colorTexture0Smplr, thread const texture2d maskTexture0, thread const sampler maskTexture0Smplr, thread const texture2d destTexture, thread const sampler destTextureSmplr, thread const texture2d gammaLUT, thread const sampler gammaLUTSmplr, thread const float2& colorTextureSize0, thread const float2& maskTextureSize0, thread const float4& filterParams0, thread const float4& filterParams1, thread const float4& filterParams2, thread const float2& framebufferSize, thread const int& ctrl, thread const float3& maskTexCoord0, thread const float2& colorTexCoord0, thread const float4& baseColor, thread const int& tileCtrl) +{ + int maskCtrl0 = (tileCtrl >> 0) & 3; + float maskAlpha = 1.0; + float param = maskAlpha; + float2 param_1 = maskTextureSize0; + float3 param_2 = maskTexCoord0; + int param_3 = maskCtrl0; + maskAlpha = sampleMask(param, maskTexture0, maskTexture0Smplr, param_1, param_2, param_3); + float4 color = baseColor; + int color0Combine = (ctrl >> 6) & 3; + if (color0Combine != 0) + { + int color0Filter = (ctrl >> 4) & 3; + float2 param_4 = colorTexCoord0; + float2 param_5 = colorTextureSize0; + float2 param_6 = fragCoord; + float2 param_7 = framebufferSize; + float4 param_8 = filterParams0; + float4 param_9 = filterParams1; + float4 param_10 = filterParams2; + int param_11 = color0Filter; + float4 color0 = filterColor(param_4, colorTexture0, colorTexture0Smplr, gammaLUT, gammaLUTSmplr, param_5, param_6, param_7, param_8, param_9, param_10, param_11); + float4 param_12 = color; + float4 param_13 = color0; + int param_14 = color0Combine; + color = combineColor0(param_12, param_13, param_14); + } + color.w *= maskAlpha; + int compositeOp = (ctrl >> 8) & 15; + float4 param_15 = color; + float2 param_16 = framebufferSize; + float2 param_17 = fragCoord; + int param_18 = compositeOp; + color = composite(param_15, destTexture, destTextureSmplr, param_16, param_17, param_18); + float3 _1364 = color.xyz * color.w; + color = 
float4(_1364.x, _1364.y, _1364.z, color.w); + return color; +} + +kernel void main0(constant int2& uFramebufferTileSize [[buffer(3)]], constant int& uLoadAction [[buffer(4)]], constant int2& uTextureMetadataSize [[buffer(7)]], constant float2& uFramebufferSize [[buffer(0)]], constant float2& uTileSize [[buffer(1)]], constant float4& uClearColor [[buffer(5)]], constant float2& uColorTextureSize0 [[buffer(8)]], constant float2& uMaskTextureSize0 [[buffer(9)]], const device bFirstTileMap& _1510 [[buffer(2)]], const device bTiles& _1603 [[buffer(6)]], texture2d uDestImage [[texture(0)]], texture2d uTextureMetadata [[texture(1)]], texture2d uColorTexture0 [[texture(2)]], texture2d uMaskTexture0 [[texture(3)]], texture2d uDestTexture [[texture(4)]], texture2d uGammaLUT [[texture(5)]], sampler uTextureMetadataSmplr [[sampler(0)]], sampler uColorTexture0Smplr [[sampler(1)]], sampler uMaskTexture0Smplr [[sampler(2)]], sampler uDestTextureSmplr [[sampler(3)]], sampler uGammaLUTSmplr [[sampler(4)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) +{ + int2 tileCoord = int2(gl_WorkGroupID.xy); + int2 firstTileSubCoord = int2(gl_LocalInvocationID.xy) * int2(1, 4); + int2 firstFragCoord = (tileCoord * int2(uTileSize)) + firstTileSubCoord; + int tileIndex = _1510.iFirstTileMap[tileCoord.x + (uFramebufferTileSize.x * tileCoord.y)]; + if ((tileIndex < 0) && (uLoadAction != 0)) + { + return; + } + float4x4 destColors; + for (int subY = 0; subY < 4; subY++) + { + if (uLoadAction == 0) + { + destColors[subY] = uClearColor; + } + else + { + int2 param = firstFragCoord + int2(0, subY); + int2 imageCoords = toImageCoords(param, uFramebufferSize); + destColors[subY] = uDestImage.read(uint2(imageCoords)); + } + } + int backdrop; + uint2 maskTileCoord; + float2 param_4; + float4 param_5; + float4 param_6; + float4 param_7; + float4 param_8; + int param_9; + while (tileIndex >= 0) + { + for (int subY_1 = 0; subY_1 < 4; 
subY_1++) + { + int2 tileSubCoord = firstTileSubCoord + int2(0, subY_1); + float2 fragCoord = float2(firstFragCoord + int2(0, subY_1)) + float2(0.5); + int alphaTileIndex = int(_1603.iTiles[(tileIndex * 4) + 2] << uint(8)) >> 8; + uint tileControlWord = _1603.iTiles[(tileIndex * 4) + 3]; + uint colorEntry = tileControlWord & 65535u; + int tileCtrl = int((tileControlWord >> uint(16)) & 255u); + if (alphaTileIndex >= 0) + { + backdrop = 0; + maskTileCoord = uint2(uint(alphaTileIndex & 255), uint(alphaTileIndex >> 8)) * uint2(uTileSize); + } + else + { + backdrop = int(tileControlWord) >> 24; + maskTileCoord = uint2(0u); + tileCtrl &= (-4); + } + float3 maskTexCoord0 = float3(float2(int2(maskTileCoord) + tileSubCoord), float(backdrop)); + float2 param_1 = fragCoord; + int param_2 = int(colorEntry); + int2 param_3 = uTextureMetadataSize; + computeTileVaryings(param_1, param_2, uTextureMetadata, uTextureMetadataSmplr, param_3, param_4, param_5, param_6, param_7, param_8, param_9); + float2 colorTexCoord0 = param_4; + float4 baseColor = param_5; + float4 filterParams0 = param_6; + float4 filterParams1 = param_7; + float4 filterParams2 = param_8; + int ctrl = param_9; + float2 param_10 = fragCoord; + float2 param_11 = uColorTextureSize0; + float2 param_12 = uMaskTextureSize0; + float4 param_13 = filterParams0; + float4 param_14 = filterParams1; + float4 param_15 = filterParams2; + float2 param_16 = uFramebufferSize; + int param_17 = ctrl; + float3 param_18 = maskTexCoord0; + float2 param_19 = colorTexCoord0; + float4 param_20 = baseColor; + int param_21 = tileCtrl; + float4 srcColor = calculateColor(param_10, uColorTexture0, uColorTexture0Smplr, uMaskTexture0, uMaskTexture0Smplr, uDestTexture, uDestTextureSmplr, uGammaLUT, uGammaLUTSmplr, param_11, param_12, param_13, param_14, param_15, param_16, param_17, param_18, param_19, param_20, param_21); + destColors[subY_1] = (destColors[subY_1] * (1.0 - srcColor.w)) + srcColor; + } + tileIndex = int(_1603.iTiles[(tileIndex * 
4) + 0]); + } + for (int subY_2 = 0; subY_2 < 4; subY_2++) + { + int2 param_22 = firstFragCoord + int2(0, subY_2); + uDestImage.write(destColors[subY_2], uint2(toImageCoords(param_22, uFramebufferSize))); + } +} + diff --git a/resources/shaders/metal/fill.fs.metal b/resources/shaders/metal/d3d9/fill.fs.metal similarity index 100% rename from resources/shaders/metal/fill.fs.metal rename to resources/shaders/metal/d3d9/fill.fs.metal diff --git a/resources/shaders/metal/d3d9/fill.vs.metal b/resources/shaders/metal/d3d9/fill.vs.metal new file mode 100644 index 00000000..9cc7459d --- /dev/null +++ b/resources/shaders/metal/d3d9/fill.vs.metal @@ -0,0 +1,83 @@ +// Automatically generated from files in pathfinder/shaders/. Do not edit! +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float2 vFrom [[user(locn0)]]; + float2 vTo [[user(locn1)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + uint2 aTessCoord [[attribute(0)]]; + uint4 aLineSegment [[attribute(1)]]; + int aTileIndex [[attribute(2)]]; +}; + +static inline __attribute__((always_inline)) +float2 computeTileOffset(thread const uint& tileIndex, thread const float& stencilTextureWidth, thread const float2& tileSize) +{ + uint tilesPerRow = uint(stencilTextureWidth / tileSize.x); + uint2 tileOffset = uint2(tileIndex % tilesPerRow, tileIndex / tilesPerRow); + return (float2(tileOffset) * tileSize) * float2(1.0, 0.25); +} + +static inline __attribute__((always_inline)) +float4 computeVertexPosition(thread const uint& tileIndex, thread const uint2& tessCoord, thread const uint4& packedLineSegment, thread const float2& tileSize, thread const float2& framebufferSize, thread float2& outFrom, thread float2& outTo) +{ + uint param = tileIndex; + float param_1 = framebufferSize.x; + float2 param_2 = tileSize; + float2 tileOrigin = computeTileOffset(param, param_1, param_2); + float4 lineSegment = float4(packedLineSegment) / 
float4(256.0); + float2 from = lineSegment.xy; + float2 to = lineSegment.zw; + float2 position; + if (tessCoord.x == 0u) + { + position.x = floor(fast::min(from.x, to.x)); + } + else + { + position.x = ceil(fast::max(from.x, to.x)); + } + if (tessCoord.y == 0u) + { + position.y = floor(fast::min(from.y, to.y)); + } + else + { + position.y = tileSize.y; + } + position.y = floor(position.y * 0.25); + float2 offset = float2(0.0, 1.5) - (position * float2(1.0, 4.0)); + outFrom = from + offset; + outTo = to + offset; + float2 globalPosition = (((tileOrigin + position) / framebufferSize) * 2.0) - float2(1.0); + globalPosition.y = -globalPosition.y; + return float4(globalPosition, 0.0, 1.0); +} + +vertex main0_out main0(main0_in in [[stage_in]], constant float2& uTileSize [[buffer(0)]], constant float2& uFramebufferSize [[buffer(1)]]) +{ + main0_out out = {}; + uint param = uint(in.aTileIndex); + uint2 param_1 = in.aTessCoord; + uint4 param_2 = in.aLineSegment; + float2 param_3 = uTileSize; + float2 param_4 = uFramebufferSize; + float2 param_5; + float2 param_6; + float4 _190 = computeVertexPosition(param, param_1, param_2, param_3, param_4, param_5, param_6); + out.vFrom = param_5; + out.vTo = param_6; + out.gl_Position = _190; + return out; +} + diff --git a/resources/shaders/metal/tile.fs.metal b/resources/shaders/metal/d3d9/tile.fs.metal similarity index 81% rename from resources/shaders/metal/tile.fs.metal rename to resources/shaders/metal/d3d9/tile.fs.metal index 1b2c7e9d..90346ffc 100644 --- a/resources/shaders/metal/tile.fs.metal +++ b/resources/shaders/metal/d3d9/tile.fs.metal @@ -6,7 +6,7 @@ using namespace metal; -constant float3 _1042 = {}; +constant float3 _1056 = {}; struct main0_out { @@ -19,6 +19,10 @@ struct main0_in float2 vColorTexCoord0 [[user(locn1)]]; float4 vBaseColor [[user(locn2)]]; float vTileCtrl [[user(locn3)]]; + float4 vFilterParams0 [[user(locn4)]]; + float4 vFilterParams1 [[user(locn5)]]; + float4 vFilterParams2 [[user(locn6)]]; + float 
vCtrl [[user(locn7)]]; }; // Implementation of the GLSL mod() function, which is slightly different than Metal fmod() @@ -71,16 +75,16 @@ float4 filterRadialGradient(thread const float2& colorTexCoord, thread const tex { ts = ts.yx; } - float _555; + float _569; if (ts.x >= 0.0) { - _555 = ts.x; + _569 = ts.x; } else { - _555 = ts.y; + _569 = ts.y; } - float t = _555; + float t = _569; color = colorTexture.sample(colorTextureSmplr, (uvOrigin + float2(fast::clamp(t, 0.0, 1.0), 0.0))); } return color; @@ -94,19 +98,19 @@ float4 filterBlur(thread const float2& colorTexCoord, thread const texture2d colorTexture, thread const sampler colorTextureSmplr, thread const float2& colorTexCoord, thread const float4& kernel0, thread const float& onePixel) { bool wide = kernel0.x > 0.0; - float _236; + float _250; if (wide) { float param = (-4.0) * onePixel; float2 param_1 = colorTexCoord; - _236 = filterTextSample1Tap(param, colorTexture, colorTextureSmplr, param_1); + _250 = filterTextSample1Tap(param, colorTexture, colorTextureSmplr, param_1); } else { - _236 = 0.0; + _250 = 0.0; } float param_2 = (-3.0) * onePixel; float2 param_3 = colorTexCoord; @@ -138,7 +142,7 @@ void filterTextSample9Tap(thread float4& outAlphaLeft, thread float& outAlphaCen float2 param_5 = colorTexCoord; float param_6 = (-1.0) * onePixel; float2 param_7 = colorTexCoord; - outAlphaLeft = float4(_236, filterTextSample1Tap(param_2, colorTexture, colorTextureSmplr, param_3), filterTextSample1Tap(param_4, colorTexture, colorTextureSmplr, param_5), filterTextSample1Tap(param_6, colorTexture, colorTextureSmplr, param_7)); + outAlphaLeft = float4(_250, filterTextSample1Tap(param_2, colorTexture, colorTextureSmplr, param_3), filterTextSample1Tap(param_4, colorTexture, colorTextureSmplr, param_5), filterTextSample1Tap(param_6, colorTexture, colorTextureSmplr, param_7)); float param_8 = 0.0; float2 param_9 = colorTexCoord; outAlphaCenter = filterTextSample1Tap(param_8, colorTexture, colorTextureSmplr, param_9); @@ 
-148,18 +152,18 @@ void filterTextSample9Tap(thread float4& outAlphaLeft, thread float& outAlphaCen float2 param_13 = colorTexCoord; float param_14 = 3.0 * onePixel; float2 param_15 = colorTexCoord; - float _296; + float _310; if (wide) { float param_16 = 4.0 * onePixel; float2 param_17 = colorTexCoord; - _296 = filterTextSample1Tap(param_16, colorTexture, colorTextureSmplr, param_17); + _310 = filterTextSample1Tap(param_16, colorTexture, colorTextureSmplr, param_17); } else { - _296 = 0.0; + _310 = 0.0; } - outAlphaRight = float4(filterTextSample1Tap(param_10, colorTexture, colorTextureSmplr, param_11), filterTextSample1Tap(param_12, colorTexture, colorTextureSmplr, param_13), filterTextSample1Tap(param_14, colorTexture, colorTextureSmplr, param_15), _296); + outAlphaRight = float4(filterTextSample1Tap(param_10, colorTexture, colorTextureSmplr, param_11), filterTextSample1Tap(param_12, colorTexture, colorTextureSmplr, param_13), filterTextSample1Tap(param_14, colorTexture, colorTextureSmplr, param_15), _310); } static inline __attribute__((always_inline)) @@ -309,34 +313,34 @@ float3 compositeScreen(thread const float3& destColor, thread const float3& srcC static inline __attribute__((always_inline)) float3 compositeSelect(thread const bool3& cond, thread const float3& ifTrue, thread const float3& ifFalse) { - float _726; + float _740; if (cond.x) { - _726 = ifTrue.x; + _740 = ifTrue.x; } else { - _726 = ifFalse.x; + _740 = ifFalse.x; } - float _737; + float _751; if (cond.y) { - _737 = ifTrue.y; + _751 = ifTrue.y; } else { - _737 = ifFalse.y; + _751 = ifFalse.y; } - float _748; + float _762; if (cond.z) { - _748 = ifTrue.z; + _762 = ifTrue.z; } else { - _748 = ifFalse.z; + _762 = ifFalse.z; } - return float3(_726, _737, _748); + return float3(_740, _751, _762); } static inline __attribute__((always_inline)) @@ -381,16 +385,16 @@ float3 compositeSoftLight(thread const float3& destColor, thread const float3& s static inline __attribute__((always_inline)) float 
compositeDivide(thread const float& num, thread const float& denom) { - float _762; + float _776; if (denom != 0.0) { - _762 = num / denom; + _776 = num / denom; } else { - _762 = 0.0; + _776 = 0.0; } - return _762; + return _776; } static inline __attribute__((always_inline)) @@ -400,25 +404,25 @@ float3 compositeRGBToHSL(thread const float3& rgb) float xMin = fast::min(fast::min(rgb.x, rgb.y), rgb.z); float c = v - xMin; float l = mix(xMin, v, 0.5); - float3 _868; + float3 _882; if (rgb.x == v) { - _868 = float3(0.0, rgb.yz); + _882 = float3(0.0, rgb.yz); } else { - float3 _881; + float3 _895; if (rgb.y == v) { - _881 = float3(2.0, rgb.zx); + _895 = float3(2.0, rgb.zx); } else { - _881 = float3(4.0, rgb.xy); + _895 = float3(4.0, rgb.xy); } - _868 = _881; + _882 = _895; } - float3 terms = _868; + float3 terms = _882; float param = ((terms.x * c) + terms.y) - terms.z; float param_1 = c; float h = 1.0471975803375244140625 * compositeDivide(param, param_1); @@ -555,29 +559,29 @@ float4 composite(thread const float4& srcColor, thread const texture2d de } static inline __attribute__((always_inline)) -void calculateColor(thread const int& tileCtrl, thread const int& ctrl, thread texture2d uMaskTexture0, thread const sampler uMaskTexture0Smplr, thread float2 uMaskTextureSize0, thread float3& vMaskTexCoord0, thread float4& vBaseColor, thread float2& vColorTexCoord0, thread texture2d uColorTexture0, thread const sampler uColorTexture0Smplr, thread texture2d uGammaLUT, thread const sampler uGammaLUTSmplr, thread float2 uColorTextureSize0, thread float4& gl_FragCoord, thread float2 uFramebufferSize, thread float4 uFilterParams0, thread float4 uFilterParams1, thread float4 uFilterParams2, thread texture2d uDestTexture, thread const sampler uDestTextureSmplr, thread float4& oFragColor) +float4 calculateColor(thread const float2& fragCoord, thread const texture2d colorTexture0, thread const sampler colorTexture0Smplr, thread const texture2d maskTexture0, thread const sampler 
maskTexture0Smplr, thread const texture2d destTexture, thread const sampler destTextureSmplr, thread const texture2d gammaLUT, thread const sampler gammaLUTSmplr, thread const float2& colorTextureSize0, thread const float2& maskTextureSize0, thread const float4& filterParams0, thread const float4& filterParams1, thread const float4& filterParams2, thread const float2& framebufferSize, thread const int& ctrl, thread const float3& maskTexCoord0, thread const float2& colorTexCoord0, thread const float4& baseColor, thread const int& tileCtrl) { int maskCtrl0 = (tileCtrl >> 0) & 3; float maskAlpha = 1.0; float param = maskAlpha; - float2 param_1 = uMaskTextureSize0; - float3 param_2 = vMaskTexCoord0; + float2 param_1 = maskTextureSize0; + float3 param_2 = maskTexCoord0; int param_3 = maskCtrl0; - maskAlpha = sampleMask(param, uMaskTexture0, uMaskTexture0Smplr, param_1, param_2, param_3); - float4 color = vBaseColor; + maskAlpha = sampleMask(param, maskTexture0, maskTexture0Smplr, param_1, param_2, param_3); + float4 color = baseColor; int color0Combine = (ctrl >> 6) & 3; if (color0Combine != 0) { int color0Filter = (ctrl >> 4) & 3; - float2 param_4 = vColorTexCoord0; - float2 param_5 = uColorTextureSize0; - float2 param_6 = gl_FragCoord.xy; - float2 param_7 = uFramebufferSize; - float4 param_8 = uFilterParams0; - float4 param_9 = uFilterParams1; - float4 param_10 = uFilterParams2; + float2 param_4 = colorTexCoord0; + float2 param_5 = colorTextureSize0; + float2 param_6 = fragCoord; + float2 param_7 = framebufferSize; + float4 param_8 = filterParams0; + float4 param_9 = filterParams1; + float4 param_10 = filterParams2; int param_11 = color0Filter; - float4 color0 = filterColor(param_4, uColorTexture0, uColorTexture0Smplr, uGammaLUT, uGammaLUTSmplr, param_5, param_6, param_7, param_8, param_9, param_10, param_11); + float4 color0 = filterColor(param_4, colorTexture0, colorTexture0Smplr, gammaLUT, gammaLUTSmplr, param_5, param_6, param_7, param_8, param_9, param_10, 
param_11); float4 param_12 = color; float4 param_13 = color0; int param_14 = color0Combine; @@ -586,21 +590,31 @@ void calculateColor(thread const int& tileCtrl, thread const int& ctrl, thread t color.w *= maskAlpha; int compositeOp = (ctrl >> 8) & 15; float4 param_15 = color; - float2 param_16 = uFramebufferSize; - float2 param_17 = gl_FragCoord.xy; + float2 param_16 = framebufferSize; + float2 param_17 = fragCoord; int param_18 = compositeOp; - color = composite(param_15, uDestTexture, uDestTextureSmplr, param_16, param_17, param_18); - float3 _1347 = color.xyz * color.w; - color = float4(_1347.x, _1347.y, _1347.z, color.w); - oFragColor = color; + color = composite(param_15, destTexture, destTextureSmplr, param_16, param_17, param_18); + float3 _1340 = color.xyz * color.w; + color = float4(_1340.x, _1340.y, _1340.z, color.w); + return color; } -fragment main0_out main0(main0_in in [[stage_in]], constant int& uCtrl [[buffer(6)]], constant float2& uMaskTextureSize0 [[buffer(0)]], constant float2& uColorTextureSize0 [[buffer(1)]], constant float2& uFramebufferSize [[buffer(2)]], constant float4& uFilterParams0 [[buffer(3)]], constant float4& uFilterParams1 [[buffer(4)]], constant float4& uFilterParams2 [[buffer(5)]], texture2d uMaskTexture0 [[texture(0)]], texture2d uColorTexture0 [[texture(1)]], texture2d uGammaLUT [[texture(2)]], texture2d uDestTexture [[texture(3)]], sampler uMaskTexture0Smplr [[sampler(0)]], sampler uColorTexture0Smplr [[sampler(1)]], sampler uGammaLUTSmplr [[sampler(2)]], sampler uDestTextureSmplr [[sampler(3)]], float4 gl_FragCoord [[position]]) +fragment main0_out main0(main0_in in [[stage_in]], constant float2& uColorTextureSize0 [[buffer(0)]], constant float2& uMaskTextureSize0 [[buffer(1)]], constant float2& uFramebufferSize [[buffer(2)]], texture2d uColorTexture0 [[texture(0)]], texture2d uMaskTexture0 [[texture(1)]], texture2d uDestTexture [[texture(2)]], texture2d uGammaLUT [[texture(3)]], sampler uColorTexture0Smplr [[sampler(0)]], 
sampler uMaskTexture0Smplr [[sampler(1)]], sampler uDestTextureSmplr [[sampler(2)]], sampler uGammaLUTSmplr [[sampler(3)]], float4 gl_FragCoord [[position]]) { main0_out out = {}; - int param = int(in.vTileCtrl); - int param_1 = uCtrl; - calculateColor(param, param_1, uMaskTexture0, uMaskTexture0Smplr, uMaskTextureSize0, in.vMaskTexCoord0, in.vBaseColor, in.vColorTexCoord0, uColorTexture0, uColorTexture0Smplr, uGammaLUT, uGammaLUTSmplr, uColorTextureSize0, gl_FragCoord, uFramebufferSize, uFilterParams0, uFilterParams1, uFilterParams2, uDestTexture, uDestTextureSmplr, out.oFragColor); + float2 param = gl_FragCoord.xy; + float2 param_1 = uColorTextureSize0; + float2 param_2 = uMaskTextureSize0; + float4 param_3 = in.vFilterParams0; + float4 param_4 = in.vFilterParams1; + float4 param_5 = in.vFilterParams2; + float2 param_6 = uFramebufferSize; + int param_7 = int(in.vCtrl); + float3 param_8 = in.vMaskTexCoord0; + float2 param_9 = in.vColorTexCoord0; + float4 param_10 = in.vBaseColor; + int param_11 = int(in.vTileCtrl); + out.oFragColor = calculateColor(param, uColorTexture0, uColorTexture0Smplr, uMaskTexture0, uMaskTexture0Smplr, uDestTexture, uDestTextureSmplr, uGammaLUT, uGammaLUTSmplr, param_1, param_2, param_3, param_4, param_5, param_6, param_7, param_8, param_9, param_10, param_11); return out; } diff --git a/resources/shaders/metal/d3d9/tile.vs.metal b/resources/shaders/metal/d3d9/tile.vs.metal new file mode 100644 index 00000000..7e83626b --- /dev/null +++ b/resources/shaders/metal/d3d9/tile.vs.metal @@ -0,0 +1,130 @@ +// Automatically generated from files in pathfinder/shaders/. Do not edit! 
+#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct main0_out +{ + float3 vMaskTexCoord0 [[user(locn0)]]; + float2 vColorTexCoord0 [[user(locn1)]]; + float4 vBaseColor [[user(locn2)]]; + float vTileCtrl [[user(locn3)]]; + float4 vFilterParams0 [[user(locn4)]]; + float4 vFilterParams1 [[user(locn5)]]; + float4 vFilterParams2 [[user(locn6)]]; + float vCtrl [[user(locn7)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + int2 aTileOffset [[attribute(0)]]; + int2 aTileOrigin [[attribute(1)]]; + uint4 aMaskTexCoord0 [[attribute(2)]]; + int2 aCtrlBackdrop [[attribute(3)]]; + int aPathIndex [[attribute(4)]]; + int aColor [[attribute(5)]]; +}; + +static inline __attribute__((always_inline)) +float4 fetchUnscaled(thread const texture2d srcTexture, thread const sampler srcTextureSmplr, thread const float2& scale, thread const float2& originCoord, thread const int& entry) +{ + return srcTexture.sample(srcTextureSmplr, (((originCoord + float2(0.5)) + float2(float(entry), 0.0)) * scale), level(0.0)); +} + +static inline __attribute__((always_inline)) +void computeTileVaryings(thread const float2& position, thread const int& colorEntry, thread const texture2d textureMetadata, thread const sampler textureMetadataSmplr, thread const int2& textureMetadataSize, thread float2& outColorTexCoord0, thread float4& outBaseColor, thread float4& outFilterParams0, thread float4& outFilterParams1, thread float4& outFilterParams2, thread int& outCtrl) +{ + float2 metadataScale = float2(1.0) / float2(textureMetadataSize); + float2 metadataEntryCoord = float2(float((colorEntry % 128) * 8), float(colorEntry / 128)); + float2 param = metadataScale; + float2 param_1 = metadataEntryCoord; + int param_2 = 0; + float4 colorTexMatrix0 = fetchUnscaled(textureMetadata, textureMetadataSmplr, param, param_1, param_2); + float2 param_3 = metadataScale; + float2 param_4 = metadataEntryCoord; + int param_5 = 1; + float4 
colorTexOffsets = fetchUnscaled(textureMetadata, textureMetadataSmplr, param_3, param_4, param_5); + float2 param_6 = metadataScale; + float2 param_7 = metadataEntryCoord; + int param_8 = 2; + float4 baseColor = fetchUnscaled(textureMetadata, textureMetadataSmplr, param_6, param_7, param_8); + float2 param_9 = metadataScale; + float2 param_10 = metadataEntryCoord; + int param_11 = 3; + float4 filterParams0 = fetchUnscaled(textureMetadata, textureMetadataSmplr, param_9, param_10, param_11); + float2 param_12 = metadataScale; + float2 param_13 = metadataEntryCoord; + int param_14 = 4; + float4 filterParams1 = fetchUnscaled(textureMetadata, textureMetadataSmplr, param_12, param_13, param_14); + float2 param_15 = metadataScale; + float2 param_16 = metadataEntryCoord; + int param_17 = 5; + float4 filterParams2 = fetchUnscaled(textureMetadata, textureMetadataSmplr, param_15, param_16, param_17); + float2 param_18 = metadataScale; + float2 param_19 = metadataEntryCoord; + int param_20 = 6; + float4 extra = fetchUnscaled(textureMetadata, textureMetadataSmplr, param_18, param_19, param_20); + outColorTexCoord0 = (float2x2(float2(colorTexMatrix0.xy), float2(colorTexMatrix0.zw)) * position) + colorTexOffsets.xy; + outBaseColor = baseColor; + outFilterParams0 = filterParams0; + outFilterParams1 = filterParams1; + outFilterParams2 = filterParams2; + outCtrl = int(extra.x); +} + +vertex main0_out main0(main0_in in [[stage_in]], constant int2& uZBufferSize [[buffer(1)]], constant int2& uTextureMetadataSize [[buffer(2)]], constant float2& uTileSize [[buffer(0)]], constant float4x4& uTransform [[buffer(3)]], texture2d uZBuffer [[texture(0)]], texture2d uTextureMetadata [[texture(1)]], sampler uZBufferSmplr [[sampler(0)]], sampler uTextureMetadataSmplr [[sampler(1)]]) +{ + main0_out out = {}; + float2 tileOrigin = float2(in.aTileOrigin); + float2 tileOffset = float2(in.aTileOffset); + float2 position = (tileOrigin + tileOffset) * uTileSize; + int4 zValue = 
int4(uZBuffer.sample(uZBufferSmplr, ((tileOrigin + float2(0.5)) / float2(uZBufferSize)), level(0.0)) * 255.0); + if (in.aPathIndex < (((zValue.x | (zValue.y << 8)) | (zValue.z << 16)) | (zValue.w << 24))) + { + out.gl_Position = float4(0.0); + return out; + } + uint2 maskTileCoord = uint2(in.aMaskTexCoord0.x, in.aMaskTexCoord0.y + (256u * in.aMaskTexCoord0.z)); + float2 maskTexCoord0 = (float2(maskTileCoord) + tileOffset) * uTileSize; + bool _244 = in.aCtrlBackdrop.y == 0; + bool _250; + if (_244) + { + _250 = in.aMaskTexCoord0.w != 0u; + } + else + { + _250 = _244; + } + if (_250) + { + out.gl_Position = float4(0.0); + return out; + } + float2 param = position; + int param_1 = in.aColor; + int2 param_2 = uTextureMetadataSize; + float2 param_3; + float4 param_4; + float4 param_5; + float4 param_6; + float4 param_7; + int param_8; + computeTileVaryings(param, param_1, uTextureMetadata, uTextureMetadataSmplr, param_2, param_3, param_4, param_5, param_6, param_7, param_8); + out.vColorTexCoord0 = param_3; + out.vBaseColor = param_4; + out.vFilterParams0 = param_5; + out.vFilterParams1 = param_6; + out.vFilterParams2 = param_7; + int ctrl = param_8; + out.vTileCtrl = float(in.aCtrlBackdrop.x); + out.vCtrl = float(ctrl); + out.vMaskTexCoord0 = float3(maskTexCoord0, float(in.aCtrlBackdrop.y)); + out.gl_Position = uTransform * float4(position, 0.0, 1.0); + return out; +} + diff --git a/resources/shaders/metal/d3d9/tile_clip_combine.fs.metal b/resources/shaders/metal/d3d9/tile_clip_combine.fs.metal new file mode 100644 index 00000000..f0fd36a2 --- /dev/null +++ b/resources/shaders/metal/d3d9/tile_clip_combine.fs.metal @@ -0,0 +1,26 @@ +// Automatically generated from files in pathfinder/shaders/. Do not edit! 
+#include +#include + +using namespace metal; + +struct main0_out +{ + float4 oFragColor [[color(0)]]; +}; + +struct main0_in +{ + float2 vTexCoord0 [[user(locn0)]]; + float vBackdrop0 [[user(locn1)]]; + float2 vTexCoord1 [[user(locn2)]]; + float vBackdrop1 [[user(locn3)]]; +}; + +fragment main0_out main0(main0_in in [[stage_in]], texture2d uSrc [[texture(0)]], sampler uSrcSmplr [[sampler(0)]]) +{ + main0_out out = {}; + out.oFragColor = fast::min(abs(uSrc.sample(uSrcSmplr, in.vTexCoord0) + float4(in.vBackdrop0)), abs(uSrc.sample(uSrcSmplr, in.vTexCoord1) + float4(in.vBackdrop1))); + return out; +} + diff --git a/resources/shaders/metal/d3d9/tile_clip_combine.vs.metal b/resources/shaders/metal/d3d9/tile_clip_combine.vs.metal new file mode 100644 index 00000000..2a7864a7 --- /dev/null +++ b/resources/shaders/metal/d3d9/tile_clip_combine.vs.metal @@ -0,0 +1,44 @@ +// Automatically generated from files in pathfinder/shaders/. Do not edit! +#include +#include + +using namespace metal; + +struct main0_out +{ + float2 vTexCoord0 [[user(locn0)]]; + float vBackdrop0 [[user(locn1)]]; + float2 vTexCoord1 [[user(locn2)]]; + float vBackdrop1 [[user(locn3)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + int2 aTileOffset [[attribute(0)]]; + int aDestTileIndex [[attribute(1)]]; + int aDestBackdrop [[attribute(2)]]; + int aSrcTileIndex [[attribute(3)]]; + int aSrcBackdrop [[attribute(4)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant float2& uFramebufferSize [[buffer(0)]]) +{ + main0_out out = {}; + float2 destPosition = float2(int2(in.aDestTileIndex % 256, in.aDestTileIndex / 256) + in.aTileOffset); + float2 srcPosition = float2(int2(in.aSrcTileIndex % 256, in.aSrcTileIndex / 256) + in.aTileOffset); + destPosition *= (float2(16.0, 4.0) / uFramebufferSize); + srcPosition *= (float2(16.0, 4.0) / uFramebufferSize); + out.vTexCoord0 = destPosition; + out.vTexCoord1 = srcPosition; + out.vBackdrop0 = float(in.aDestBackdrop); + out.vBackdrop1 = 
float(in.aSrcBackdrop); + if (in.aDestTileIndex < 0) + { + destPosition = float2(0.0); + } + destPosition.y = 1.0 - destPosition.y; + out.gl_Position = float4(mix(float2(-1.0), float2(1.0), destPosition), 0.0, 1.0); + return out; +} + diff --git a/resources/shaders/metal/tile_clip.fs.metal b/resources/shaders/metal/d3d9/tile_clip_copy.fs.metal similarity index 72% rename from resources/shaders/metal/tile_clip.fs.metal rename to resources/shaders/metal/d3d9/tile_clip_copy.fs.metal index de4bf10a..50cb0e43 100644 --- a/resources/shaders/metal/tile_clip.fs.metal +++ b/resources/shaders/metal/d3d9/tile_clip_copy.fs.metal @@ -12,13 +12,12 @@ struct main0_out struct main0_in { float2 vTexCoord [[user(locn0)]]; - float vBackdrop [[user(locn1)]]; }; fragment main0_out main0(main0_in in [[stage_in]], texture2d uSrc [[texture(0)]], sampler uSrcSmplr [[sampler(0)]]) { main0_out out = {}; - out.oFragColor = fast::clamp(abs(uSrc.sample(uSrcSmplr, in.vTexCoord) + float4(in.vBackdrop)), float4(0.0), float4(1.0)); + out.oFragColor = uSrc.sample(uSrcSmplr, in.vTexCoord); return out; } diff --git a/resources/shaders/metal/d3d9/tile_clip_copy.vs.metal b/resources/shaders/metal/d3d9/tile_clip_copy.vs.metal new file mode 100644 index 00000000..979ac7f8 --- /dev/null +++ b/resources/shaders/metal/d3d9/tile_clip_copy.vs.metal @@ -0,0 +1,33 @@ +// Automatically generated from files in pathfinder/shaders/. Do not edit! 
+#include +#include + +using namespace metal; + +struct main0_out +{ + float2 vTexCoord [[user(locn0)]]; + float4 gl_Position [[position]]; +}; + +struct main0_in +{ + int2 aTileOffset [[attribute(0)]]; + int aTileIndex [[attribute(1)]]; +}; + +vertex main0_out main0(main0_in in [[stage_in]], constant float2& uFramebufferSize [[buffer(0)]]) +{ + main0_out out = {}; + float2 position = float2(int2(in.aTileIndex % 256, in.aTileIndex / 256) + in.aTileOffset); + position *= (float2(16.0, 4.0) / uFramebufferSize); + out.vTexCoord = position; + if (in.aTileIndex < 0) + { + position = float2(0.0); + } + position.y = 1.0 - position.y; + out.gl_Position = float4(mix(float2(-1.0), float2(1.0), position), 0.0, 1.0); + return out; +} + diff --git a/resources/shaders/metal/tile_copy.fs.metal b/resources/shaders/metal/d3d9/tile_copy.fs.metal similarity index 100% rename from resources/shaders/metal/tile_copy.fs.metal rename to resources/shaders/metal/d3d9/tile_copy.fs.metal diff --git a/resources/shaders/metal/tile_copy.vs.metal b/resources/shaders/metal/d3d9/tile_copy.vs.metal similarity index 100% rename from resources/shaders/metal/tile_copy.vs.metal rename to resources/shaders/metal/d3d9/tile_copy.vs.metal diff --git a/resources/shaders/metal/debug_solid.fs.metal b/resources/shaders/metal/debug/solid.fs.metal similarity index 100% rename from resources/shaders/metal/debug_solid.fs.metal rename to resources/shaders/metal/debug/solid.fs.metal diff --git a/resources/shaders/metal/debug_solid.vs.metal b/resources/shaders/metal/debug/solid.vs.metal similarity index 100% rename from resources/shaders/metal/debug_solid.vs.metal rename to resources/shaders/metal/debug/solid.vs.metal diff --git a/resources/shaders/metal/debug_texture.fs.metal b/resources/shaders/metal/debug/texture.fs.metal similarity index 100% rename from resources/shaders/metal/debug_texture.fs.metal rename to resources/shaders/metal/debug/texture.fs.metal diff --git 
a/resources/shaders/metal/debug_texture.vs.metal b/resources/shaders/metal/debug/texture.vs.metal similarity index 100% rename from resources/shaders/metal/debug_texture.vs.metal rename to resources/shaders/metal/debug/texture.vs.metal diff --git a/resources/shaders/metal/fill.cs.metal b/resources/shaders/metal/fill.cs.metal deleted file mode 100644 index b1ad19ce..00000000 --- a/resources/shaders/metal/fill.cs.metal +++ /dev/null @@ -1,65 +0,0 @@ -// Automatically generated from files in pathfinder/shaders/. Do not edit! -#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -struct bFillTileMap -{ - int iFillTileMap[1]; -}; - -struct bFills -{ - uint2 iFills[1]; -}; - -struct bNextFills -{ - int iNextFills[1]; -}; - -constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(16u, 4u, 1u); - -static inline __attribute__((always_inline)) -float4 computeCoverage(thread const float2& from, thread const float2& to, thread const texture2d areaLUT, thread const sampler areaLUTSmplr) -{ - float2 left = select(to, from, bool2(from.x < to.x)); - float2 right = select(from, to, bool2(from.x < to.x)); - float2 window = fast::clamp(float2(from.x, to.x), float2(-0.5), float2(0.5)); - float offset = mix(window.x, window.y, 0.5) - left.x; - float t = offset / (right.x - left.x); - float y = mix(left.y, right.y, t); - float d = (right.y - left.y) / (right.x - left.x); - float dX = window.x - window.y; - return areaLUT.sample(areaLUTSmplr, (float2(y + 8.0, abs(d * dX)) / float2(16.0)), level(0.0)) * dX; -} - -kernel void main0(constant int& uFirstTileIndex [[buffer(0)]], const device bFillTileMap& _150 [[buffer(1)]], const device bFills& _173 [[buffer(2)]], const device bNextFills& _256 [[buffer(3)]], texture2d uAreaLUT [[texture(0)]], texture2d uDest [[texture(1)]], sampler uAreaLUTSmplr [[sampler(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]]) -{ - int2 
tileSubCoord = int2(gl_LocalInvocationID.xy) * int2(1, 4); - uint tileIndexOffset = gl_WorkGroupID.z; - uint tileIndex = tileIndexOffset + uint(uFirstTileIndex); - int fillIndex = _150.iFillTileMap[tileIndex]; - if (fillIndex < 0) - { - return; - } - float4 coverages = float4(0.0); - do - { - uint2 fill = _173.iFills[fillIndex]; - float2 from = float2(float(fill.y & 15u), float((fill.y >> 4u) & 15u)) + (float2(float(fill.x & 255u), float((fill.x >> 8u) & 255u)) / float2(256.0)); - float2 to = float2(float((fill.y >> 8u) & 15u), float((fill.y >> 12u) & 15u)) + (float2(float((fill.x >> 16u) & 255u), float((fill.x >> 24u) & 255u)) / float2(256.0)); - float2 param = from - (float2(tileSubCoord) + float2(0.5)); - float2 param_1 = to - (float2(tileSubCoord) + float2(0.5)); - coverages += computeCoverage(param, param_1, uAreaLUT, uAreaLUTSmplr); - fillIndex = _256.iNextFills[fillIndex]; - } while (fillIndex >= 0); - int2 tileOrigin = int2(int(tileIndex & 255u), int((tileIndex >> 8u) & 255u)) * int2(16, 4); - int2 destCoord = tileOrigin + int2(gl_LocalInvocationID.xy); - uDest.write(coverages, uint2(destCoord)); -} - diff --git a/resources/shaders/metal/fill.vs.metal b/resources/shaders/metal/fill.vs.metal deleted file mode 100644 index a8cf9a86..00000000 --- a/resources/shaders/metal/fill.vs.metal +++ /dev/null @@ -1,68 +0,0 @@ -// Automatically generated from files in pathfinder/shaders/. Do not edit! 
-#pragma clang diagnostic ignored "-Wmissing-prototypes" - -#include -#include - -using namespace metal; - -struct main0_out -{ - float2 vFrom [[user(locn0)]]; - float2 vTo [[user(locn1)]]; - float4 gl_Position [[position]]; -}; - -struct main0_in -{ - uint2 aTessCoord [[attribute(0)]]; - uint aFromPx [[attribute(1)]]; - uint aToPx [[attribute(2)]]; - float2 aFromSubpx [[attribute(3)]]; - float2 aToSubpx [[attribute(4)]]; - uint aTileIndex [[attribute(5)]]; -}; - -static inline __attribute__((always_inline)) -float2 computeTileOffset(thread const uint& tileIndex, thread const float& stencilTextureWidth, thread float2 uTileSize) -{ - uint tilesPerRow = uint(stencilTextureWidth / uTileSize.x); - uint2 tileOffset = uint2(tileIndex % tilesPerRow, tileIndex / tilesPerRow); - return (float2(tileOffset) * uTileSize) * float2(1.0, 0.25); -} - -vertex main0_out main0(main0_in in [[stage_in]], constant float2& uTileSize [[buffer(0)]], constant float2& uFramebufferSize [[buffer(1)]]) -{ - main0_out out = {}; - uint param = in.aTileIndex; - float param_1 = uFramebufferSize.x; - float2 tileOrigin = computeTileOffset(param, param_1, uTileSize); - float2 from = float2(float(in.aFromPx & 15u), float(in.aFromPx >> 4u)) + in.aFromSubpx; - float2 to = float2(float(in.aToPx & 15u), float(in.aToPx >> 4u)) + in.aToSubpx; - float2 position; - if (in.aTessCoord.x == 0u) - { - position.x = floor(fast::min(from.x, to.x)); - } - else - { - position.x = ceil(fast::max(from.x, to.x)); - } - if (in.aTessCoord.y == 0u) - { - position.y = floor(fast::min(from.y, to.y)); - } - else - { - position.y = uTileSize.y; - } - position.y = floor(position.y * 0.25); - float2 offset = float2(0.0, 1.5) - (position * float2(1.0, 4.0)); - out.vFrom = from + offset; - out.vTo = to + offset; - float2 globalPosition = (((tileOrigin + position) / uFramebufferSize) * 2.0) - float2(1.0); - globalPosition.y = -globalPosition.y; - out.gl_Position = float4(globalPosition, 0.0, 1.0); - return out; -} - diff --git 
a/resources/shaders/metal/tile.vs.metal b/resources/shaders/metal/tile.vs.metal deleted file mode 100644 index 6b297b65..00000000 --- a/resources/shaders/metal/tile.vs.metal +++ /dev/null @@ -1,48 +0,0 @@ -// Automatically generated from files in pathfinder/shaders/. Do not edit! -#include -#include - -using namespace metal; - -struct main0_out -{ - float3 vMaskTexCoord0 [[user(locn0)]]; - float2 vColorTexCoord0 [[user(locn1)]]; - float4 vBaseColor [[user(locn2)]]; - float vTileCtrl [[user(locn3)]]; - float4 gl_Position [[position]]; -}; - -struct main0_in -{ - int2 aTileOffset [[attribute(0)]]; - int2 aTileOrigin [[attribute(1)]]; - uint2 aMaskTexCoord0 [[attribute(2)]]; - int2 aMaskBackdrop [[attribute(3)]]; - int aColor [[attribute(4)]]; - int aTileCtrl [[attribute(5)]]; -}; - -vertex main0_out main0(main0_in in [[stage_in]], constant int2& uTextureMetadataSize [[buffer(1)]], constant float2& uTileSize [[buffer(0)]], constant float4x4& uTransform [[buffer(2)]], texture2d uTextureMetadata [[texture(0)]], sampler uTextureMetadataSmplr [[sampler(0)]]) -{ - main0_out out = {}; - float2 tileOrigin = float2(in.aTileOrigin); - float2 tileOffset = float2(in.aTileOffset); - float2 position = (tileOrigin + tileOffset) * uTileSize; - float2 maskTexCoord0 = (float2(in.aMaskTexCoord0) + tileOffset) * uTileSize; - float2 textureMetadataScale = float2(1.0) / float2(uTextureMetadataSize); - float2 metadataEntryCoord = float2(float((in.aColor % 128) * 4), float(in.aColor / 128)); - float2 colorTexMatrix0Coord = (metadataEntryCoord + float2(0.5)) * textureMetadataScale; - float2 colorTexOffsetsCoord = (metadataEntryCoord + float2(1.5, 0.5)) * textureMetadataScale; - float2 baseColorCoord = (metadataEntryCoord + float2(2.5, 0.5)) * textureMetadataScale; - float4 colorTexMatrix0 = uTextureMetadata.sample(uTextureMetadataSmplr, colorTexMatrix0Coord, level(0.0)); - float4 colorTexOffsets = uTextureMetadata.sample(uTextureMetadataSmplr, colorTexOffsetsCoord, level(0.0)); - float4 
baseColor = uTextureMetadata.sample(uTextureMetadataSmplr, baseColorCoord, level(0.0)); - out.vColorTexCoord0 = (float2x2(float2(colorTexMatrix0.xy), float2(colorTexMatrix0.zw)) * position) + colorTexOffsets.xy; - out.vMaskTexCoord0 = float3(maskTexCoord0, float(in.aMaskBackdrop.x)); - out.vBaseColor = baseColor; - out.vTileCtrl = float(in.aTileCtrl); - out.gl_Position = uTransform * float4(position, 0.0, 1.0); - return out; -} - diff --git a/resources/shaders/metal/tile_clip.vs.metal b/resources/shaders/metal/tile_clip.vs.metal deleted file mode 100644 index 3e3a6baf..00000000 --- a/resources/shaders/metal/tile_clip.vs.metal +++ /dev/null @@ -1,32 +0,0 @@ -// Automatically generated from files in pathfinder/shaders/. Do not edit! -#include -#include - -using namespace metal; - -struct main0_out -{ - float2 vTexCoord [[user(locn0)]]; - float vBackdrop [[user(locn1)]]; - float4 gl_Position [[position]]; -}; - -struct main0_in -{ - int2 aTileOffset [[attribute(0)]]; - int2 aDestTileOrigin [[attribute(1)]]; - int2 aSrcTileOrigin [[attribute(2)]]; - int aSrcBackdrop [[attribute(3)]]; -}; - -vertex main0_out main0(main0_in in [[stage_in]]) -{ - main0_out out = {}; - float2 destPosition = float2(in.aDestTileOrigin + in.aTileOffset) / float2(256.0); - float2 srcPosition = float2(in.aSrcTileOrigin + in.aTileOffset) / float2(256.0); - out.vTexCoord = srcPosition; - out.vBackdrop = float(in.aSrcBackdrop); - out.gl_Position = float4(mix(float2(-1.0), float2(1.0), destPosition), 0.0, 1.0); - return out; -} - diff --git a/shaders/Makefile b/shaders/Makefile index c121d3a1..15d8a329 100644 --- a/shaders/Makefile +++ b/shaders/Makefile @@ -3,36 +3,47 @@ TARGET_DIR?=../resources/shaders EMPTY= SHADERS=\ + d3d9/fill.fs.glsl \ + d3d9/fill.vs.glsl \ + d3d9/tile.fs.glsl \ + d3d9/tile.vs.glsl \ + d3d9/tile_clip_combine.fs.glsl \ + d3d9/tile_clip_combine.vs.glsl \ + d3d9/tile_clip_copy.fs.glsl \ + d3d9/tile_clip_copy.vs.glsl \ + d3d9/tile_copy.fs.glsl \ + d3d9/tile_copy.vs.glsl \ + 
debug/solid.fs.glsl \ + debug/solid.vs.glsl \ + debug/texture.fs.glsl \ + debug/texture.vs.glsl \ blit.fs.glsl \ blit.vs.glsl \ clear.fs.glsl \ clear.vs.glsl \ - debug_solid.fs.glsl \ - debug_solid.vs.glsl \ - debug_texture.fs.glsl \ - debug_texture.vs.glsl \ demo_ground.fs.glsl \ demo_ground.vs.glsl \ - fill.fs.glsl \ - fill.vs.glsl \ reproject.fs.glsl \ reproject.vs.glsl \ stencil.fs.glsl \ stencil.vs.glsl \ - tile.fs.glsl \ - tile.vs.glsl \ - tile_clip.fs.glsl \ - tile_clip.vs.glsl \ - tile_copy.fs.glsl \ - tile_copy.vs.glsl \ $(EMPTY) COMPUTE_SHADERS=\ - fill.cs.glsl \ + d3d11/bin.cs.glsl \ + d3d11/bound.cs.glsl \ + d3d11/dice.cs.glsl \ + d3d11/fill.cs.glsl \ + d3d11/propagate.cs.glsl \ + d3d11/sort.cs.glsl \ + d3d11/tile.cs.glsl \ $(EMPTY) INCLUDES=\ - fill.inc.glsl \ + d3d11/fill_compute.inc.glsl \ + fill_area.inc.glsl \ + tile_fragment.inc.glsl \ + tile_vertex.inc.glsl \ $(EMPTY) OUT=\ @@ -58,6 +69,10 @@ HEADER="// Automatically generated from files in pathfinder/shaders/. Do not edi GLSL_SED_ARGS=-e "s/\#version .*//" -e "s/\#line.*$$//" +GLSL_SHADER_TYPE.fs=frag +GLSL_SHADER_TYPE.vs=vert +GLSL_SHADER_TYPE.cs=comp + all: $(OUT) .PHONY: clean @@ -65,29 +80,14 @@ all: $(OUT) clean: rm -f $(OUT) -build/metal/%.fs.spv: %.fs.glsl $(INCLUDES) - mkdir -p build/metal && glslangValidator $(GLSLANGFLAGS_METAL) -G$(GLSL_VERSION) -S frag -o $@ $< +build/metal/%.spv: %.glsl $(INCLUDES) + mkdir -p $(dir $@) && glslangValidator $(GLSLANGFLAGS_METAL) -G$(GLSL_VERSION) -S $(GLSL_SHADER_TYPE$(suffix $(basename $(notdir $<)))) -o $@ $< -$(TARGET_DIR)/gl3/%.fs.glsl: %.fs.glsl $(INCLUDES) - mkdir -p $(TARGET_DIR)/gl3 && echo $(GLSL_VERSION_HEADER) > $@ && echo $(HEADER) >> $@ && ( glslangValidator $(GLSLANGFLAGS) -S frag -E $< | sed $(GLSL_SED_ARGS) >> $@ ) || ( rm $@ && exit 1 ) +$(TARGET_DIR)/gl3/%.glsl: %.glsl $(INCLUDES) + mkdir -p $(dir $@) && echo $(GLSL_VERSION_HEADER) > $@ && echo $(HEADER) >> $@ && ( glslangValidator $(GLSLANGFLAGS) -S $(GLSL_SHADER_TYPE$(suffix 
$(basename $(notdir $<)))) -E $< | sed $(GLSL_SED_ARGS) >> $@ ) || ( rm $@ && exit 1 ) -$(TARGET_DIR)/gl4/%.fs.glsl: %.fs.glsl $(INCLUDES) - mkdir -p $(TARGET_DIR)/gl4 && echo $(GLSL_VERSION_HEADER) > $@ && echo $(HEADER) >> $@ && ( glslangValidator $(GLSLANGFLAGS) -S frag -E $< | sed $(GLSL_SED_ARGS) >> $@ ) || ( rm $@ && exit 1 ) +$(TARGET_DIR)/gl4/%.glsl: %.glsl $(INCLUDES) + mkdir -p $(dir $@) && echo $(GLSL_VERSION_HEADER) > $@ && echo $(HEADER) >> $@ && ( glslangValidator $(GLSLANGFLAGS) -S $(GLSL_SHADER_TYPE$(suffix $(basename $(notdir $<)))) -E $< | sed $(GLSL_SED_ARGS) >> $@ ) || ( rm $@ && exit 1 ) -build/metal/%.vs.spv: %.vs.glsl $(INCLUDES) - mkdir -p build/metal && glslangValidator $(GLSLANGFLAGS_METAL) -G$(GLSL_VERSION) -S vert -o $@ $< - -$(TARGET_DIR)/gl3/%.vs.glsl: %.vs.glsl $(INCLUDES) - mkdir -p $(TARGET_DIR)/gl3 && echo $(GLSL_VERSION_HEADER) > $@ && echo $(HEADER) >> $@ && ( glslangValidator $(GLSLANGFLAGS) -S vert -E $< | sed $(GLSL_SED_ARGS) >> $@ ) || ( rm $@ && exit 1 ) - -$(TARGET_DIR)/gl4/%.vs.glsl: %.vs.glsl $(INCLUDES) - mkdir -p $(TARGET_DIR)/gl3 && echo $(GLSL_VERSION_HEADER) > $@ && echo $(HEADER) >> $@ && ( glslangValidator $(GLSLANGFLAGS) -S vert -E $< | sed $(GLSL_SED_ARGS) >> $@ ) || ( rm $@ && exit 1 ) - -build/metal/%.cs.spv: %.cs.glsl $(INCLUDES) - mkdir -p build/metal && glslangValidator $(GLSLANGFLAGS_METAL) -G$(GLSL_COMPUTE_VERSION) -S comp -o $@ $< - -$(TARGET_DIR)/gl4/%.cs.glsl: %.cs.glsl $(INCLUDES) - mkdir -p $(TARGET_DIR)/gl4 && echo $(GLSL_VERSION_HEADER) > $@ && echo $(HEADER) >> $@ && ( glslangValidator $(GLSLANGFLAGS) -S vert -E $< | sed $(GLSL_SED_ARGS) >> $@ ) || ( rm $@ && exit 1 ) - -$(TARGET_DIR)/metal/%.metal: build/metal/%.spv - mkdir -p $(TARGET_DIR)/metal && echo $(HEADER) > $@ && ( $(SPIRVCROSS) $(SPIRVCROSSFLAGS) $< >> $@ ) || ( rm $@ && exit 1 ) +$(TARGET_DIR)/metal/%.metal: build/metal/%.spv + mkdir -p $(dir $@) && echo $(HEADER) > $@ && ( $(SPIRVCROSS) $(SPIRVCROSSFLAGS) $< >> $@ ) || ( rm $@ && exit 
1 ) diff --git a/shaders/blit.fs.glsl b/shaders/blit.fs.glsl index 8d461ed4..e257ba64 100644 --- a/shaders/blit.fs.glsl +++ b/shaders/blit.fs.glsl @@ -24,5 +24,5 @@ out vec4 oFragColor; void main() { vec4 color = texture(uSrc, vTexCoord); - oFragColor = vec4(color.rgb * color.a, color.a); + oFragColor = color; } diff --git a/shaders/blit.vs.glsl b/shaders/blit.vs.glsl index ca3b0f7c..9077fa8a 100644 --- a/shaders/blit.vs.glsl +++ b/shaders/blit.vs.glsl @@ -16,15 +16,16 @@ precision highp float; precision highp sampler2D; #endif +uniform vec4 uDestRect; +uniform vec2 uFramebufferSize; + in ivec2 aPosition; out vec2 vTexCoord; void main() { + vec2 position = mix(uDestRect.xy, uDestRect.zw, vec2(aPosition)) / uFramebufferSize; vec2 texCoord = vec2(aPosition); -#ifdef PF_ORIGIN_UPPER_LEFT - texCoord.y = 1.0 - texCoord.y; -#endif vTexCoord = texCoord; - gl_Position = vec4(mix(vec2(-1.0), vec2(1.0), vec2(aPosition)), 0.0, 1.0); + gl_Position = vec4(mix(vec2(-1.0), vec2(1.0), position), 0.0, 1.0); } diff --git a/shaders/d3d11/bin.cs.glsl b/shaders/d3d11/bin.cs.glsl new file mode 100644 index 00000000..d34260eb --- /dev/null +++ b/shaders/d3d11/bin.cs.glsl @@ -0,0 +1,256 @@ +#version 430 + +// pathfinder/shaders/bin.cs.glsl +// +// Copyright © 2020 The Pathfinder Project Developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +// Assigns microlines to tiles. 
+
+#extension GL_GOOGLE_include_directive : enable
+
+// Upper bound on tile-walk steps per microline, so the loop in `main()` always terminates.
+#define MAX_ITERATIONS 1024u
+
+#define STEP_DIRECTION_NONE 0
+#define STEP_DIRECTION_X 1
+#define STEP_DIRECTION_Y 2
+
+// Word offsets of the four fields that make up one entry of `iTiles`.
+#define TILE_FIELD_NEXT_TILE_ID 0
+#define TILE_FIELD_FIRST_FILL_ID 1
+#define TILE_FIELD_BACKDROP_ALPHA_TILE_ID 2
+#define TILE_FIELD_CONTROL 3
+
+precision highp float;
+
+#ifdef GL_ES
+precision highp sampler2D;
+#endif
+
+layout(local_size_x = 64) in;
+
+uniform int uMicrolineCount;
+// How many slots we have allocated for fills.
+uniform int uMaxFillCount;
+
+layout(std430, binding = 0) buffer bMicrolines {
+    restrict readonly uvec4 iMicrolines[];
+};
+
+layout(std430, binding = 1) buffer bMetadata {
+    // [0]: tile rect
+    // [1].x: tile offset
+    // [1].y: path ID
+    // [1].z: z write flag
+    // [1].w: clip path ID
+    // [2].x: backdrop offset
+    restrict readonly ivec4 iMetadata[];
+};
+
+// [0]: vertexCount (6)
+// [1]: instanceCount (of fills)
+// [2]: vertexStart (0)
+// [3]: baseInstance (0)
+// [4]: alpha tile count
+layout(std430, binding = 2) buffer bIndirectDrawParams {
+    restrict uint iIndirectDrawParams[];
+};
+
+layout(std430, binding = 3) buffer bFills {
+    restrict writeonly uint iFills[];
+};
+
+layout(std430, binding = 4) buffer bTiles {
+    // [0]: next tile ID (initialized to -1)
+    // [1]: first fill ID (initialized to -1)
+    // [2]: backdrop delta upper 8 bits, alpha tile ID lower 24 (initialized to 0, -1 respectively)
+    // [3]: color/ctrl/backdrop word
+    restrict uint iTiles[];
+};
+
+layout(std430, binding = 5) buffer bBackdrops {
+    // [0]: backdrop
+    // [1]: tile X offset
+    // [2]: path ID
+    restrict uint iBackdrops[];
+};
+
+// Returns the row-major index of `tileCoords` within the path's tile rect, offset by
+// `pathTileOffset`. Performs no bounds check.
+uint computeTileIndexNoCheck(ivec2 tileCoords, ivec4 pathTileRect, uint pathTileOffset) {
+    ivec2 offsetCoords = tileCoords - pathTileRect.xy;
+    return pathTileOffset + offsetCoords.x + offsetCoords.y * (pathTileRect.z - pathTileRect.x);
+}
+
+// Returns (x < rect.x, y < rect.y, x >= rect.z, y >= rect.w). All components are false exactly
+// when `tileCoords` lies inside `pathTileRect`.
+bvec4 computeTileOutcodes(ivec2 tileCoords, ivec4 pathTileRect) {
+    return bvec4(lessThan(tileCoords, pathTileRect.xy),
+                 greaterThanEqual(tileCoords, pathTileRect.zw));
+}
+
+// Writes the tile index to `outTileIndex` and returns true if `tileCoords` is inside the rect.
+// The index is written even when the function returns false.
+bool computeTileIndex(ivec2 tileCoords,
+                      ivec4 pathTileRect,
+                      uint pathTileOffset,
+                      out uint outTileIndex) {
+    outTileIndex = computeTileIndexNoCheck(tileCoords, pathTileRect, pathTileOffset);
+    return !any(computeTileOutcodes(tileCoords, pathTileRect));
+}
+
+// Appends `lineSegment`, made tile-relative and scaled by 256, to the fill list of the tile at
+// `tileCoords`. Does nothing if the tile is outside the path's tile rect or the segment has no
+// horizontal extent after quantization.
+void addFill(vec4 lineSegment, ivec2 tileCoords, ivec4 pathTileRect, uint pathTileOffset) {
+    // Compute tile offset. If out of bounds, cull.
+    uint tileIndex;
+    if (!computeTileIndex(tileCoords, pathTileRect, pathTileOffset, tileIndex)) {
+        return;
+    }
+
+    // Clip line. If too narrow, cull.
+    uvec4 scaledLocalLine = uvec4((lineSegment - vec4(tileCoords.xyxy * ivec4(16))) * vec4(256.0));
+    if (scaledLocalLine.x == scaledLocalLine.z)
+        return;
+
+    // Bump instance count.
+    uint fillIndex = atomicAdd(iIndirectDrawParams[1], 1);
+
+    // Fill out the link field, inserting into the linked list.
+    uint fillLink = atomicExchange(iTiles[tileIndex * 4 + TILE_FIELD_FIRST_FILL_ID],
+                                   int(fillIndex));
+
+    // Write fill.
+    // The counter and the list head are updated even when `fillIndex` is past the allocated
+    // slots; only the write itself is skipped.
+    if (fillIndex < uMaxFillCount) {
+        iFills[fillIndex * 3 + 0] = scaledLocalLine.x | (scaledLocalLine.y << 16);
+        iFills[fillIndex * 3 + 1] = scaledLocalLine.z | (scaledLocalLine.w << 16);
+        iFills[fillIndex * 3 + 2] = fillLink;
+    }
+}
+
+// Adds `backdropDelta` to the tile at `tileCoords`. If the tile lies outside the rect but only
+// because y < rect.y (x in range), the delta goes to the per-column entry in `iBackdrops`
+// instead; any other out-of-rect tile is ignored.
+void adjustBackdrop(int backdropDelta,
+                    ivec2 tileCoords,
+                    ivec4 pathTileRect,
+                    uint pathTileOffset,
+                    uint pathBackdropOffset) {
+    bvec4 outcodes = computeTileOutcodes(tileCoords, pathTileRect);
+    if (any(outcodes)) {
+        if (!outcodes.x && outcodes.y && !outcodes.z) {
+            uint backdropIndex = pathBackdropOffset + uint(tileCoords.x - pathTileRect.x);
+            atomicAdd(iBackdrops[backdropIndex * 3], backdropDelta);
+        }
+    } else {
+        uint tileIndex = computeTileIndexNoCheck(tileCoords, pathTileRect, pathTileOffset);
+        atomicAdd(iTiles[tileIndex * 4 + TILE_FIELD_BACKDROP_ALPHA_TILE_ID],
+                  uint(backdropDelta) << 24);
+    }
+}
+
+// Decodes a packed microline: x and y each hold two sign-extended 16-bit whole-pixel
+// coordinates, z holds four 8-bit fractions (divided by 256), and w is written to
+// `outPathIndex`. Returns (from.x, from.y, to.x, to.y).
+vec4 unpackMicroline(uvec4 packedMicroline, out uint outPathIndex) {
+    outPathIndex = packedMicroline.w;
+    ivec4 signedMicroline = ivec4(packedMicroline);
+    return vec4((signedMicroline.x << 16) >> 16, signedMicroline.x >> 16,
+                (signedMicroline.y << 16) >> 16, signedMicroline.y >> 16) +
+           vec4(signedMicroline.z & 0xff, (signedMicroline.z >> 8) & 0xff,
+                (signedMicroline.z >> 16) & 0xff, (signedMicroline.z >> 24) & 0xff) / 256.0;
+}
+
+// One invocation per microline: walks the 16x16 tiles the segment crosses, emitting a clipped
+// fill for each tile and adjusting backdrops where the walk steps in X.
+void main() {
+    uint segmentIndex = gl_GlobalInvocationID.x;
+    if (segmentIndex >= uMicrolineCount)
+        return;
+
+    uint pathIndex;
+    vec4 lineSegment = unpackMicroline(iMicrolines[segmentIndex], pathIndex);
+
+    ivec4 pathTileRect = iMetadata[pathIndex * 3 + 0];
+    uint pathTileOffset = uint(iMetadata[pathIndex * 3 + 1].x);
+    uint pathBackdropOffset = uint(iMetadata[pathIndex * 3 + 2].x);
+
+    // Following is a straight port of `process_line_segment()`:
+
+    ivec2 tileSize = ivec2(16);
+
+    ivec4 tileLineSegment = ivec4(floor(lineSegment / vec4(tileSize.xyxy)));
+    ivec2 fromTileCoords = tileLineSegment.xy, toTileCoords = tileLineSegment.zw;
+
+    vec2 vector = lineSegment.zw - lineSegment.xy;
+    // NOTE(review): `vectorIsNegative` is not read anywhere below.
+    vec2 vectorIsNegative = vec2(vector.x < 0.0 ? -1.0 : 0.0, vector.y < 0.0 ? -1.0 : 0.0);
+    ivec2 tileStep = ivec2(vector.x < 0.0 ? -1 : 1, vector.y < 0.0 ? -1 : 1);
+
+    vec2 firstTileCrossing = vec2((fromTileCoords + ivec2(vector.x >= 0.0 ? 1 : 0,
+                                                          vector.y >= 0.0 ? 1 : 0)) * tileSize);
+
+    // `tMax` is the line parameter at the next tile boundary on each axis; `tDelta` is the
+    // parameter distance between successive boundaries.
+    vec2 tMax = (firstTileCrossing - lineSegment.xy) / vector;
+    vec2 tDelta = abs(tileSize / vector);
+
+    vec2 currentPosition = lineSegment.xy;
+    ivec2 tileCoords = fromTileCoords;
+    int lastStepDirection = STEP_DIRECTION_NONE;
+    uint iteration = 0;
+
+    while (iteration < MAX_ITERATIONS) {
+        // Step along whichever axis reaches its boundary first; ties go to X when moving in +X.
+        int nextStepDirection;
+        if (tMax.x < tMax.y)
+            nextStepDirection = STEP_DIRECTION_X;
+        else if (tMax.x > tMax.y)
+            nextStepDirection = STEP_DIRECTION_Y;
+        else if (tileStep.x > 0.0)
+            nextStepDirection = STEP_DIRECTION_X;
+        else
+            nextStepDirection = STEP_DIRECTION_Y;
+
+        float nextT = min(nextStepDirection == STEP_DIRECTION_X ? tMax.x : tMax.y, 1.0);
+
+        // If we've reached the end tile, don't step at all.
+        if (tileCoords == toTileCoords)
+            nextStepDirection = STEP_DIRECTION_NONE;
+
+        vec2 nextPosition = mix(lineSegment.xy, lineSegment.zw, nextT);
+        vec4 clippedLineSegment = vec4(currentPosition, nextPosition);
+        addFill(clippedLineSegment, tileCoords, pathTileRect, pathTileOffset);
+
+        // Add extra fills if necessary.
+        vec4 auxiliarySegment;
+        bool haveAuxiliarySegment = false;
+        if (tileStep.y < 0 && nextStepDirection == STEP_DIRECTION_Y) {
+            auxiliarySegment = vec4(clippedLineSegment.zw, vec2(tileCoords * tileSize));
+            haveAuxiliarySegment = true;
+        } else if (tileStep.y > 0 && lastStepDirection == STEP_DIRECTION_Y) {
+            auxiliarySegment = vec4(vec2(tileCoords * tileSize), clippedLineSegment.xy);
+            haveAuxiliarySegment = true;
+        }
+        if (haveAuxiliarySegment)
+            addFill(auxiliarySegment, tileCoords, pathTileRect, pathTileOffset);
+
+        // Adjust backdrop if necessary.
+        //
+        // NB: Do not refactor the calls below. This exact code sequence is needed to avoid a
+        // miscompilation on the Radeon Metal compiler.
+        if (tileStep.x < 0 && lastStepDirection == STEP_DIRECTION_X) {
+            adjustBackdrop(1,
+                           tileCoords,
+                           pathTileRect,
+                           pathTileOffset,
+                           pathBackdropOffset);
+        } else if (tileStep.x > 0 && nextStepDirection == STEP_DIRECTION_X) {
+            adjustBackdrop(-1,
+                           tileCoords,
+                           pathTileRect,
+                           pathTileOffset,
+                           pathBackdropOffset);
+        }
+
+        // Take a step.
+        if (nextStepDirection == STEP_DIRECTION_X) {
+            tMax.x += tDelta.x;
+            tileCoords.x += tileStep.x;
+        } else if (nextStepDirection == STEP_DIRECTION_Y) {
+            tMax.y += tDelta.y;
+            tileCoords.y += tileStep.y;
+        } else if (nextStepDirection == STEP_DIRECTION_NONE) {
+            break;
+        }
+
+        currentPosition = nextPosition;
+        lastStepDirection = nextStepDirection;
+
+        iteration++;
+    }
+}
diff --git a/shaders/d3d11/bound.cs.glsl b/shaders/d3d11/bound.cs.glsl
new file mode 100644
index 00000000..6f29b2f2
--- /dev/null
+++ b/shaders/d3d11/bound.cs.glsl
@@ -0,0 +1,84 @@
+#version 430
+
+// pathfinder/shaders/bound.cs.glsl
+//
+// Copyright © 2020 The Pathfinder Project Developers.
+//
+// Licensed under the Apache License, Version 2.0 or the MIT license
+// , at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// Initializes the tile maps.
+
+#extension GL_GOOGLE_include_directive : enable
+
+precision highp float;
+
+#ifdef GL_ES
+precision highp sampler2D;
+#endif
+
+// Word offsets of the four fields that make up one entry of `iTiles`.
+#define TILE_FIELD_NEXT_TILE_ID 0
+#define TILE_FIELD_FIRST_FILL_ID 1
+#define TILE_FIELD_BACKDROP_ALPHA_TILE_ID 2
+#define TILE_FIELD_CONTROL 3
+
+layout(local_size_x = 64) in;
+
+uniform int uPathCount;
+uniform int uTileCount;
+
+layout(std430, binding = 0) buffer bTilePathInfo {
+    // x: tile upper left, 16-bit packed x/y
+    // y: tile lower right, 16-bit packed x/y
+    // z: first tile index in this path
+    // w: color/ctrl/backdrop word
+    restrict readonly uvec4 iTilePathInfo[];
+};
+
+layout(std430, binding = 1) buffer bTiles {
+    // [0]: next tile ID (initialized to -1)
+    // [1]: first fill ID (initialized to -1)
+    // [2]: backdrop delta upper 8 bits, alpha tile ID lower 24 (initialized to 0, -1 respectively)
+    // [3]: color/ctrl/backdrop word
+    restrict uint iTiles[];
+};
+
+// One invocation per tile: finds the path that owns the tile and resets the tile's four words.
+void main() {
+    uint tileIndex = gl_GlobalInvocationID.x;
+    if (tileIndex >= uint(uTileCount))
+        return;
+
+    // Binary search for the last path whose first tile index is <= `tileIndex`.
+    // Assumes `iTilePathInfo[].z` is ascending -- TODO confirm against the host code.
+    // The iteration cap only guarantees termination.
+    uint lowPathIndex = 0u, highPathIndex = uint(uPathCount);
+    int iteration = 0;
+    while (iteration < 1024 && lowPathIndex + 1u < highPathIndex) {
+        uint midPathIndex = lowPathIndex + (highPathIndex - lowPathIndex) / 2u;
+        uint midTileIndex = iTilePathInfo[midPathIndex].z;
+        if (tileIndex < midTileIndex) {
+            highPathIndex = midPathIndex;
+        } else {
+            lowPathIndex = midPathIndex;
+            if (tileIndex == midTileIndex)
+                break;
+        }
+        iteration++;
+    }
+
+    // Only the control word of the owning path is needed. The previous version also unpacked
+    // the tile rect and derived per-tile coordinates, but never used them, and the derivation
+    // divided by the rect width, which is zero for an empty rect.
+    uvec4 pathInfo = iTilePathInfo[lowPathIndex];
+
+    iTiles[tileIndex * 4u + TILE_FIELD_NEXT_TILE_ID] = ~0u;
+    iTiles[tileIndex * 4u + TILE_FIELD_FIRST_FILL_ID] = ~0u;
+    // Backdrop delta 0 in the upper 8 bits, alpha tile ID -1 in the lower 24.
+    iTiles[tileIndex * 4u + TILE_FIELD_BACKDROP_ALPHA_TILE_ID] = 0x00ffffffu;
+    iTiles[tileIndex * 4u + TILE_FIELD_CONTROL] = pathInfo.w;
+}
diff --git a/shaders/d3d11/dice.cs.glsl b/shaders/d3d11/dice.cs.glsl
new file mode 100644
index 00000000..cc1ab72c
--- /dev/null
+++ b/shaders/d3d11/dice.cs.glsl
@@ -0,0 +1,217 @@
+#version 430
+
+// pathfinder/shaders/dice.cs.glsl
+//
+// Copyright © 2020 The Pathfinder Project Developers.
+//
+// Licensed under the Apache License, Version 2.0 or the MIT license
+// , at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// Chops lines and curves into microlines.
+
+#extension GL_GOOGLE_include_directive : enable
+
+#define BIN_WORKGROUP_SIZE 64
+
+#define MAX_CURVE_STACK_SIZE 32
+
+#define FLAGS_PATH_INDEX_CURVE_IS_QUADRATIC 0x80000000u
+#define FLAGS_PATH_INDEX_CURVE_IS_CUBIC 0x40000000u
+
+#define BIN_INDIRECT_DRAW_PARAMS_MICROLINE_COUNT_INDEX 3
+
+#define TOLERANCE 0.25
+#define MICROLINE_LENGTH 16.0
+
+precision highp float;
+
+#ifdef GL_ES
+precision highp sampler2D;
+#endif
+
+layout(local_size_x = 64) in;
+
+uniform mat2 uTransform;
+uniform vec2 uTranslation;
+uniform int uPathCount;
+uniform int uLastBatchSegmentIndex;
+uniform int uMaxMicrolineCount;
+
+layout(std430, binding = 0) buffer bComputeIndirectParams {
+    // [0]: number of x workgroups
+    // [1]: number of y workgroups (always 1)
+    // [2]: number of z workgroups (always 1)
+    // [3]: number of output microlines
+    restrict uint iComputeIndirectParams[];
+};
+
+// Indexed by batch path index.
+layout(std430, binding = 1) buffer bDiceMetadata {
+    // x: global path ID
+    // y: first global segment index
+    // z: first batch segment index
+    // w: unused
+    restrict readonly uvec4 iDiceMetadata[];
+};
+
+layout(std430, binding = 2) buffer bPoints {
+    restrict readonly vec2 iPoints[];
+};
+
+layout(std430, binding = 3) buffer bInputIndices {
+    restrict readonly uvec2 iInputIndices[];
+};
+
+layout(std430, binding = 4) buffer bMicrolines {
+    // x: from (X, Y) whole pixels, packed signed 16-bit
+    // y: to (X, Y) whole pixels, packed signed 16-bit
+    // z: (from X, from Y, to X, to Y) fractional pixels, packed unsigned 8-bit (0.8 fixed point)
+    // w: path ID
+    restrict uvec4 iMicrolines[];
+};
+
+// Packs `microlineSegment` (from.xy, to.xy) and `pathIndex` into slot `outputMicrolineIndex` of
+// `iMicrolines`, using the layout documented on `bMicrolines`. Coordinates are clamped to the
+// signed 16-bit range first. Slots at or beyond `uMaxMicrolineCount` are silently dropped.
+void emitMicroline(vec4 microlineSegment, uint pathIndex, uint outputMicrolineIndex) {
+    if (outputMicrolineIndex >= uint(uMaxMicrolineCount))
+        return;
+
+    // Quantize to 1/256 pixel, then split into whole pixels (floored, so fractions stay
+    // non-negative) and an 8-bit fraction.
+    ivec4 microlineSubpixels = ivec4(round(clamp(microlineSegment, -32768.0, 32767.0) * 256.0));
+    ivec4 microlinePixels = ivec4(floor(vec4(microlineSubpixels) / 256.0));
+    ivec4 microlineFractPixels = microlineSubpixels - microlinePixels * 256;
+
+    iMicrolines[outputMicrolineIndex] =
+        uvec4((uint(microlinePixels.x) & 0xffffu) | (uint(microlinePixels.y) << 16),
+              (uint(microlinePixels.z) & 0xffffu) | (uint(microlinePixels.w) << 16),
+              uint(microlineFractPixels.x) | (uint(microlineFractPixels.y) << 8) |
+              (uint(microlineFractPixels.z) << 16) | (uint(microlineFractPixels.w) << 24),
+              pathIndex);
+}
+
+// See Kaspar Fischer, "Piecewise Linear Approximation of Bézier Curves", 2000.
+bool curveIsFlat(vec4 baseline, vec4 ctrl) {
+    // `baseline` is (p0, p3) and `ctrl` is (p1, p2); returns true when the flatness measure
+    // computed below is within 16 * TOLERANCE^2.
+    vec4 uv = vec4(3.0) * ctrl - vec4(2.0) * baseline - baseline.zwxy;
+    uv *= uv;
+    uv = max(uv, uv.zwxy);
+    return uv.x + uv.y <= 16.0 * TOLERANCE * TOLERANCE;
+}
+
+// Splits the cubic (`baseline` = p0/p3, `ctrl` = p1/p2) at parameter `t` with de Casteljau's
+// construction, writing the two halves in the same baseline/ctrl layout.
+void subdivideCurve(vec4 baseline,
+                    vec4 ctrl,
+                    float t,
+                    out vec4 prevBaseline,
+                    out vec4 prevCtrl,
+                    out vec4 nextBaseline,
+                    out vec4 nextCtrl) {
+    vec2 p0 = baseline.xy, p1 = ctrl.xy, p2 = ctrl.zw, p3 = baseline.zw;
+    vec2 p0p1 = mix(p0, p1, t), p1p2 = mix(p1, p2, t), p2p3 = mix(p2, p3, t);
+    vec2 p0p1p2 = mix(p0p1, p1p2, t), p1p2p3 = mix(p1p2, p2p3, t);
+    vec2 p0p1p2p3 = mix(p0p1p2, p1p2p3, t);
+    prevBaseline = vec4(p0, p0p1p2p3);
+    prevCtrl = vec4(p0p1, p0p1p2);
+    nextBaseline = vec4(p0p1p2p3, p3);
+    nextCtrl = vec4(p1p2p3, p2p3);
+}
+
+// Evaluates the cubic (`baseline` = p0/p3, `ctrl` = p1/p2) at parameter `t`.
+vec2 sampleCurve(vec4 baseline, vec4 ctrl, float t) {
+    vec2 p0 = baseline.xy, p1 = ctrl.xy, p2 = ctrl.zw, p3 = baseline.zw;
+    vec2 p0p1 = mix(p0, p1, t), p1p2 = mix(p1, p2, t), p2p3 = mix(p2, p3, t);
+    vec2 p0p1p2 = mix(p0p1, p1p2, t), p1p2p3 = mix(p1p2, p2p3, t);
+    return mix(p0p1p2, p1p2p3, t);
+}
+
+// Evaluates the line (from = `line.xy`, to = `line.zw`) at parameter `t`.
+vec2 sampleLine(vec4 line, float t) {
+    return mix(line.xy, line.zw, t);
+}
+
+// Fetches input point `pointIndex` and applies `uTransform` and `uTranslation`.
+vec2 getPoint(uint pointIndex) {
+    return uTransform * iPoints[pointIndex] + uTranslation;
+}
+
+// One invocation per input segment: reserves a run of output microlines with an atomic add and
+// fills it by sampling the segment at evenly spaced parameters.
+void main() {
+    uint batchSegmentIndex = gl_GlobalInvocationID.x;
+    if (batchSegmentIndex >= uint(uLastBatchSegmentIndex))
+        return;
+
+    // Find the path index.
+    //
+    // Binary search for the last path whose first batch segment index is <= ours. Assumes
+    // `iDiceMetadata[].z` is ascending -- TODO confirm against the host code. The iteration
+    // cap only guarantees termination.
+    uint lowPathIndex = 0u, highPathIndex = uint(uPathCount);
+    int iteration = 0;
+    while (iteration < 1024 && lowPathIndex + 1u < highPathIndex) {
+        uint midPathIndex = lowPathIndex + (highPathIndex - lowPathIndex) / 2u;
+        uint midBatchSegmentIndex = iDiceMetadata[midPathIndex].z;
+        if (batchSegmentIndex < midBatchSegmentIndex) {
+            highPathIndex = midPathIndex;
+        } else {
+            lowPathIndex = midPathIndex;
+            if (batchSegmentIndex == midBatchSegmentIndex)
+                break;
+        }
+        iteration++;
+    }
+
+    uint batchPathIndex = lowPathIndex;
+    uvec4 diceMetadata = iDiceMetadata[batchPathIndex];
+    uint firstGlobalSegmentIndexInPath = diceMetadata.y;
+    uint firstBatchSegmentIndexInPath = diceMetadata.z;
+    uint globalSegmentIndex = batchSegmentIndex - firstBatchSegmentIndexInPath +
+        firstGlobalSegmentIndexInPath;
+
+    uvec2 inputIndices = iInputIndices[globalSegmentIndex];
+    uint fromPointIndex = inputIndices.x, flagsPathIndex = inputIndices.y;
+
+    // The endpoint follows the control points: 2 for a cubic, 1 for a quadratic, 0 for a line.
+    uint toPointIndex = fromPointIndex;
+    if ((flagsPathIndex & FLAGS_PATH_INDEX_CURVE_IS_CUBIC) != 0u)
+        toPointIndex += 3u;
+    else if ((flagsPathIndex & FLAGS_PATH_INDEX_CURVE_IS_QUADRATIC) != 0u)
+        toPointIndex += 2u;
+    else
+        toPointIndex += 1u;
+
+    vec4 baseline = vec4(getPoint(fromPointIndex), getPoint(toPointIndex));
+
+    // Read control points if applicable, and calculate number of segments.
+    //
+    // The technique is from Thomas Sederberg, "Computer-Aided Geometric Design" notes, section
+    // 10.6 "Error Bounds".
+    vec4 ctrl = vec4(0.0);
+    float segmentCountF;
+    bool isCurve = (flagsPathIndex & (FLAGS_PATH_INDEX_CURVE_IS_CUBIC |
+                                      FLAGS_PATH_INDEX_CURVE_IS_QUADRATIC)) != 0u;
+    if (isCurve) {
+        vec2 ctrl0 = getPoint(fromPointIndex + 1u);
+        if ((flagsPathIndex & FLAGS_PATH_INDEX_CURVE_IS_QUADRATIC) != 0u) {
+            // Degree-elevate the quadratic: each cubic control point is (endpoint + 2 * ctrl0) / 3.
+            ctrl = (baseline + (ctrl0 * vec2(2.0)).xyxy) * vec4(1.0 / 3.0);
+        } else {
+            ctrl = vec4(ctrl0, getPoint(fromPointIndex + 2u));
+        }
+        vec2 bound = vec2(6.0) * max(abs(ctrl.zw - 2.0 * ctrl.xy + baseline.xy),
+                                     abs(baseline.zw - 2.0 * ctrl.zw + ctrl.xy));
+        segmentCountF = sqrt(length(bound) / (8.0 * TOLERANCE));
+    } else {
+        segmentCountF = length(baseline.zw - baseline.xy) / MICROLINE_LENGTH;
+    }
+
+    // Allocate space.
+    //
+    // The counter keeps growing past `uMaxMicrolineCount`; `emitMicroline()` drops the overflow.
+    int segmentCount = max(int(ceil(segmentCountF)), 1);
+    uint firstOutputMicrolineIndex =
+        atomicAdd(iComputeIndirectParams[BIN_INDIRECT_DRAW_PARAMS_MICROLINE_COUNT_INDEX],
+                  uint(segmentCount));
+
+    vec2 prevPoint = baseline.xy;
+    for (int segmentIndex = 0; segmentIndex < segmentCount; segmentIndex++) {
+        float nextT = float(segmentIndex + 1) / float(segmentCount);
+        vec2 nextPoint;
+        if (isCurve)
+            nextPoint = sampleCurve(baseline, ctrl, nextT);
+        else
+            nextPoint = sampleLine(baseline, nextT);
+        emitMicroline(vec4(prevPoint, nextPoint),
+                      batchPathIndex,
+                      firstOutputMicrolineIndex + uint(segmentIndex));
+        prevPoint = nextPoint;
+    }
+}
diff --git a/shaders/d3d11/fill.cs.glsl b/shaders/d3d11/fill.cs.glsl
new file mode 100644
index 00000000..c3e2ff35
--- /dev/null
+++ b/shaders/d3d11/fill.cs.glsl
@@ -0,0 +1,88 @@
+#version 430
+
+// pathfinder/shaders/fill.cs.glsl
+//
+// Copyright © 2020 The Pathfinder Project Developers.
+//
+// Licensed under the Apache License, Version 2.0 or the MIT license
+// , at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#extension GL_GOOGLE_include_directive : enable
+
+precision highp float;
+
+#ifdef GL_ES
+precision highp sampler2D;
+#endif
+
+#include "fill_area.inc.glsl"
+
+layout(local_size_x = 16, local_size_y = 4) in;
+
+// Word offsets of the four fields that make up one entry of `iTiles`.
+#define TILE_FIELD_NEXT_TILE_ID 0
+#define TILE_FIELD_FIRST_FILL_ID 1
+#define TILE_FIELD_BACKDROP_ALPHA_TILE_ID 2
+#define TILE_FIELD_CONTROL 3
+
+layout(rgba8) uniform image2D uDest;
+uniform sampler2D uAreaLUT;
+uniform ivec2 uAlphaTileRange;
+
+layout(std430, binding = 0) buffer bFills {
+    restrict readonly uint iFills[];
+};
+
+layout(std430, binding = 1) buffer bTiles {
+    // [0]: next tile ID
+    // [1]: first fill ID
+    // [2]: backdrop delta upper 8 bits, alpha tile ID lower 24 bits
+    // [3]: color/ctrl/backdrop word
+    restrict uint iTiles[];
+};
+
+layout(std430, binding = 2) buffer bAlphaTiles {
+    // [0]: alpha tile index
+    // [1]: clip tile index
+    restrict readonly uint iAlphaTiles[];
+};
+
+#include "fill_compute.inc.glsl"
+
+// Maps an alpha tile index to this invocation's texel in `uDest`. The low 8 bits of the index
+// select the tile column and the next 16 bits select the tile row; each tile occupies 16x4
+// texels.
+ivec2 computeTileCoord(uint alphaTileIndex) {
+    uint x = alphaTileIndex & 0xffu;
+    // The previous expression, `(i >> 8u) & 0xff + (((i >> 16u) & 0xff) << 8u)`, parsed as
+    // `(i >> 8u) & (0xff + ...)` because `+` binds tighter than `&`. It happened to yield the
+    // same 16 bits; spell the mask out directly.
+    uint y = (alphaTileIndex >> 8u) & 0xffffu;
+    return ivec2(16, 4) * ivec2(x, y) + ivec2(gl_LocalInvocationID.xy);
+}
+
+// One workgroup per alpha tile: accumulates coverage for the tile's fill list, optionally
+// clips it against an already-rendered clip tile, and stores it into `uDest`.
+void main() {
+    ivec2 tileSubCoord = ivec2(gl_LocalInvocationID.xy) * ivec2(1, 4);
+
+    // This is a workaround for the 64K workgroup dispatch limit in OpenGL.
+    uint batchAlphaTileIndex = (gl_WorkGroupID.x | (gl_WorkGroupID.y << 15));
+    uint alphaTileIndex = batchAlphaTileIndex + uint(uAlphaTileRange.x);
+    if (alphaTileIndex >= uint(uAlphaTileRange.y))
+        return;
+
+    // Skip tiles whose sign-extended 24-bit alpha tile ID is negative.
+    uint tileIndex = iAlphaTiles[batchAlphaTileIndex * 2u + 0u];
+    if ((int(iTiles[tileIndex * 4u + TILE_FIELD_BACKDROP_ALPHA_TILE_ID] << 8) >> 8) < 0)
+        return;
+
+    int fillIndex = int(iTiles[tileIndex * 4u + TILE_FIELD_FIRST_FILL_ID]);
+    int backdrop = int(iTiles[tileIndex * 4u + TILE_FIELD_CONTROL]) >> 24;
+
+    // TODO(pcwalton): Handle even-odd fill rule.
+    vec4 coverages = vec4(backdrop);
+    coverages += accumulateCoverageForFillList(fillIndex, tileSubCoord);
+    coverages = clamp(abs(coverages), 0.0, 1.0);
+
+    // Handle clip if necessary.
+    int clipTileIndex = int(iAlphaTiles[batchAlphaTileIndex * 2u + 1u]);
+    if (clipTileIndex >= 0)
+        coverages = min(coverages, imageLoad(uDest, computeTileCoord(uint(clipTileIndex))));
+
+    imageStore(uDest, computeTileCoord(alphaTileIndex), coverages);
+}
diff --git a/shaders/d3d11/fill_compute.inc.glsl b/shaders/d3d11/fill_compute.inc.glsl
new file mode 100644
index 00000000..eaee1546
--- /dev/null
+++ b/shaders/d3d11/fill_compute.inc.glsl
@@ -0,0 +1,25 @@
+// pathfinder/shaders/fill_compute.inc.glsl
+//
+// Copyright © 2020 The Pathfinder Project Developers.
+//
+// Licensed under the Apache License, Version 2.0 or the MIT license
+// , at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// Sums the coverage of every fill in the linked list starting at `fillIndex`, for the four
+// pixels handled by this invocation. A negative `fillIndex` is an empty list and yields zero
+// coverage. At most 1024 fills are visited.
+vec4 accumulateCoverageForFillList(int fillIndex, ivec2 tileSubCoord) {
+    vec2 tileFragCoord = vec2(tileSubCoord) + vec2(0.5);
+    vec4 coverages = vec4(0.0);
+    int iteration = 0;
+    // Test before the first read: the caller can pass -1 (no fills), and a do-while would
+    // index `iFills` with a negative offset in that case.
+    while (fillIndex >= 0 && iteration < 1024) {
+        uint fillFrom = iFills[fillIndex * 3 + 0], fillTo = iFills[fillIndex * 3 + 1];
+        // Each word packs two 16-bit coordinates in 8.8 fixed point.
+        vec4 lineSegment = vec4(fillFrom & 0xffffu, fillFrom >> 16,
+                                fillTo & 0xffffu, fillTo >> 16) / 256.0;
+        lineSegment -= tileFragCoord.xyxy;
+        coverages += computeCoverage(lineSegment.xy, lineSegment.zw, uAreaLUT);
+        fillIndex = int(iFills[fillIndex * 3 + 2]);
+        iteration++;
+    }
+    return coverages;
+}
diff --git a/shaders/d3d11/propagate.cs.glsl b/shaders/d3d11/propagate.cs.glsl
new file mode 100644
index 00000000..6dd00060
--- /dev/null
+++ b/shaders/d3d11/propagate.cs.glsl
@@ -0,0 +1,224 @@
+#version 430
+
+// pathfinder/shaders/propagate.cs.glsl
+//
+// Copyright © 2020 The Pathfinder Project Developers.
+//
+// Licensed under the Apache License, Version 2.0 or the MIT license
+// , at your
+// option.
This file may not be copied, modified, or distributed
+// except according to those terms.
+
+// Sum up backdrops to propagate fills across tiles, and allocate alpha tiles.
+
+#extension GL_GOOGLE_include_directive : enable
+
+precision highp float;
+
+#ifdef GL_ES
+precision highp sampler2D;
+#endif
+
+layout(local_size_x = 64) in;
+
+// Word offsets of the four fields that make up one entry of the tile buffers.
+#define TILE_FIELD_NEXT_TILE_ID 0
+#define TILE_FIELD_FIRST_FILL_ID 1
+#define TILE_FIELD_BACKDROP_ALPHA_TILE_ID 2
+#define TILE_FIELD_CONTROL 3
+
+uniform ivec2 uFramebufferTileSize;
+uniform int uColumnCount;
+uniform int uFirstAlphaTileIndex;
+
+layout(std430, binding = 0) buffer bDrawMetadata {
+    // [0]: tile rect
+    // [1].x: tile offset
+    // [1].y: path ID
+    // [1].z: Z write enabled?
+    // [1].w: clip path ID, or ~0
+    // [2].x: backdrop column offset
+    restrict readonly uvec4 iDrawMetadata[];
+};
+
+layout(std430, binding = 1) buffer bClipMetadata {
+    // [0]: tile rect
+    // [1].x: tile offset
+    // [1].y: unused
+    // [1].z: unused
+    // [1].w: unused
+    restrict readonly uvec4 iClipMetadata[];
+};
+
+layout(std430, binding = 2) buffer bBackdrops {
+    // [0]: backdrop
+    // [1]: tile X offset
+    // [2]: path ID
+    restrict readonly int iBackdrops[];
+};
+
+layout(std430, binding = 3) buffer bDrawTiles {
+    // [0]: next tile ID
+    // [1]: first fill ID
+    // [2]: backdrop delta upper 8 bits, alpha tile ID lower 24
+    // [3]: color/ctrl/backdrop word
+    restrict uint iDrawTiles[];
+};
+
+layout(std430, binding = 4) buffer bClipTiles {
+    // [0]: next tile ID
+    // [1]: first fill ID
+    // [2]: backdrop delta upper 8 bits, alpha tile ID lower 24
+    // [3]: color/ctrl/backdrop word
+    restrict uint iClipTiles[];
+};
+
+layout(std430, binding = 5) buffer bZBuffer {
+    restrict int iZBuffer[];
+};
+
+layout(std430, binding = 6) buffer bFirstTileMap {
+    restrict int iFirstTileMap[];
+};
+
+layout(std430, binding = 7) buffer bIndirectDrawParams {
+    // [0]: vertexCount (6)
+    // [1]: instanceCount (of fills)
+    // [2]: vertexStart (0)
+    // [3]: baseInstance (0)
+    // [4]: alpha tile count
+    restrict uint iIndirectDrawParams[];
+};
+
+layout(std430, binding = 8) buffer bAlphaTiles {
+    // [0]: alpha tile index
+    // [1]: clip tile index
+    restrict uint iAlphaTiles[];
+};
+
+// Returns the row-major index of `tileCoord` (relative to the rect origin) within `tileRect`,
+// offset by `bufferOffset`.
+uint calculateTileIndex(uint bufferOffset, uvec4 tileRect, uvec2 tileCoord) {
+    return bufferOffset + tileCoord.y * (tileRect.z - tileRect.x) + tileCoord.x;
+}
+
+// One invocation per tile column of a draw path. Walks the column top to bottom, carrying the
+// running backdrop, resolving each tile against the optional clip path, allocating alpha tiles
+// where needed, and linking visible tiles into the per-framebuffer-tile lists.
+void main() {
+    uint columnIndex = gl_GlobalInvocationID.x;
+    if (int(columnIndex) >= uColumnCount)
+        return;
+
+    int currentBackdrop = iBackdrops[columnIndex * 3u + 0u];
+    int tileX = iBackdrops[columnIndex * 3u + 1u];
+    uint drawPathIndex = uint(iBackdrops[columnIndex * 3u + 2u]);
+
+    uvec4 drawTileRect = iDrawMetadata[drawPathIndex * 3u + 0u];
+    uvec4 drawOffsets = iDrawMetadata[drawPathIndex * 3u + 1u];
+    uvec2 drawTileSize = drawTileRect.zw - drawTileRect.xy;
+    uint drawTileBufferOffset = drawOffsets.x;
+    bool zWrite = drawOffsets.z != 0u;
+
+    // A clip path ID of ~0 becomes -1 here, meaning "no clip".
+    int clipPathIndex = int(drawOffsets.w);
+    uvec4 clipTileRect = uvec4(0u), clipOffsets = uvec4(0u);
+    if (clipPathIndex >= 0) {
+        clipTileRect = iClipMetadata[clipPathIndex * 2 + 0];
+        clipOffsets = iClipMetadata[clipPathIndex * 2 + 1];
+    }
+    uint clipTileBufferOffset = clipOffsets.x;
+
+    for (uint tileY = 0u; tileY < drawTileSize.y; tileY++) {
+        uvec2 drawTileCoord = uvec2(tileX, tileY);
+        uint drawTileIndex = calculateTileIndex(drawTileBufferOffset, drawTileRect, drawTileCoord);
+
+        int drawAlphaTileIndex = -1;
+        int clipAlphaTileIndex = -1;
+        int drawFirstFillIndex = int(iDrawTiles[drawTileIndex * 4u + TILE_FIELD_FIRST_FILL_ID]);
+        // Arithmetic shift sign-extends the 8-bit backdrop delta.
+        int drawBackdropDelta =
+            int(iDrawTiles[drawTileIndex * 4u + TILE_FIELD_BACKDROP_ALPHA_TILE_ID]) >> 24;
+        uint drawTileWord = iDrawTiles[drawTileIndex * 4u + TILE_FIELD_CONTROL] & 0x00ffffffu;
+
+        int drawTileBackdrop = currentBackdrop;
+        bool haveDrawAlphaMask = drawFirstFillIndex >= 0;
+        bool needNewAlphaTile = haveDrawAlphaMask;
+
+        // Handle clip if necessary.
+        if (clipPathIndex >= 0) {
+            uvec2 tileCoord = drawTileCoord + drawTileRect.xy;
+            if (all(bvec4(greaterThanEqual(tileCoord, clipTileRect.xy),
+                          lessThan        (tileCoord, clipTileRect.zw)))) {
+                uvec2 clipTileCoord = tileCoord - clipTileRect.xy;
+                uint clipTileIndex = calculateTileIndex(clipTileBufferOffset,
+                                                        clipTileRect,
+                                                        clipTileCoord);
+
+                // Sign-extend the 24-bit alpha tile ID.
+                int thisClipAlphaTileIndex =
+                    int(iClipTiles[clipTileIndex * 4u +
+                                   TILE_FIELD_BACKDROP_ALPHA_TILE_ID] << 8) >> 8;
+
+                uint clipTileWord = iClipTiles[clipTileIndex * 4u + TILE_FIELD_CONTROL];
+                int clipTileBackdrop = int(clipTileWord) >> 24;
+
+                if (thisClipAlphaTileIndex >= 0) {
+                    if (haveDrawAlphaMask) {
+                        clipAlphaTileIndex = thisClipAlphaTileIndex;
+                        needNewAlphaTile = true;
+                    } else {
+                        if (drawTileBackdrop != 0) {
+                            // This is a solid draw tile, but there's a clip applied. Replace it with an
+                            // alpha tile pointing directly to the clip mask.
+                            drawAlphaTileIndex = thisClipAlphaTileIndex;
+                            clipAlphaTileIndex = -1;
+                            needNewAlphaTile = false;
+                        } else {
+                            // No draw alpha tile index, no clip alpha tile index.
+                            drawAlphaTileIndex = -1;
+                            clipAlphaTileIndex = -1;
+                            needNewAlphaTile = false;
+                        }
+                    }
+                } else {
+                    // No clip tile.
+                    if (clipTileBackdrop == 0) {
+                        // This is a blank clip tile. Cull the draw tile entirely.
+                        drawTileBackdrop = 0;
+                        needNewAlphaTile = false;
+                    } else {
+                        // NOTE(review): this allocates an alpha tile even when the draw tile
+                        // has no fills (`haveDrawAlphaMask` false) -- confirm this is intended.
+                        needNewAlphaTile = true;
+                    }
+                }
+            } else {
+                // This draw tile is outside the clip path bounding rect. Cull the draw tile.
+                drawTileBackdrop = 0;
+                needNewAlphaTile = false;
+            }
+        }
+
+        if (needNewAlphaTile) {
+            uint drawBatchAlphaTileIndex = atomicAdd(iIndirectDrawParams[4], 1u);
+            iAlphaTiles[drawBatchAlphaTileIndex * 2u + 0u] = drawTileIndex;
+            // -1 ("no clip") is stored as ~0u.
+            iAlphaTiles[drawBatchAlphaTileIndex * 2u + 1u] = uint(clipAlphaTileIndex);
+            drawAlphaTileIndex = int(drawBatchAlphaTileIndex) + uFirstAlphaTileIndex;
+        }
+
+        iDrawTiles[drawTileIndex * 4u + TILE_FIELD_BACKDROP_ALPHA_TILE_ID] =
+            (uint(drawAlphaTileIndex) & 0x00ffffffu) | (uint(drawBackdropDelta) << 24);
+        iDrawTiles[drawTileIndex * 4u + TILE_FIELD_CONTROL] =
+            drawTileWord | (uint(drawTileBackdrop) << 24);
+
+        // Write to Z-buffer if necessary.
+        ivec2 tileCoord = ivec2(tileX, tileY) + ivec2(drawTileRect.xy);
+        int tileMapIndex = tileCoord.y * uFramebufferTileSize.x + tileCoord.x;
+        if (zWrite && drawTileBackdrop != 0 && drawAlphaTileIndex < 0)
+            atomicMax(iZBuffer[tileMapIndex], int(drawTileIndex));
+
+        // Stitch into the linked list if necessary.
+        if (drawTileBackdrop != 0 || drawAlphaTileIndex >= 0) {
+            int nextTileIndex = atomicExchange(iFirstTileMap[tileMapIndex], int(drawTileIndex));
+            iDrawTiles[drawTileIndex * 4u + TILE_FIELD_NEXT_TILE_ID] = uint(nextTileIndex);
+        }
+
+        currentBackdrop += drawBackdropDelta;
+    }
+}
diff --git a/shaders/d3d11/sort.cs.glsl b/shaders/d3d11/sort.cs.glsl
new file mode 100644
index 00000000..b89f1664
--- /dev/null
+++ b/shaders/d3d11/sort.cs.glsl
@@ -0,0 +1,93 @@
+#version 430
+
+// pathfinder/shaders/sort.cs.glsl
+//
+// Copyright © 2020 The Pathfinder Project Developers.
+//
+// Licensed under the Apache License, Version 2.0 or the MIT license
+// , at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#extension GL_GOOGLE_include_directive : enable
+
+precision highp float;
+
+#ifdef GL_ES
+precision highp sampler2D;
+#endif
+
+// Word offsets of the four fields that make up one entry of `iTiles`.
+#define TILE_FIELD_NEXT_TILE_ID 0
+#define TILE_FIELD_FIRST_FILL_ID 1
+#define TILE_FIELD_BACKDROP_ALPHA_TILE_ID 2
+#define TILE_FIELD_CONTROL 3
+
+uniform int uTileCount;
+
+layout(std430, binding = 0) buffer bTiles {
+    // [0]: next tile ID
+    // [1]: first fill ID
+    // [2]: backdrop delta upper 8 bits, alpha tile ID lower 24
+    // [3]: color/ctrl/backdrop word
+    restrict uint iTiles[];
+};
+
+layout(std430, binding = 1) buffer bFirstTileMap {
+    restrict int iFirstTileMap[];
+};
+
+layout(std430, binding = 2) buffer bZBuffer {
+    restrict readonly int iZBuffer[];
+};
+
+layout(local_size_x = 64) in;
+
+// Returns the head of the tile list for framebuffer tile `globalTileIndex`.
+int getFirst(uint globalTileIndex) {
+    return iFirstTileMap[globalTileIndex];
+}
+
+// Returns the "next tile ID" link of `tileIndex`.
+int getNextTile(int tileIndex) {
+    return int(iTiles[tileIndex * 4 + TILE_FIELD_NEXT_TILE_ID]);
+}
+
+// Overwrites the "next tile ID" link of `tileIndex`.
+void setNextTile(int tileIndex, int newNextTileIndex) {
+    iTiles[tileIndex * 4 + TILE_FIELD_NEXT_TILE_ID] = uint(newNextTileIndex);
+}
+
+// One invocation per framebuffer tile: rebuilds that tile's linked list in ascending tile-index
+// order by insertion sort, dropping entries whose index is below the tile's Z-buffer value.
+void main() {
+    uint globalTileIndex = gl_GlobalInvocationID.x;
+    if (globalTileIndex >= uint(uTileCount))
+        return;
+
+    int zValue = iZBuffer[globalTileIndex];
+
+    int unsortedFirstTileIndex = getFirst(globalTileIndex);
+    int sortedFirstTileIndex = -1;
+
+    while (unsortedFirstTileIndex >= 0) {
+        // Pop the head of the unsorted list before its link is overwritten below.
+        int currentTileIndex = unsortedFirstTileIndex;
+        unsortedFirstTileIndex = getNextTile(currentTileIndex);
+
+        if (currentTileIndex >= zValue) {
+            // Walk the sorted list to the first entry greater than the current one and splice
+            // the current tile in before it.
+            int prevTrialTileIndex = -1;
+            int trialTileIndex = sortedFirstTileIndex;
+            while (true) {
+                if (trialTileIndex < 0 || currentTileIndex < trialTileIndex) {
+                    if (prevTrialTileIndex < 0) {
+                        setNextTile(currentTileIndex, sortedFirstTileIndex);
+                        sortedFirstTileIndex = currentTileIndex;
+                    } else {
+                        setNextTile(currentTileIndex, trialTileIndex);
+                        setNextTile(prevTrialTileIndex, currentTileIndex);
+                    }
+                    break;
+                }
+                prevTrialTileIndex = trialTileIndex;
+                trialTileIndex = getNextTile(trialTileIndex);
+            }
+        }
+    }
+
+    iFirstTileMap[globalTileIndex] = sortedFirstTileIndex;
+}
diff --git a/shaders/d3d11/tile.cs.glsl b/shaders/d3d11/tile.cs.glsl
new file mode 100644
index 00000000..da171693
--- /dev/null
+++ b/shaders/d3d11/tile.cs.glsl
@@ -0,0 +1,157 @@
+#version 430
+
+// pathfinder/shaders/tile.cs.glsl
+//
+// Copyright © 2020 The Pathfinder Project Developers.
+//
+// Licensed under the Apache License, Version 2.0 or the MIT license
+// , at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+#extension GL_GOOGLE_include_directive : enable
+
+precision highp float;
+
+#ifdef GL_ES
+precision highp sampler2D;
+#endif
+
+layout(local_size_x = 16, local_size_y = 4) in;
+
+#include "tile_fragment.inc.glsl"
+#include "tile_vertex.inc.glsl"
+
+#define LOAD_ACTION_CLEAR 0
+#define LOAD_ACTION_LOAD 1
+
+// Word offsets of the four fields that make up one entry of `iTiles`.
+#define TILE_FIELD_NEXT_TILE_ID 0
+#define TILE_FIELD_FIRST_FILL_ID 1
+#define TILE_FIELD_BACKDROP_ALPHA_TILE_ID 2
+#define TILE_FIELD_CONTROL 3
+
+uniform int uLoadAction;
+uniform vec4 uClearColor;
+uniform vec2 uTileSize;
+uniform sampler2D uTextureMetadata;
+uniform ivec2 uTextureMetadataSize;
+uniform sampler2D uZBuffer;
+uniform ivec2 uZBufferSize;
+uniform sampler2D uColorTexture0;
+uniform sampler2D uMaskTexture0;
+uniform sampler2D uDestTexture;
+uniform sampler2D uGammaLUT;
+uniform vec2 uColorTextureSize0;
+uniform vec2 uMaskTextureSize0;
+uniform vec2 uFramebufferSize;
+uniform ivec2 uFramebufferTileSize;
+layout(rgba8) uniform image2D uDestImage;
+
+layout(std430, binding = 0) buffer bTiles {
+    // [0]: next tile ID
+    // [1]: first fill ID
+    // [2]: backdrop delta upper 8 bits, alpha tile ID lower 24 bits
+    // [3]: color/ctrl/backdrop word
+    restrict readonly uint iTiles[];
+};
+
+layout(std430, binding = 1) buffer bFirstTileMap {
+    restrict readonly int iFirstTileMap[];
+};
+
+// Returns the row-major index of `tileCoord` (relative to the rect origin) within `tileRect`,
+// offset by `bufferOffset`.
+uint calculateTileIndex(uint bufferOffset, uvec4 tileRect, uvec2 tileCoord) {
+    return bufferOffset + tileCoord.y * (tileRect.z - tileRect.x) + tileCoord.x;
+}
+
+// Flips `coords` vertically into image space.
+// NOTE(review): this maps y = 0 to `uFramebufferSize.y`, one past the last row if image rows
+// run 0..height-1 -- confirm whether `height - 1 - y` was intended.
+ivec2 toImageCoords(ivec2 coords) {
+    return ivec2(coords.x, uFramebufferSize.y - coords.y);
+}
+
+// One workgroup per framebuffer tile; each invocation owns a column of four vertically
+// adjacent pixels. Composites every tile in the framebuffer tile's linked list over the
+// destination (cleared or loaded) and writes the result back.
+void main() {
+    ivec2 tileCoord = ivec2(gl_WorkGroupID.xy);
+    ivec2 firstTileSubCoord = ivec2(gl_LocalInvocationID.xy) * ivec2(1, 4);
+    ivec2 firstFragCoord = tileCoord * ivec2(uTileSize) + firstTileSubCoord;
+
+    // Quick exit if this is guaranteed to be empty.
+    int tileIndex = iFirstTileMap[tileCoord.x + uFramebufferTileSize.x * tileCoord.y];
+    if (tileIndex < 0 && uLoadAction != LOAD_ACTION_CLEAR)
+        return;
+
+    // One destination color per pixel of this invocation's strip.
+    mat4 destColors;
+    for (int subY = 0; subY < 4; subY++) {
+        if (uLoadAction == LOAD_ACTION_CLEAR) {
+            destColors[subY] = uClearColor;
+        } else {
+            ivec2 imageCoords = toImageCoords(firstFragCoord + ivec2(0, subY));
+            destColors[subY] = imageLoad(uDestImage, imageCoords);
+        }
+    }
+
+    while (tileIndex >= 0) {
+        for (int subY = 0; subY < 4; subY++) {
+            ivec2 tileSubCoord = firstTileSubCoord + ivec2(0, subY);
+            vec2 fragCoord = vec2(firstFragCoord + ivec2(0, subY)) + vec2(0.5);
+
+            // Sign-extend the 24-bit alpha tile ID; negative means no alpha mask.
+            int alphaTileIndex =
+                int(iTiles[tileIndex * 4 + TILE_FIELD_BACKDROP_ALPHA_TILE_ID] << 8) >> 8;
+            // Control word: color entry in bits 0-15, tile ctrl in bits 16-23, backdrop in the
+            // top 8 bits.
+            uint tileControlWord = iTiles[tileIndex * 4 + TILE_FIELD_CONTROL];
+            uint colorEntry = tileControlWord & 0xffff;
+            int tileCtrl = int((tileControlWord >> 16) & 0xff);
+
+            int backdrop;
+            uvec2 maskTileCoord;
+            if (alphaTileIndex >= 0) {
+                backdrop = 0;
+                maskTileCoord = uvec2(alphaTileIndex & 0xff, alphaTileIndex >> 8) *
+                    uvec2(uTileSize);
+            } else {
+                // We have no alpha mask. Clear the mask bits so we don't try to look one up.
+                backdrop = int(tileControlWord) >> 24;
+                maskTileCoord = uvec2(0u);
+                tileCtrl &= ~(TILE_CTRL_MASK_MASK << TILE_CTRL_MASK_0_SHIFT);
+            }
+
+            vec3 maskTexCoord0 = vec3(vec2(ivec2(maskTileCoord) + tileSubCoord), backdrop);
+
+            vec2 colorTexCoord0;
+            vec4 baseColor, filterParams0, filterParams1, filterParams2;
+            int ctrl;
+            computeTileVaryings(fragCoord,
+                                int(colorEntry),
+                                uTextureMetadata,
+                                uTextureMetadataSize,
+                                colorTexCoord0,
+                                baseColor,
+                                filterParams0,
+                                filterParams1,
+                                filterParams2,
+                                ctrl);
+
+            vec4 srcColor = calculateColor(fragCoord,
+                                           uColorTexture0,
+                                           uMaskTexture0,
+                                           uDestTexture,
+                                           uGammaLUT,
+                                           uColorTextureSize0,
+                                           uMaskTextureSize0,
+                                           filterParams0,
+                                           filterParams1,
+                                           filterParams2,
+                                           uFramebufferSize,
+                                           ctrl,
+                                           maskTexCoord0,
+                                           colorTexCoord0,
+                                           baseColor,
+                                           tileCtrl);
+
+            // Source-over blend; the formula treats `srcColor` as premultiplied by alpha.
+            destColors[subY] = destColors[subY] * (1.0 - srcColor.a) + srcColor;
+        }
+
+        tileIndex = int(iTiles[tileIndex * 4 + TILE_FIELD_NEXT_TILE_ID]);
+    }
+
+    for (int subY = 0; subY < 4; subY++)
+        imageStore(uDestImage, toImageCoords(firstFragCoord + ivec2(0, subY)), destColors[subY]);
+}
diff --git a/shaders/fill.fs.glsl b/shaders/d3d9/fill.fs.glsl
similarity index 95%
rename from shaders/fill.fs.glsl
rename to shaders/d3d9/fill.fs.glsl
index 6ed65f9c..9543f9c3 100644
--- a/shaders/fill.fs.glsl
+++ b/shaders/d3d9/fill.fs.glsl
@@ -18,7 +18,7 @@ precision highp float;
 precision highp sampler2D;
 #endif
 
-#include "fill.inc.glsl"
+#include "fill_area.inc.glsl"
 
 uniform sampler2D uAreaLUT;
 
diff --git a/shaders/fill.vs.glsl b/shaders/d3d9/fill.vs.glsl
similarity index 51%
rename from shaders/fill.vs.glsl
rename to shaders/d3d9/fill.vs.glsl
index d513b5ef..640beb04 100644
--- a/shaders/fill.vs.glsl
+++ b/shaders/d3d9/fill.vs.glsl
@@ -10,6 +10,8 @@
 // option. This file may not be copied, modified, or distributed
 // except according to those terms.
+#extension GL_GOOGLE_include_directive : enable + precision highp float; #ifdef GL_ES @@ -20,36 +22,39 @@ uniform vec2 uFramebufferSize; uniform vec2 uTileSize; in uvec2 aTessCoord; -in uint aFromPx; -in uint aToPx; -in vec2 aFromSubpx; -in vec2 aToSubpx; -in uint aTileIndex; +in uvec4 aLineSegment; +in int aTileIndex; out vec2 vFrom; out vec2 vTo; -vec2 computeTileOffset(uint tileIndex, float stencilTextureWidth) { - uint tilesPerRow = uint(stencilTextureWidth / uTileSize.x); +vec2 computeTileOffset(uint tileIndex, float stencilTextureWidth, vec2 tileSize) { + uint tilesPerRow = uint(stencilTextureWidth / tileSize.x); uvec2 tileOffset = uvec2(tileIndex % tilesPerRow, tileIndex / tilesPerRow); - return vec2(tileOffset) * uTileSize * vec2(1.0, 0.25); + return vec2(tileOffset) * tileSize * vec2(1.0, 0.25); } -void main() { - vec2 tileOrigin = computeTileOffset(aTileIndex, uFramebufferSize.x); +vec4 computeVertexPosition(uint tileIndex, + uvec2 tessCoord, + uvec4 packedLineSegment, + vec2 tileSize, + vec2 framebufferSize, + out vec2 outFrom, + out vec2 outTo) { + vec2 tileOrigin = computeTileOffset(uint(tileIndex), framebufferSize.x, tileSize); - vec2 from = vec2(aFromPx & 15u, aFromPx >> 4u) + aFromSubpx; - vec2 to = vec2(aToPx & 15u, aToPx >> 4u) + aToSubpx; + vec4 lineSegment = vec4(packedLineSegment) / 256.0; + vec2 from = lineSegment.xy, to = lineSegment.zw; vec2 position; - if (aTessCoord.x == 0u) + if (tessCoord.x == 0u) position.x = floor(min(from.x, to.x)); else position.x = ceil(max(from.x, to.x)); - if (aTessCoord.y == 0u) + if (tessCoord.y == 0u) position.y = floor(min(from.y, to.y)); else - position.y = uTileSize.y; + position.y = tileSize.y; position.y = floor(position.y * 0.25); // Since each fragment corresponds to 4 pixels on a scanline, the varying interpolation will @@ -57,12 +62,22 @@ void main() { // do our coverage calculation on the center of the first pixel in the strip instead, at pixel // offset 0.5. This adjustment of 1.5 accomplishes that. 
vec2 offset = vec2(0.0, 1.5) - position * vec2(1.0, 4.0); - vFrom = from + offset; - vTo = to + offset; + outFrom = from + offset; + outTo = to + offset; - vec2 globalPosition = (tileOrigin + position) / uFramebufferSize * 2.0 - 1.0; + vec2 globalPosition = (tileOrigin + position) / framebufferSize * 2.0 - 1.0; #ifdef PF_ORIGIN_UPPER_LEFT globalPosition.y = -globalPosition.y; #endif - gl_Position = vec4(globalPosition, 0.0, 1.0); + return vec4(globalPosition, 0.0, 1.0); +} + +void main() { + gl_Position = computeVertexPosition(uint(aTileIndex), + aTessCoord, + aLineSegment, + uTileSize, + uFramebufferSize, + vFrom, + vTo); } diff --git a/shaders/d3d9/tile.fs.glsl b/shaders/d3d9/tile.fs.glsl new file mode 100644 index 00000000..25afb515 --- /dev/null +++ b/shaders/d3d9/tile.fs.glsl @@ -0,0 +1,63 @@ +#version 330 + +// pathfinder/shaders/tile.fs.glsl +// +// Copyright © 2020 The Pathfinder Project Developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#extension GL_GOOGLE_include_directive : enable + +precision highp float; + +#ifdef GL_ES +precision highp sampler2D; +#endif + +#include "tile_fragment.inc.glsl" + +uniform sampler2D uColorTexture0; +uniform sampler2D uMaskTexture0; +uniform sampler2D uDestTexture; +uniform sampler2D uGammaLUT; +uniform vec2 uColorTextureSize0; +uniform vec2 uMaskTextureSize0; +uniform vec2 uFramebufferSize; + +in vec3 vMaskTexCoord0; +in vec2 vColorTexCoord0; +in vec4 vBaseColor; +in float vTileCtrl; +in vec4 vFilterParams0; +in vec4 vFilterParams1; +in vec4 vFilterParams2; +in float vCtrl; + +out vec4 oFragColor; + +// Entry point +// +// TODO(pcwalton): Generate this dynamically. 
+ +void main() { + oFragColor = calculateColor(gl_FragCoord.xy, + uColorTexture0, + uMaskTexture0, + uDestTexture, + uGammaLUT, + uColorTextureSize0, + uMaskTextureSize0, + vFilterParams0, + vFilterParams1, + vFilterParams2, + uFramebufferSize, + int(vCtrl), + vMaskTexCoord0, + vColorTexCoord0, + vBaseColor, + int(vTileCtrl)); +} diff --git a/shaders/d3d9/tile.vs.glsl b/shaders/d3d9/tile.vs.glsl new file mode 100644 index 00000000..bab4d802 --- /dev/null +++ b/shaders/d3d9/tile.vs.glsl @@ -0,0 +1,79 @@ +#version 330 + +// pathfinder/shaders/tile.vs.glsl +// +// Copyright © 2020 The Pathfinder Project Developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +#extension GL_GOOGLE_include_directive : enable + +precision highp float; + +#ifdef GL_ES +precision highp sampler2D; +#endif + +#include "tile_vertex.inc.glsl" + +uniform mat4 uTransform; +uniform vec2 uTileSize; +uniform sampler2D uTextureMetadata; +uniform ivec2 uTextureMetadataSize; +uniform sampler2D uZBuffer; +uniform ivec2 uZBufferSize; + +in ivec2 aTileOffset; +in ivec2 aTileOrigin; +in uvec4 aMaskTexCoord0; +in ivec2 aCtrlBackdrop; +in int aPathIndex; +in int aColor; + +out vec3 vMaskTexCoord0; +out vec2 vColorTexCoord0; +out vec4 vBaseColor; +out float vTileCtrl; +out vec4 vFilterParams0; +out vec4 vFilterParams1; +out vec4 vFilterParams2; +out float vCtrl; + +void main() { + vec2 tileOrigin = vec2(aTileOrigin), tileOffset = vec2(aTileOffset); + vec2 position = (tileOrigin + tileOffset) * uTileSize; + + ivec4 zValue = ivec4(texture(uZBuffer, (tileOrigin + vec2(0.5)) / vec2(uZBufferSize)) * 255.0); + if (aPathIndex < (zValue.x | (zValue.y << 8) | (zValue.z << 16) | (zValue.w << 24))) { + gl_Position = vec4(0.0); + return; + } + + uvec2 maskTileCoord = uvec2(aMaskTexCoord0.x, aMaskTexCoord0.y + 256u * aMaskTexCoord0.z); + vec2 maskTexCoord0 = 
(vec2(maskTileCoord) + tileOffset) * uTileSize; + if (aCtrlBackdrop.y == 0 && aMaskTexCoord0.w != 0u) { + gl_Position = vec4(0.0); + return; + } + + int ctrl; + computeTileVaryings(position, + aColor, + uTextureMetadata, + uTextureMetadataSize, + vColorTexCoord0, + vBaseColor, + vFilterParams0, + vFilterParams1, + vFilterParams2, + ctrl); + + vTileCtrl = float(aCtrlBackdrop.x); + vCtrl = float(ctrl); + vMaskTexCoord0 = vec3(maskTexCoord0, float(aCtrlBackdrop.y)); + gl_Position = uTransform * vec4(position, 0.0, 1.0); +} diff --git a/shaders/d3d9/tile_clip_combine.fs.glsl b/shaders/d3d9/tile_clip_combine.fs.glsl new file mode 100644 index 00000000..ccb2523b --- /dev/null +++ b/shaders/d3d9/tile_clip_combine.fs.glsl @@ -0,0 +1,31 @@ +#version 330 + +// pathfinder/shaders/tile_clip_combine.fs.glsl +// +// Copyright © 2020 The Pathfinder Project Developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +precision highp float; + +#ifdef GL_ES +precision highp sampler2D; +#endif + +uniform sampler2D uSrc; + +in vec2 vTexCoord0; +in float vBackdrop0; +in vec2 vTexCoord1; +in float vBackdrop1; + +out vec4 oFragColor; + +void main() { + oFragColor = min(abs(texture(uSrc, vTexCoord0) + vBackdrop0), + abs(texture(uSrc, vTexCoord1) + vBackdrop1)); +} diff --git a/shaders/d3d9/tile_clip_combine.vs.glsl b/shaders/d3d9/tile_clip_combine.vs.glsl new file mode 100644 index 00000000..03e6236b --- /dev/null +++ b/shaders/d3d9/tile_clip_combine.vs.glsl @@ -0,0 +1,51 @@ +#version 330 + +// pathfinder/shaders/tile_clip_combine.vs.glsl +// +// Copyright © 2020 The Pathfinder Project Developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. 
+ +precision highp float; + +#ifdef GL_ES +precision highp sampler2D; +#endif + +uniform vec2 uFramebufferSize; + +in ivec2 aTileOffset; +in int aDestTileIndex; +in int aDestBackdrop; +in int aSrcTileIndex; +in int aSrcBackdrop; + +out vec2 vTexCoord0; +out float vBackdrop0; +out vec2 vTexCoord1; +out float vBackdrop1; + +void main() { + vec2 destPosition = vec2(ivec2(aDestTileIndex % 256, aDestTileIndex / 256) + aTileOffset); + vec2 srcPosition = vec2(ivec2(aSrcTileIndex % 256, aSrcTileIndex / 256) + aTileOffset); + destPosition *= vec2(16.0, 4.0) / uFramebufferSize; + srcPosition *= vec2(16.0, 4.0) / uFramebufferSize; + + vTexCoord0 = destPosition; + vTexCoord1 = srcPosition; + + vBackdrop0 = float(aDestBackdrop); + vBackdrop1 = float(aSrcBackdrop); + + if (aDestTileIndex < 0) + destPosition = vec2(0.0); + +#ifdef PF_ORIGIN_UPPER_LEFT + destPosition.y = 1.0 - destPosition.y; +#endif + gl_Position = vec4(mix(vec2(-1.0), vec2(1.0), destPosition), 0.0, 1.0); +} diff --git a/shaders/tile_clip.fs.glsl b/shaders/d3d9/tile_clip_copy.fs.glsl similarity index 79% rename from shaders/tile_clip.fs.glsl rename to shaders/d3d9/tile_clip_copy.fs.glsl index e6c7b6ad..fcbb569f 100644 --- a/shaders/tile_clip.fs.glsl +++ b/shaders/d3d9/tile_clip_copy.fs.glsl @@ -1,6 +1,6 @@ #version 330 -// pathfinder/shaders/tile_clip.fs.glsl +// pathfinder/shaders/tile_clip_copy.fs.glsl // // Copyright © 2020 The Pathfinder Project Developers. 
// @@ -19,10 +19,9 @@ precision highp sampler2D; uniform sampler2D uSrc; in vec2 vTexCoord; -in float vBackdrop; out vec4 oFragColor; void main() { - oFragColor = clamp(abs(texture(uSrc, vTexCoord) + vBackdrop), 0.0, 1.0); + oFragColor = texture(uSrc, vTexCoord); } diff --git a/shaders/tile_clip.vs.glsl b/shaders/d3d9/tile_clip_copy.vs.glsl similarity index 52% rename from shaders/tile_clip.vs.glsl rename to shaders/d3d9/tile_clip_copy.vs.glsl index 47186462..cdc25913 100644 --- a/shaders/tile_clip.vs.glsl +++ b/shaders/d3d9/tile_clip_copy.vs.glsl @@ -1,6 +1,6 @@ #version 330 -// pathfinder/shaders/tile_clip.vs.glsl +// pathfinder/shaders/tile_clip_copy.vs.glsl // // Copyright © 2020 The Pathfinder Project Developers. // @@ -16,18 +16,24 @@ precision highp float; precision highp sampler2D; #endif +uniform vec2 uFramebufferSize; + in ivec2 aTileOffset; -in ivec2 aDestTileOrigin; -in ivec2 aSrcTileOrigin; -in int aSrcBackdrop; +in int aTileIndex; out vec2 vTexCoord; -out float vBackdrop; void main() { - vec2 destPosition = vec2(aDestTileOrigin + aTileOffset) / vec2(256.0); - vec2 srcPosition = vec2(aSrcTileOrigin + aTileOffset) / vec2(256.0); - vTexCoord = srcPosition; - vBackdrop = float(aSrcBackdrop); - gl_Position = vec4(mix(vec2(-1.0), vec2(1.0), destPosition), 0.0, 1.0); + vec2 position = vec2(ivec2(aTileIndex % 256, aTileIndex / 256) + aTileOffset); + position *= vec2(16.0, 4.0) / uFramebufferSize; + + vTexCoord = position; + + if (aTileIndex < 0) + position = vec2(0.0); + +#ifdef PF_ORIGIN_UPPER_LEFT + position.y = 1.0 - position.y; +#endif + gl_Position = vec4(mix(vec2(-1.0), vec2(1.0), position), 0.0, 1.0); } diff --git a/shaders/tile_copy.fs.glsl b/shaders/d3d9/tile_copy.fs.glsl similarity index 100% rename from shaders/tile_copy.fs.glsl rename to shaders/d3d9/tile_copy.fs.glsl diff --git a/shaders/tile_copy.vs.glsl b/shaders/d3d9/tile_copy.vs.glsl similarity index 100% rename from shaders/tile_copy.vs.glsl rename to shaders/d3d9/tile_copy.vs.glsl diff 
--git a/shaders/debug_solid.fs.glsl b/shaders/debug/solid.fs.glsl similarity index 93% rename from shaders/debug_solid.fs.glsl rename to shaders/debug/solid.fs.glsl index 5ceb65f9..8dacbc8e 100644 --- a/shaders/debug_solid.fs.glsl +++ b/shaders/debug/solid.fs.glsl @@ -1,6 +1,6 @@ #version 330 -// pathfinder/shaders/debug_solid.fs.glsl +// pathfinder/shaders/debug/solid.fs.glsl // // Copyright © 2019 The Pathfinder Project Developers. // diff --git a/shaders/debug_solid.vs.glsl b/shaders/debug/solid.vs.glsl similarity index 93% rename from shaders/debug_solid.vs.glsl rename to shaders/debug/solid.vs.glsl index b10dde44..eb71b1a6 100644 --- a/shaders/debug_solid.vs.glsl +++ b/shaders/debug/solid.vs.glsl @@ -1,6 +1,6 @@ #version 330 -// pathfinder/shaders/debug_solid.vs.glsl +// pathfinder/shaders/debug/solid.vs.glsl // // Copyright © 2019 The Pathfinder Project Developers. // diff --git a/shaders/debug_texture.fs.glsl b/shaders/debug/texture.fs.glsl similarity index 93% rename from shaders/debug_texture.fs.glsl rename to shaders/debug/texture.fs.glsl index c01d386a..67e8f979 100644 --- a/shaders/debug_texture.fs.glsl +++ b/shaders/debug/texture.fs.glsl @@ -1,6 +1,6 @@ #version 330 -// pathfinder/shaders/debug_texture.fs.glsl +// pathfinder/shaders/debug/texture.fs.glsl // // Copyright © 2019 The Pathfinder Project Developers. // diff --git a/shaders/debug_texture.vs.glsl b/shaders/debug/texture.vs.glsl similarity index 94% rename from shaders/debug_texture.vs.glsl rename to shaders/debug/texture.vs.glsl index 2f918e29..6477ca4e 100644 --- a/shaders/debug_texture.vs.glsl +++ b/shaders/debug/texture.vs.glsl @@ -1,6 +1,6 @@ #version 330 -// pathfinder/shaders/debug_texture.vs.glsl +// pathfinder/shaders/debug/texture.vs.glsl // // Copyright © 2019 The Pathfinder Project Developers. 
// diff --git a/shaders/fill.cs.glsl b/shaders/fill.cs.glsl deleted file mode 100644 index 700c2946..00000000 --- a/shaders/fill.cs.glsl +++ /dev/null @@ -1,69 +0,0 @@ -#version 430 - -// pathfinder/shaders/fill.cs.glsl -// -// Copyright © 2020 The Pathfinder Project Developers. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -#extension GL_GOOGLE_include_directive : enable - -precision highp float; - -#ifdef GL_ES -precision highp sampler2D; -#endif - -#include "fill.inc.glsl" - -layout(local_size_x = 16, local_size_y = 4) in; - -uniform writeonly image2D uDest; -uniform sampler2D uAreaLUT; -uniform int uFirstTileIndex; - -layout(std430, binding = 0) buffer bFills { - restrict readonly uvec2 iFills[]; -}; - -layout(std430, binding = 1) buffer bNextFills { - restrict readonly int iNextFills[]; -}; - -layout(std430, binding = 2) buffer bFillTileMap { - restrict readonly int iFillTileMap[]; -}; - -void main() { - ivec2 tileSubCoord = ivec2(gl_LocalInvocationID.xy) * ivec2(1, 4); - uint tileIndexOffset = gl_WorkGroupID.z; - - uint tileIndex = tileIndexOffset + uint(uFirstTileIndex); - - int fillIndex = iFillTileMap[tileIndex]; - if (fillIndex < 0) - return; - - vec4 coverages = vec4(0.0); - do { - uvec2 fill = iFills[fillIndex]; - vec2 from = vec2(fill.y & 0xf, (fill.y >> 4u) & 0xf) + - vec2(fill.x & 0xff, (fill.x >> 8u) & 0xff) / 256.0; - vec2 to = vec2((fill.y >> 8u) & 0xf, (fill.y >> 12u) & 0xf) + - vec2((fill.x >> 16u) & 0xff, (fill.x >> 24u) & 0xff) / 256.0; - - coverages += computeCoverage(from - (vec2(tileSubCoord) + vec2(0.5)), - to - (vec2(tileSubCoord) + vec2(0.5)), - uAreaLUT); - - fillIndex = iNextFills[fillIndex]; - } while (fillIndex >= 0); - - ivec2 tileOrigin = ivec2(tileIndex & 0xff, (tileIndex >> 8u) & 0xff) * ivec2(16, 4); - ivec2 destCoord = tileOrigin + ivec2(gl_LocalInvocationID.xy); - imageStore(uDest, 
destCoord, coverages); -} diff --git a/shaders/fill.inc.glsl b/shaders/fill_area.inc.glsl similarity index 96% rename from shaders/fill.inc.glsl rename to shaders/fill_area.inc.glsl index c22125ae..02c3b6e3 100644 --- a/shaders/fill.inc.glsl +++ b/shaders/fill_area.inc.glsl @@ -1,4 +1,4 @@ -// pathfinder/shaders/fill.inc.glsl +// pathfinder/shaders/fill_area.inc.glsl // // Copyright © 2020 The Pathfinder Project Developers. // diff --git a/shaders/tile.vs.glsl b/shaders/tile.vs.glsl deleted file mode 100644 index 3366e129..00000000 --- a/shaders/tile.vs.glsl +++ /dev/null @@ -1,56 +0,0 @@ -#version 330 - -// pathfinder/shaders/tile.vs.glsl -// -// Copyright © 2020 The Pathfinder Project Developers. -// -// Licensed under the Apache License, Version 2.0 or the MIT license -// , at your -// option. This file may not be copied, modified, or distributed -// except according to those terms. - -precision highp float; - -#ifdef GL_ES -precision highp sampler2D; -#endif - -uniform mat4 uTransform; -uniform vec2 uTileSize; -uniform sampler2D uTextureMetadata; -uniform ivec2 uTextureMetadataSize; - -in ivec2 aTileOffset; -in ivec2 aTileOrigin; -in uvec2 aMaskTexCoord0; -in ivec2 aMaskBackdrop; -in int aColor; -in int aTileCtrl; - -out vec3 vMaskTexCoord0; -out vec2 vColorTexCoord0; -out vec4 vBaseColor; -out float vTileCtrl; - -void main() { - vec2 tileOrigin = vec2(aTileOrigin), tileOffset = vec2(aTileOffset); - vec2 position = (tileOrigin + tileOffset) * uTileSize; - - vec2 maskTexCoord0 = (vec2(aMaskTexCoord0) + tileOffset) * uTileSize; - - vec2 textureMetadataScale = vec2(1.0) / vec2(uTextureMetadataSize); - vec2 metadataEntryCoord = vec2(aColor % 128 * 4, aColor / 128); - vec2 colorTexMatrix0Coord = (metadataEntryCoord + vec2(0.5, 0.5)) * textureMetadataScale; - vec2 colorTexOffsetsCoord = (metadataEntryCoord + vec2(1.5, 0.5)) * textureMetadataScale; - vec2 baseColorCoord = (metadataEntryCoord + vec2(2.5, 0.5)) * textureMetadataScale; - vec4 colorTexMatrix0 = 
texture(uTextureMetadata, colorTexMatrix0Coord); - vec4 colorTexOffsets = texture(uTextureMetadata, colorTexOffsetsCoord); - vec4 baseColor = texture(uTextureMetadata, baseColorCoord); - - vColorTexCoord0 = mat2(colorTexMatrix0) * position + colorTexOffsets.xy; - vMaskTexCoord0 = vec3(maskTexCoord0, float(aMaskBackdrop.x)); - vBaseColor = baseColor; - vTileCtrl = float(aTileCtrl); - gl_Position = uTransform * vec4(position, 0.0, 1.0); -} diff --git a/shaders/tile.fs.glsl b/shaders/tile_fragment.inc.glsl similarity index 93% rename from shaders/tile.fs.glsl rename to shaders/tile_fragment.inc.glsl index 487fcb5c..a03a13d2 100644 --- a/shaders/tile.fs.glsl +++ b/shaders/tile_fragment.inc.glsl @@ -1,6 +1,4 @@ -#version 330 - -// pathfinder/shaders/tile.fs.glsl +// pathfinder/shaders/tile_fragment.inc.glsl // // Copyright © 2020 The Pathfinder Project Developers. // @@ -31,14 +29,6 @@ // + + // Color UV 0 Color UV 1 -#extension GL_GOOGLE_include_directive : enable - -precision highp float; - -#ifdef GL_ES -precision highp sampler2D; -#endif - #define EPSILON 0.00001 #define FRAC_6_PI 1.9098593171027443 @@ -81,25 +71,6 @@ precision highp sampler2D; #define COMBINER_CTRL_COLOR_COMBINE_SHIFT 6 #define COMBINER_CTRL_COMPOSITE_SHIFT 8 -uniform sampler2D uColorTexture0; -uniform sampler2D uMaskTexture0; -uniform sampler2D uDestTexture; -uniform sampler2D uGammaLUT; -uniform vec2 uColorTextureSize0; -uniform vec2 uMaskTextureSize0; -uniform vec4 uFilterParams0; -uniform vec4 uFilterParams1; -uniform vec4 uFilterParams2; -uniform vec2 uFramebufferSize; -uniform int uCtrl; - -in vec3 vMaskTexCoord0; -in vec2 vColorTexCoord0; -in vec4 vBaseColor; -in float vTileCtrl; - -out vec4 oFragColor; - // Color sampling vec4 sampleColor(sampler2D colorTexture, vec2 colorTexCoord) { @@ -565,27 +536,42 @@ float sampleMask(float maskAlpha, // Main function -void calculateColor(int tileCtrl, int ctrl) { +vec4 calculateColor(vec2 fragCoord, + sampler2D colorTexture0, + sampler2D maskTexture0, 
+ sampler2D destTexture, + sampler2D gammaLUT, + vec2 colorTextureSize0, + vec2 maskTextureSize0, + vec4 filterParams0, + vec4 filterParams1, + vec4 filterParams2, + vec2 framebufferSize, + int ctrl, + vec3 maskTexCoord0, + vec2 colorTexCoord0, + vec4 baseColor, + int tileCtrl) { // Sample mask. int maskCtrl0 = (tileCtrl >> TILE_CTRL_MASK_0_SHIFT) & TILE_CTRL_MASK_MASK; float maskAlpha = 1.0; - maskAlpha = sampleMask(maskAlpha, uMaskTexture0, uMaskTextureSize0, vMaskTexCoord0, maskCtrl0); + maskAlpha = sampleMask(maskAlpha, maskTexture0, maskTextureSize0, maskTexCoord0, maskCtrl0); // Sample color. - vec4 color = vBaseColor; + vec4 color = baseColor; int color0Combine = (ctrl >> COMBINER_CTRL_COLOR_COMBINE_SHIFT) & COMBINER_CTRL_COLOR_COMBINE_MASK; if (color0Combine != 0) { int color0Filter = (ctrl >> COMBINER_CTRL_COLOR_FILTER_SHIFT) & COMBINER_CTRL_FILTER_MASK; - vec4 color0 = filterColor(vColorTexCoord0, - uColorTexture0, - uGammaLUT, - uColorTextureSize0, - gl_FragCoord.xy, - uFramebufferSize, - uFilterParams0, - uFilterParams1, - uFilterParams2, + vec4 color0 = filterColor(colorTexCoord0, + colorTexture0, + gammaLUT, + colorTextureSize0, + fragCoord, + framebufferSize, + filterParams0, + filterParams1, + filterParams2, color0Filter); color = combineColor0(color, color0, color0Combine); } @@ -595,17 +581,9 @@ void calculateColor(int tileCtrl, int ctrl) { // Apply composite. int compositeOp = (ctrl >> COMBINER_CTRL_COMPOSITE_SHIFT) & COMBINER_CTRL_COMPOSITE_MASK; - color = composite(color, uDestTexture, uFramebufferSize, gl_FragCoord.xy, compositeOp); + color = composite(color, destTexture, framebufferSize, fragCoord, compositeOp); // Premultiply alpha. color.rgb *= color.a; - oFragColor = color; -} - -// Entry point -// -// TODO(pcwalton): Generate this dynamically. 
- -void main() { - calculateColor(int(vTileCtrl), uCtrl); + return color; } diff --git a/shaders/tile_vertex.inc.glsl b/shaders/tile_vertex.inc.glsl new file mode 100644 index 00000000..c99778a4 --- /dev/null +++ b/shaders/tile_vertex.inc.glsl @@ -0,0 +1,40 @@ +// pathfinder/shaders/tile_vertex.inc.glsl +// +// Copyright © 2020 The Pathfinder Project Developers. +// +// Licensed under the Apache License, Version 2.0 or the MIT license +// , at your +// option. This file may not be copied, modified, or distributed +// except according to those terms. + +vec4 fetchUnscaled(sampler2D srcTexture, vec2 scale, vec2 originCoord, int entry) { + return texture(srcTexture, (originCoord + vec2(0.5) + vec2(entry, 0)) * scale); +} + +void computeTileVaryings(vec2 position, + int colorEntry, + sampler2D textureMetadata, + ivec2 textureMetadataSize, + out vec2 outColorTexCoord0, + out vec4 outBaseColor, + out vec4 outFilterParams0, + out vec4 outFilterParams1, + out vec4 outFilterParams2, + out int outCtrl) { + vec2 metadataScale = vec2(1.0) / vec2(textureMetadataSize); + vec2 metadataEntryCoord = vec2(colorEntry % 128 * 8, colorEntry / 128); + vec4 colorTexMatrix0 = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 0); + vec4 colorTexOffsets = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 1); + vec4 baseColor = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 2); + vec4 filterParams0 = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 3); + vec4 filterParams1 = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 4); + vec4 filterParams2 = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 5); + vec4 extra = fetchUnscaled(textureMetadata, metadataScale, metadataEntryCoord, 6); + outColorTexCoord0 = mat2(colorTexMatrix0) * position + colorTexOffsets.xy; + outBaseColor = baseColor; + outFilterParams0 = filterParams0; + outFilterParams1 = filterParams1; + outFilterParams2 = 
filterParams2; + outCtrl = int(extra.x); +} diff --git a/svg/src/lib.rs b/svg/src/lib.rs index 7f7de163..1abbd470 100644 --- a/svg/src/lib.rs +++ b/svg/src/lib.rs @@ -280,7 +280,7 @@ impl BuiltSVG { path.set_clip_path(state.clip_path); path.set_fill_rule(fill_rule); path.set_name(name); - self.scene.push_path(path); + self.scene.push_draw_path(path); } } diff --git a/swf/src/lib.rs b/swf/src/lib.rs index 911181fc..39c5593a 100644 --- a/swf/src/lib.rs +++ b/swf/src/lib.rs @@ -194,7 +194,7 @@ pub fn draw_paths_into_scene(library: &SymbolLibrary, scene: &mut Scene) { let mut path = DrawPath::new(path, paint_id); path.set_fill_rule(FillRule::EvenOdd); - scene.push_path(path); + scene.push_draw_path(path); } } } diff --git a/text/src/lib.rs b/text/src/lib.rs index 575e7f4c..c4300987 100644 --- a/text/src/lib.rs +++ b/text/src/lib.rs @@ -137,7 +137,7 @@ impl FontContext where F: Loader { path.set_clip_path(render_options.clip_path); path.set_blend_mode(render_options.blend_mode); - scene.push_path(path); + scene.push_draw_path(path); Ok(()) } diff --git a/ui/src/lib.rs b/ui/src/lib.rs index 775032db..c9558cb5 100644 --- a/ui/src/lib.rs +++ b/ui/src/lib.rs @@ -20,9 +20,10 @@ use hashbrown::HashMap; use pathfinder_color::ColorU; use pathfinder_geometry::rect::RectI; use pathfinder_geometry::vector::{Vector2F, Vector2I, vec2i}; -use pathfinder_gpu::{BlendFactor, BlendState, BufferData, BufferTarget, BufferUploadMode, Device}; -use pathfinder_gpu::{Primitive, RenderOptions, RenderState, RenderTarget, TextureFormat}; -use pathfinder_gpu::{UniformData, VertexAttrClass, VertexAttrDescriptor, VertexAttrType}; +use pathfinder_gpu::allocator::{BufferTag, GPUMemoryAllocator}; +use pathfinder_gpu::{BlendFactor, BlendState, BufferTarget, Device, Primitive, RenderOptions}; +use pathfinder_gpu::{RenderState, RenderTarget, TextureFormat, UniformData, VertexAttrClass}; +use pathfinder_gpu::{VertexAttrDescriptor, VertexAttrType}; use pathfinder_resources::ResourceLoader; use 
pathfinder_simd::default::F32x4; use serde_json; @@ -71,9 +72,7 @@ pub struct UIPresenter where D: Device { framebuffer_size: Vector2I, texture_program: DebugTextureProgram, - texture_vertex_array: DebugTextureVertexArray, solid_program: DebugSolidProgram, - solid_vertex_array: DebugSolidVertexArray, font: DebugFont, font_texture: D::Texture, @@ -85,11 +84,9 @@ impl UIPresenter where D: Device { pub fn new(device: &D, resources: &dyn ResourceLoader, framebuffer_size: Vector2I) -> UIPresenter { let texture_program = DebugTextureProgram::new(device, resources); - let texture_vertex_array = DebugTextureVertexArray::new(device, &texture_program); let font = DebugFont::load(resources); let solid_program = DebugSolidProgram::new(device, resources); - let solid_vertex_array = DebugSolidVertexArray::new(device, &solid_program); let font_texture = device.create_texture_from_png(resources, FONT_PNG_NAME, @@ -108,10 +105,8 @@ impl UIPresenter where D: Device { framebuffer_size, texture_program, - texture_vertex_array, font, solid_program, - solid_vertex_array, font_texture, corner_fill_texture, @@ -128,16 +123,25 @@ impl UIPresenter where D: Device { } - pub fn draw_solid_rect(&self, device: &D, rect: RectI, color: ColorU) { - self.draw_rect(device, rect, color, true); + pub fn draw_solid_rect(&self, + device: &D, + allocator: &mut GPUMemoryAllocator, + rect: RectI, + color: ColorU) { + self.draw_rect(device, allocator, rect, color, true); } - pub fn draw_rect_outline(&self, device: &D, rect: RectI, color: ColorU) { - self.draw_rect(device, rect, color, false); + pub fn draw_rect_outline(&self, + device: &D, + allocator: &mut GPUMemoryAllocator, + rect: RectI, + color: ColorU) { + self.draw_rect(device, allocator, rect, color, false); } fn draw_rect(&self, device: &D, + allocator: &mut GPUMemoryAllocator, rect: RectI, color: ColorU, filled: bool) { @@ -150,12 +154,14 @@ impl UIPresenter where D: Device { if filled { self.draw_solid_rects_with_vertex_data(device, + allocator, 
&vertex_data, &QUAD_INDICES, color, true); } else { self.draw_solid_rects_with_vertex_data(device, + allocator, &vertex_data, &RECT_LINE_INDICES, color, @@ -165,39 +171,60 @@ impl UIPresenter where D: Device { fn draw_solid_rects_with_vertex_data(&self, device: &D, + allocator: &mut GPUMemoryAllocator, vertex_data: &[DebugSolidVertex], index_data: &[u32], color: ColorU, filled: bool) { - device.allocate_buffer(&self.solid_vertex_array.vertex_buffer, - BufferData::Memory(vertex_data), - BufferTarget::Vertex); - device.allocate_buffer(&self.solid_vertex_array.index_buffer, - BufferData::Memory(index_data), - BufferTarget::Index); + let vertex_buffer_id = + allocator.allocate_buffer::(device, + vertex_data.len() as u64, + BufferTag("SolidVertexDebug")); + let index_buffer_id = allocator.allocate_buffer::(device, + index_data.len() as u64, + BufferTag("SolidIndexDebug")); + { + let vertex_buffer = allocator.get_buffer(vertex_buffer_id); + let index_buffer = allocator.get_buffer(index_buffer_id); + device.upload_to_buffer(&vertex_buffer, 0, vertex_data, BufferTarget::Vertex); + device.upload_to_buffer(&index_buffer, 0, index_data, BufferTarget::Index); + let solid_vertex_array = DebugSolidVertexArray::new(device, + &self.solid_program, + vertex_buffer, + index_buffer); - let primitive = if filled { Primitive::Triangles } else { Primitive::Lines }; - device.draw_elements(index_data.len() as u32, &RenderState { - target: &RenderTarget::Default, - program: &self.solid_program.program, - vertex_array: &self.solid_vertex_array.vertex_array, - primitive, - uniforms: &[ - (&self.solid_program.framebuffer_size_uniform, - UniformData::Vec2(self.framebuffer_size.0.to_f32x2())), - (&self.solid_program.color_uniform, get_color_uniform(color)), - ], - textures: &[], - images: &[], - viewport: RectI::new(Vector2I::default(), self.framebuffer_size), - options: RenderOptions { - blend: Some(alpha_blend_state()), - ..RenderOptions::default() - }, - }); + let primitive = if filled { 
Primitive::Triangles } else { Primitive::Lines }; + device.draw_elements(index_data.len() as u32, &RenderState { + target: &RenderTarget::Default, + program: &self.solid_program.program, + vertex_array: &solid_vertex_array.vertex_array, + primitive, + uniforms: &[ + (&self.solid_program.framebuffer_size_uniform, + UniformData::Vec2(self.framebuffer_size.0.to_f32x2())), + (&self.solid_program.color_uniform, get_color_uniform(color)), + ], + textures: &[], + images: &[], + storage_buffers: &[], + viewport: RectI::new(Vector2I::default(), self.framebuffer_size), + options: RenderOptions { + blend: Some(alpha_blend_state()), + ..RenderOptions::default() + }, + }); + } + + allocator.free_buffer(index_buffer_id); + allocator.free_buffer(vertex_buffer_id); } - pub fn draw_text(&self, device: &D, string: &str, origin: Vector2I, invert: bool) { + pub fn draw_text(&self, + device: &D, + allocator: &mut GPUMemoryAllocator, + string: &str, + origin: Vector2I, + invert: bool) { let mut next = origin; let char_count = string.chars().count(); let mut vertex_data = Vec::with_capacity(char_count * 4); @@ -227,6 +254,7 @@ impl UIPresenter where D: Device { let color = if invert { INVERTED_TEXT_COLOR } else { TEXT_COLOR }; self.draw_texture_with_vertex_data(device, + allocator, &vertex_data, &index_data, &self.font_texture, @@ -235,6 +263,7 @@ impl UIPresenter where D: Device { pub fn draw_texture(&self, device: &D, + allocator: &mut GPUMemoryAllocator, origin: Vector2I, texture: &D::Texture, color: ColorU) { @@ -247,7 +276,12 @@ impl UIPresenter where D: Device { DebugTextureVertex::new(position_rect.lower_left(), tex_coord_rect.lower_left()), ]; - self.draw_texture_with_vertex_data(device, &vertex_data, &QUAD_INDICES, texture, color); + self.draw_texture_with_vertex_data(device, + allocator, + &vertex_data, + &QUAD_INDICES, + texture, + color); } pub fn measure_text(&self, string: &str) -> i32 { @@ -268,10 +302,14 @@ impl UIPresenter where D: Device { SEGMENT_SIZE * segment_count 
as i32 + (segment_count - 1) as i32 } - pub fn draw_solid_rounded_rect(&self, device: &D, rect: RectI, color: ColorU) { + pub fn draw_solid_rounded_rect(&self, + device: &D, + allocator: &mut GPUMemoryAllocator, + rect: RectI, + color: ColorU) { let corner_texture = self.corner_texture(true); let corner_rects = CornerRects::new(device, rect, corner_texture); - self.draw_rounded_rect_corners(device, color, corner_texture, &corner_rects); + self.draw_rounded_rect_corners(device, allocator, color, corner_texture, &corner_rects); let solid_rect_mid = RectI::from_points(corner_rects.upper_left.upper_right(), corner_rects.lower_right.lower_left()); @@ -302,16 +340,21 @@ impl UIPresenter where D: Device { index_data.extend(QUAD_INDICES.iter().map(|&index| index + 8)); self.draw_solid_rects_with_vertex_data(device, + allocator, &vertex_data, &index_data[0..18], color, true); } - pub fn draw_rounded_rect_outline(&self, device: &D, rect: RectI, color: ColorU) { + pub fn draw_rounded_rect_outline(&self, + device: &D, + allocator: &mut GPUMemoryAllocator, + rect: RectI, + color: ColorU) { let corner_texture = self.corner_texture(false); let corner_rects = CornerRects::new(device, rect, corner_texture); - self.draw_rounded_rect_corners(device, color, corner_texture, &corner_rects); + self.draw_rounded_rect_corners(device, allocator, color, corner_texture, &corner_rects); let vertex_data = vec![ DebugSolidVertex::new(corner_rects.upper_left.upper_right()), @@ -325,18 +368,34 @@ impl UIPresenter where D: Device { ]; let index_data = &OUTLINE_RECT_LINE_INDICES; - self.draw_solid_rects_with_vertex_data(device, &vertex_data, index_data, color, false); + self.draw_solid_rects_with_vertex_data(device, + allocator, + &vertex_data, + index_data, + color, + false); } // TODO(pcwalton): `LineSegment2I`. 
- fn draw_line(&self, device: &D, from: Vector2I, to: Vector2I, color: ColorU) { + fn draw_line(&self, + device: &D, + allocator: &mut GPUMemoryAllocator, + from: Vector2I, + to: Vector2I, + color: ColorU) { let vertex_data = vec![DebugSolidVertex::new(from), DebugSolidVertex::new(to)]; - self.draw_solid_rects_with_vertex_data(device, &vertex_data, &[0, 1], color, false); + self.draw_solid_rects_with_vertex_data(device, + allocator, + &vertex_data, + &[0, 1], + color, + false); } fn draw_rounded_rect_corners(&self, device: &D, + allocator: &mut GPUMemoryAllocator, color: ColorU, texture: &D::Texture, corner_rects: &CornerRects) { @@ -387,7 +446,12 @@ impl UIPresenter where D: Device { index_data.extend(QUAD_INDICES.iter().map(|&index| index + 8)); index_data.extend(QUAD_INDICES.iter().map(|&index| index + 12)); - self.draw_texture_with_vertex_data(device, &vertex_data, &index_data, texture, color); + self.draw_texture_with_vertex_data(device, + allocator, + &vertex_data, + &index_data, + texture, + color); } fn corner_texture(&self, filled: bool) -> &D::Texture { @@ -396,44 +460,66 @@ impl UIPresenter where D: Device { fn draw_texture_with_vertex_data(&self, device: &D, + allocator: &mut GPUMemoryAllocator, vertex_data: &[DebugTextureVertex], index_data: &[u32], texture: &D::Texture, color: ColorU) { - device.allocate_buffer(&self.texture_vertex_array.vertex_buffer, - BufferData::Memory(vertex_data), - BufferTarget::Vertex); - device.allocate_buffer(&self.texture_vertex_array.index_buffer, - BufferData::Memory(index_data), - BufferTarget::Index); + let vertex_buffer_id = + allocator.allocate_buffer::(device, + vertex_data.len() as u64, + BufferTag("TextureVertexDebug")); + let index_buffer_id = allocator.allocate_buffer::(device, + index_data.len() as u64, + BufferTag("TextureIndexDebug")); + { + let vertex_buffer = allocator.get_buffer(vertex_buffer_id); + let index_buffer = allocator.get_buffer(index_buffer_id); + device.upload_to_buffer(&vertex_buffer, 0, 
vertex_data, BufferTarget::Vertex); + device.upload_to_buffer(&index_buffer, 0, index_data, BufferTarget::Index); - device.draw_elements(index_data.len() as u32, &RenderState { - target: &RenderTarget::Default, - program: &self.texture_program.program, - vertex_array: &self.texture_vertex_array.vertex_array, - primitive: Primitive::Triangles, - textures: &[(&self.texture_program.texture, &texture)], - images: &[], - uniforms: &[ - (&self.texture_program.framebuffer_size_uniform, - UniformData::Vec2(self.framebuffer_size.0.to_f32x2())), - (&self.texture_program.color_uniform, get_color_uniform(color)), - (&self.texture_program.texture_size_uniform, - UniformData::Vec2(device.texture_size(&texture).0.to_f32x2())) - ], - viewport: RectI::new(Vector2I::default(), self.framebuffer_size), - options: RenderOptions { - blend: Some(alpha_blend_state()), - ..RenderOptions::default() - }, - }); + let texture_vertex_array = DebugTextureVertexArray::new(device, + &self.texture_program, + vertex_buffer, + index_buffer); + + device.draw_elements(index_data.len() as u32, &RenderState { + target: &RenderTarget::Default, + program: &self.texture_program.program, + vertex_array: &texture_vertex_array.vertex_array, + primitive: Primitive::Triangles, + textures: &[(&self.texture_program.texture, &texture)], + images: &[], + storage_buffers: &[], + uniforms: &[ + (&self.texture_program.framebuffer_size_uniform, + UniformData::Vec2(self.framebuffer_size.0.to_f32x2())), + (&self.texture_program.color_uniform, get_color_uniform(color)), + (&self.texture_program.texture_size_uniform, + UniformData::Vec2(device.texture_size(&texture).0.to_f32x2())) + ], + viewport: RectI::new(Vector2I::default(), self.framebuffer_size), + options: RenderOptions { + blend: Some(alpha_blend_state()), + ..RenderOptions::default() + }, + }); + } + + allocator.free_buffer(index_buffer_id); + allocator.free_buffer(vertex_buffer_id); } - pub fn draw_button(&mut self, device: &D, origin: Vector2I, texture: 
&D::Texture) -> bool { + pub fn draw_button(&mut self, + device: &D, + allocator: &mut GPUMemoryAllocator, + origin: Vector2I, + texture: &D::Texture) -> bool { let button_rect = RectI::new(origin, vec2i(BUTTON_WIDTH, BUTTON_HEIGHT)); - self.draw_solid_rounded_rect(device, button_rect, WINDOW_COLOR); - self.draw_rounded_rect_outline(device, button_rect, OUTLINE_COLOR); + self.draw_solid_rounded_rect(device, allocator, button_rect, WINDOW_COLOR); + self.draw_rounded_rect_outline(device, allocator, button_rect, OUTLINE_COLOR); self.draw_texture(device, + allocator, origin + vec2i(PADDING, PADDING), texture, BUTTON_ICON_COLOR); @@ -442,11 +528,13 @@ impl UIPresenter where D: Device { pub fn draw_text_switch(&mut self, device: &D, + allocator: &mut GPUMemoryAllocator, mut origin: Vector2I, segment_labels: &[&str], mut value: u8) -> u8 { if let Some(new_value) = self.draw_segmented_control(device, + allocator, origin, Some(value), segment_labels.len() as u8) { @@ -458,6 +546,7 @@ impl UIPresenter where D: Device { let label_width = self.measure_text(segment_label); let offset = SEGMENT_SIZE / 2 - label_width / 2; self.draw_text(device, + allocator, segment_label, origin + vec2i(offset, 0), segment_index as u8 == value); @@ -469,12 +558,14 @@ impl UIPresenter where D: Device { pub fn draw_image_segmented_control(&mut self, device: &D, + allocator: &mut GPUMemoryAllocator, mut origin: Vector2I, segment_textures: &[&D::Texture], mut value: Option) -> Option { let mut clicked_segment = None; if let Some(segment_index) = self.draw_segmented_control(device, + allocator, origin, value, segment_textures.len() as u8) { @@ -493,7 +584,7 @@ impl UIPresenter where D: Device { TEXT_COLOR }; - self.draw_texture(device, origin + offset, segment_texture, color); + self.draw_texture(device, allocator, origin + offset, segment_texture, color); origin += vec2i(SEGMENT_SIZE + 1, 0); } @@ -502,6 +593,7 @@ impl UIPresenter where D: Device { fn draw_segmented_control(&mut self, device: &D, + 
allocator: &mut GPUMemoryAllocator, origin: Vector2I, mut value: Option, segment_count: u8) @@ -518,13 +610,14 @@ impl UIPresenter where D: Device { clicked_segment = Some(segment); } - self.draw_solid_rounded_rect(device, widget_rect, WINDOW_COLOR); - self.draw_rounded_rect_outline(device, widget_rect, OUTLINE_COLOR); + self.draw_solid_rounded_rect(device, allocator, widget_rect, WINDOW_COLOR); + self.draw_rounded_rect_outline(device, allocator, widget_rect, OUTLINE_COLOR); if let Some(value) = value { let highlight_size = vec2i(SEGMENT_SIZE, BUTTON_HEIGHT); let x_offset = value as i32 * SEGMENT_SIZE + (value as i32 - 1); self.draw_solid_rounded_rect(device, + allocator, RectI::new(origin + vec2i(x_offset, 0), highlight_size), TEXT_COLOR); } @@ -536,6 +629,7 @@ impl UIPresenter where D: Device { Some(value) if value == prev_segment_index || value == next_segment_index => {} _ => { self.draw_line(device, + allocator, segment_origin, segment_origin + vec2i(0, BUTTON_HEIGHT), TEXT_COLOR); @@ -547,7 +641,11 @@ impl UIPresenter where D: Device { clicked_segment } - pub fn draw_tooltip(&self, device: &D, string: &str, rect: RectI) { + pub fn draw_tooltip(&self, + device: &D, + allocator: &mut GPUMemoryAllocator, + string: &str, + rect: RectI) { if !rect.to_f32().contains_point(self.mouse_position) { return; } @@ -556,8 +654,15 @@ impl UIPresenter where D: Device { let window_size = vec2i(text_size + PADDING * 2, TOOLTIP_HEIGHT); let origin = rect.origin() - vec2i(0, window_size.y() + PADDING); - self.draw_solid_rounded_rect(device, RectI::new(origin, window_size), WINDOW_COLOR); - self.draw_text(device, string, origin + vec2i(PADDING, PADDING + FONT_ASCENT), false); + self.draw_solid_rounded_rect(device, + allocator, + RectI::new(origin, window_size), + WINDOW_COLOR); + self.draw_text(device, + allocator, + string, + origin + vec2i(PADDING, PADDING + FONT_ASCENT), + false); } } @@ -571,7 +676,7 @@ struct DebugTextureProgram where D: Device { impl DebugTextureProgram 
where D: Device { fn new(device: &D, resources: &dyn ResourceLoader) -> DebugTextureProgram { - let program = device.create_raster_program(resources, "debug_texture"); + let program = device.create_raster_program(resources, "debug/texture"); let framebuffer_size_uniform = device.get_uniform(&program, "FramebufferSize"); let texture_size_uniform = device.get_uniform(&program, "TextureSize"); let color_uniform = device.get_uniform(&program, "Color"); @@ -588,15 +693,14 @@ impl DebugTextureProgram where D: Device { struct DebugTextureVertexArray where D: Device { vertex_array: D::VertexArray, - vertex_buffer: D::Buffer, - index_buffer: D::Buffer, } impl DebugTextureVertexArray where D: Device { - fn new(device: &D, debug_texture_program: &DebugTextureProgram) + fn new(device: &D, + debug_texture_program: &DebugTextureProgram, + vertex_buffer: &D::Buffer, + index_buffer: &D::Buffer) -> DebugTextureVertexArray { - let vertex_buffer = device.create_buffer(BufferUploadMode::Dynamic); - let index_buffer = device.create_buffer(BufferUploadMode::Dynamic); let vertex_array = device.create_vertex_array(); let position_attr = device.get_vertex_attr(&debug_texture_program.program, "Position") @@ -604,8 +708,8 @@ impl DebugTextureVertexArray where D: Device { let tex_coord_attr = device.get_vertex_attr(&debug_texture_program.program, "TexCoord") .unwrap(); - device.bind_buffer(&vertex_array, &vertex_buffer, BufferTarget::Vertex); - device.bind_buffer(&vertex_array, &index_buffer, BufferTarget::Index); + device.bind_buffer(&vertex_array, vertex_buffer, BufferTarget::Vertex); + device.bind_buffer(&vertex_array, index_buffer, BufferTarget::Index); device.configure_vertex_attr(&vertex_array, &position_attr, &VertexAttrDescriptor { size: 2, class: VertexAttrClass::Int, @@ -625,20 +729,20 @@ impl DebugTextureVertexArray where D: Device { buffer_index: 0, }); - DebugTextureVertexArray { vertex_array, vertex_buffer, index_buffer } + DebugTextureVertexArray { vertex_array } } } struct 
DebugSolidVertexArray where D: Device { vertex_array: D::VertexArray, - vertex_buffer: D::Buffer, - index_buffer: D::Buffer, } impl DebugSolidVertexArray where D: Device { - fn new(device: &D, debug_solid_program: &DebugSolidProgram) -> DebugSolidVertexArray { - let vertex_buffer = device.create_buffer(BufferUploadMode::Dynamic); - let index_buffer = device.create_buffer(BufferUploadMode::Dynamic); + fn new(device: &D, + debug_solid_program: &DebugSolidProgram, + vertex_buffer: &D::Buffer, + index_buffer: &D::Buffer) + -> DebugSolidVertexArray { let vertex_array = device.create_vertex_array(); let position_attr = @@ -655,7 +759,7 @@ impl DebugSolidVertexArray where D: Device { buffer_index: 0, }); - DebugSolidVertexArray { vertex_array, vertex_buffer, index_buffer } + DebugSolidVertexArray { vertex_array } } } @@ -667,7 +771,7 @@ struct DebugSolidProgram where D: Device { impl DebugSolidProgram where D: Device { fn new(device: &D, resources: &dyn ResourceLoader) -> DebugSolidProgram { - let program = device.create_raster_program(resources, "debug_solid"); + let program = device.create_raster_program(resources, "debug/solid"); let framebuffer_size_uniform = device.get_uniform(&program, "FramebufferSize"); let color_uniform = device.get_uniform(&program, "Color"); DebugSolidProgram { program, framebuffer_size_uniform, color_uniform } diff --git a/webgl/src/lib.rs b/webgl/src/lib.rs index 08fd3620..dc5c0464 100644 --- a/webgl/src/lib.rs +++ b/webgl/src/lib.rs @@ -13,6 +13,7 @@ #[macro_use] extern crate log; +use js_sys::{Uint8Array, Uint16Array, Float32Array, Object}; use pathfinder_geometry::rect::RectI; use pathfinder_geometry::vector::Vector2I; use pathfinder_gpu::{BlendFactor, BlendOp, BufferData, BufferTarget, BufferUploadMode, ClearOps}; @@ -24,11 +25,11 @@ use pathfinder_gpu::{VertexAttrClass, VertexAttrDescriptor, VertexAttrType}; use pathfinder_resources::ResourceLoader; use std::cell::RefCell; use std::mem; +use std::ops::Range; use std::str; use 
std::time::Duration; use wasm_bindgen::JsCast; use web_sys::WebGl2RenderingContext as WebGl; -use js_sys::{Uint8Array, Uint16Array, Float32Array, Object}; pub struct WebGlDevice { context: web_sys::WebGl2RenderingContext, @@ -426,6 +427,7 @@ unsafe fn check_and_extract_data( impl Device for WebGlDevice { type Buffer = WebGlBuffer; + type BufferDataReceiver = (); type Fence = (); type Framebuffer = WebGlFramebuffer; type ImageParameter = (); @@ -943,9 +945,23 @@ impl Device for WebGlDevice { fn recv_timer_query(&self, _query: &WebGlTimerQuery) -> Duration { Duration::from_millis(0) } + + fn try_recv_buffer(&self, _: &()) -> Option> { + unimplemented!() + } + + fn recv_buffer(&self, _: &()) -> Vec { + unimplemented!() + } + + fn read_buffer(&self, _: &Self::Buffer, _: BufferTarget, _: Range) { + unimplemented!() + } + fn try_recv_texture_data(&self, _receiver: &Self::TextureDataReceiver) -> Option { None } + fn recv_texture_data(&self, _receiver: &Self::TextureDataReceiver) -> TextureData { unimplemented!() } @@ -1175,6 +1191,7 @@ impl VertexAttrTypeExt for VertexAttrType { fn to_gl_type(self) -> u32 { match self { VertexAttrType::F32 => WebGl::FLOAT, + VertexAttrType::I32 => WebGl::INT, VertexAttrType::I16 => WebGl::SHORT, VertexAttrType::I8 => WebGl::BYTE, VertexAttrType::U16 => WebGl::UNSIGNED_SHORT,