From 7771fd877db8b8d4d99098d57f34a3d87aff6ee6 Mon Sep 17 00:00:00 2001
From: Patrick Walton
Date: Tue, 23 Jun 2020 12:30:02 -0700
Subject: [PATCH] Add a unified GPU allocator

---
 gpu/Cargo.toml       |   2 +
 gpu/src/allocator.rs | 400 +++++++++++++++++++++++++++++++++++++++++++
 gpu/src/lib.rs       |  44 ++++-
 3 files changed, 443 insertions(+), 3 deletions(-)
 create mode 100644 gpu/src/allocator.rs

diff --git a/gpu/Cargo.toml b/gpu/Cargo.toml
index d9585819..72c023b1 100644
--- a/gpu/Cargo.toml
+++ b/gpu/Cargo.toml
@@ -10,7 +10,9 @@ homepage = "https://github.com/servo/pathfinder"
 
 [dependencies]
 bitflags = "1.0"
+fxhash = "0.2"
 half = "1.5"
+log = "0.4"
 
 [dependencies.image]
 version = "0.23"
diff --git a/gpu/src/allocator.rs b/gpu/src/allocator.rs
new file mode 100644
index 00000000..fd248c1b
--- /dev/null
+++ b/gpu/src/allocator.rs
@@ -0,0 +1,400 @@
+// pathfinder/gpu/src/gpu/allocator.rs
+//
+// Copyright © 2020 The Pathfinder Project Developers.
+//
+// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
+// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
+// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
+// option. This file may not be copied, modified, or distributed
+// except according to those terms.
+
+//! GPU memory management.
+
+use crate::{BufferData, BufferTarget, BufferUploadMode, Device, TextureFormat};
+use fxhash::FxHashMap;
+use pathfinder_geometry::vector::Vector2I;
+use std::collections::VecDeque;
+use std::default::Default;
+use std::mem;
+use std::time::Instant;
+
+// Everything above 16 MB is allocated exactly.
+const MAX_BUFFER_SIZE_CLASS: u64 = 16 * 1024 * 1024;
+
+// Number of seconds before unused memory is purged.
+//
+// TODO(pcwalton): jemalloc uses a sigmoidal decay curve here. Consider something similar.
+const DECAY_TIME: f32 = 0.250;
+
+// Number of seconds before we can reuse an object buffer.
+//
+// This helps avoid stalls. This is admittedly a bit of a hack.
+const REUSE_TIME: f32 = 0.015;
+
+pub struct GPUMemoryAllocator<D> where D: Device {
+    buffers_in_use: FxHashMap<BufferID, BufferAllocation<D>>,
+    textures_in_use: FxHashMap<TextureID, TextureAllocation<D>>,
+    framebuffers_in_use: FxHashMap<FramebufferID, FramebufferAllocation<D>>,
+    free_objects: VecDeque<FreeObject<D>>,
+    next_buffer_id: BufferID,
+    next_texture_id: TextureID,
+    next_framebuffer_id: FramebufferID,
+    bytes_committed: u64,
+    bytes_allocated: u64,
+}
+
+struct BufferAllocation<D> where D: Device {
+    buffer: D::Buffer,
+    size: u64,
+    tag: BufferTag,
+}
+
+struct TextureAllocation<D> where D: Device {
+    texture: D::Texture,
+    descriptor: TextureDescriptor,
+    tag: TextureTag,
+}
+
+struct FramebufferAllocation<D> where D: Device {
+    framebuffer: D::Framebuffer,
+    descriptor: TextureDescriptor,
+    tag: FramebufferTag,
+}
+
+struct FreeObject<D> where D: Device {
+    timestamp: Instant,
+    kind: FreeObjectKind<D>,
+}
+
+enum FreeObjectKind<D> where D: Device {
+    Buffer { id: BufferID, allocation: BufferAllocation<D> },
+    Texture { id: TextureID, allocation: TextureAllocation<D> },
+    Framebuffer { id: FramebufferID, allocation: FramebufferAllocation<D> },
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct TextureDescriptor {
+    width: u32,
+    height: u32,
+    format: TextureFormat,
+}
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct BufferID(pub u64);
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct TextureID(pub u64);
+
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub struct FramebufferID(pub u64);
+
+// For debugging and profiling.
+#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)]
+pub struct BufferTag(pub &'static str);
+
+// For debugging and profiling.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub struct TextureTag(pub &'static str);
+
+// For debugging and profiling.
+#[derive(Clone, Copy, Debug, PartialEq)]
+pub struct FramebufferTag(pub &'static str);
+
+impl<D> GPUMemoryAllocator<D> where D: Device {
+    pub fn new() -> GPUMemoryAllocator<D> {
+        GPUMemoryAllocator {
+            buffers_in_use: FxHashMap::default(),
+            textures_in_use: FxHashMap::default(),
+            framebuffers_in_use: FxHashMap::default(),
+            free_objects: VecDeque::new(),
+            next_buffer_id: BufferID(0),
+            next_texture_id: TextureID(0),
+            next_framebuffer_id: FramebufferID(0),
+            bytes_committed: 0,
+            bytes_allocated: 0,
+        }
+    }
+
+    pub fn allocate_buffer<T>(&mut self, device: &D, size: u64, tag: BufferTag) -> BufferID {
+        let mut byte_size = size * mem::size_of::<T>() as u64;
+        if byte_size < MAX_BUFFER_SIZE_CLASS {
+            byte_size = byte_size.next_power_of_two();
+        }
+
+        let now = Instant::now();
+
+        for free_object_index in 0..self.free_objects.len() {
+            match self.free_objects[free_object_index] {
+                FreeObject {
+                    ref timestamp,
+                    kind: FreeObjectKind::Buffer { ref allocation, .. },
+                } if allocation.size == byte_size &&
+                    (now - *timestamp).as_secs_f32() >= REUSE_TIME => {}
+                _ => continue,
+            }
+
+            let (id, mut allocation) = match self.free_objects.remove(free_object_index) {
+                Some(FreeObject { kind: FreeObjectKind::Buffer { id, allocation }, .. }) => {
+                    (id, allocation)
+                }
+                _ => unreachable!(),
+            };
+
+            allocation.tag = tag;
+            self.bytes_committed += allocation.size;
+            self.buffers_in_use.insert(id, allocation);
+            return id;
+        }
+
+        let buffer = device.create_buffer(BufferUploadMode::Dynamic);
+        device.allocate_buffer::<u8>(&buffer,
+                                     BufferData::Uninitialized(byte_size as usize),
+                                     BufferTarget::Vertex);
+
+        let id = self.next_buffer_id;
+        self.next_buffer_id.0 += 1;
+
+        debug!("mapping buffer: {:?} {} ({}x{}) {:?}",
+               id,
+               byte_size,
+               size,
+               mem::size_of::<T>(),
+               tag);
+
+        self.buffers_in_use.insert(id, BufferAllocation { buffer, size: byte_size, tag });
+        self.bytes_allocated += byte_size;
+        self.bytes_committed += byte_size;
+
+        id
+    }
+
+    pub fn allocate_texture(&mut self,
+                            device: &D,
+                            size: Vector2I,
+                            format: TextureFormat,
+                            tag: TextureTag)
+                            -> TextureID {
+        let descriptor = TextureDescriptor {
+            width: size.x() as u32,
+            height: size.y() as u32,
+            format,
+        };
+        let byte_size = descriptor.byte_size();
+
+        for free_object_index in 0..self.free_objects.len() {
+            match self.free_objects[free_object_index] {
+                FreeObject { kind: FreeObjectKind::Texture { ref allocation, .. }, .. } if
+                        allocation.descriptor == descriptor => {}
+                _ => continue,
+            }
+
+            let (id, mut allocation) = match self.free_objects.remove(free_object_index) {
+                Some(FreeObject { kind: FreeObjectKind::Texture { id, allocation }, .. }) => {
+                    (id, allocation)
+                }
+                _ => unreachable!(),
+            };
+
+            allocation.tag = tag;
+            self.bytes_committed += allocation.descriptor.byte_size();
+            self.textures_in_use.insert(id, allocation);
+            return id;
+        }
+
+        debug!("mapping texture: {:?} {:?}", descriptor, tag);
+
+        let texture = device.create_texture(format, size);
+        let id = self.next_texture_id;
+        self.next_texture_id.0 += 1;
+
+        self.textures_in_use.insert(id, TextureAllocation { texture, descriptor, tag });
+
+        self.bytes_allocated += byte_size;
+        self.bytes_committed += byte_size;
+
+        id
+    }
+
+    pub fn allocate_framebuffer(&mut self,
+                                device: &D,
+                                size: Vector2I,
+                                format: TextureFormat,
+                                tag: FramebufferTag)
+                                -> FramebufferID {
+        let descriptor = TextureDescriptor {
+            width: size.x() as u32,
+            height: size.y() as u32,
+            format,
+        };
+        let byte_size = descriptor.byte_size();
+
+        for free_object_index in 0..self.free_objects.len() {
+            match self.free_objects[free_object_index].kind {
+                FreeObjectKind::Framebuffer { ref allocation, .. } if allocation.descriptor ==
+                        descriptor => {}
+                _ => continue,
+            }
+
+            let (id, mut allocation) = match self.free_objects.remove(free_object_index) {
+                Some(FreeObject { kind: FreeObjectKind::Framebuffer { id, allocation }, .. }) => {
+                    (id, allocation)
+                }
+                _ => unreachable!(),
+            };
+
+            allocation.tag = tag;
+            self.bytes_committed += allocation.descriptor.byte_size();
+            self.framebuffers_in_use.insert(id, allocation);
+            return id;
+        }
+
+        debug!("mapping framebuffer: {:?} {:?}", descriptor, tag);
+
+        let texture = device.create_texture(format, size);
+        let framebuffer = device.create_framebuffer(texture);
+        let id = self.next_framebuffer_id;
+        self.next_framebuffer_id.0 += 1;
+
+        self.framebuffers_in_use.insert(id, FramebufferAllocation {
+            framebuffer,
+            descriptor,
+            tag,
+        });
+
+        self.bytes_allocated += byte_size;
+        self.bytes_committed += byte_size;
+
+        id
+    }
+
+    pub fn purge_if_needed(&mut self) {
+        let now = Instant::now();
+        loop {
+            match self.free_objects.front() {
+                Some(FreeObject { timestamp, .. }) if (now - *timestamp).as_secs_f32() >=
+                    DECAY_TIME => {}
+                _ => break,
+            }
+            match self.free_objects.pop_front() {
+                None => break,
+                Some(FreeObject { kind: FreeObjectKind::Buffer { allocation, .. }, .. }) => {
+                    debug!("purging buffer: {}", allocation.size);
+                    self.bytes_allocated -= allocation.size;
+                }
+                Some(FreeObject { kind: FreeObjectKind::Texture { allocation, .. }, .. }) => {
+                    debug!("purging texture: {:?}", allocation.descriptor);
+                    self.bytes_allocated -= allocation.descriptor.byte_size();
+                }
+                Some(FreeObject { kind: FreeObjectKind::Framebuffer { allocation, .. }, .. }) => {
+                    debug!("purging framebuffer: {:?}", allocation.descriptor);
+                    self.bytes_allocated -= allocation.descriptor.byte_size();
+                }
+            }
+        }
+    }
+
+    pub fn free_buffer(&mut self, id: BufferID) {
+        let allocation = self.buffers_in_use
+                             .remove(&id)
+                             .expect("Attempted to free unallocated buffer!");
+        self.bytes_committed -= allocation.size;
+        self.free_objects.push_back(FreeObject {
+            timestamp: Instant::now(),
+            kind: FreeObjectKind::Buffer { id, allocation },
+        });
+    }
+
+    pub fn free_texture(&mut self, id: TextureID) {
+        let allocation = self.textures_in_use
+                             .remove(&id)
+                             .expect("Attempted to free unallocated texture!");
+        let byte_size = allocation.descriptor.byte_size();
+        self.bytes_committed -= byte_size;
+        self.free_objects.push_back(FreeObject {
+            timestamp: Instant::now(),
+            kind: FreeObjectKind::Texture { id, allocation },
+        });
+    }
+
+    pub fn free_framebuffer(&mut self, id: FramebufferID) {
+        let allocation = self.framebuffers_in_use
+                             .remove(&id)
+                             .expect("Attempted to free unallocated framebuffer!");
+        let byte_size = allocation.descriptor.byte_size();
+        self.bytes_committed -= byte_size;
+        self.free_objects.push_back(FreeObject {
+            timestamp: Instant::now(),
+            kind: FreeObjectKind::Framebuffer { id, allocation },
+        });
+    }
+
+    pub fn get_buffer(&self, id: BufferID) -> &D::Buffer {
+        &self.buffers_in_use[&id].buffer
+    }
+
+    pub fn get_texture(&self, id: TextureID) -> &D::Texture {
+        &self.textures_in_use[&id].texture
+    }
+
+    pub fn get_framebuffer(&self, id: FramebufferID) -> &D::Framebuffer {
+        &self.framebuffers_in_use[&id].framebuffer
+    }
+
+    #[inline]
+    pub fn bytes_allocated(&self) -> u64 {
+        self.bytes_allocated
+    }
+
+    #[inline]
+    pub fn bytes_committed(&self) -> u64 {
+        self.bytes_committed
+    }
+
+    #[allow(dead_code)]
+    pub fn dump(&self) {
+        println!("GPU memory dump");
+        println!("---------------");
+
+        println!("Buffers:");
+        let mut ids: Vec<BufferID> = self.buffers_in_use.keys().cloned().collect();
+        ids.sort();
+        for id in ids {
+            let allocation = &self.buffers_in_use[&id];
+            println!("id {:?}: {:?} ({:?} B)", id, allocation.tag, allocation.size);
+        }
+
+        println!("Textures:");
+        let mut ids: Vec<TextureID> = self.textures_in_use.keys().cloned().collect();
+        ids.sort();
+        for id in ids {
+            let allocation = &self.textures_in_use[&id];
+            println!("id {:?}: {:?} {:?}x{:?} {:?} ({:?} B)",
+                     id,
+                     allocation.tag,
+                     allocation.descriptor.width,
+                     allocation.descriptor.height,
+                     allocation.descriptor.format,
+                     allocation.descriptor.byte_size());
+        }
+
+        println!("Framebuffers:");
+        let mut ids: Vec<FramebufferID> = self.framebuffers_in_use.keys().cloned().collect();
+        ids.sort();
+        for id in ids {
+            let allocation = &self.framebuffers_in_use[&id];
+            println!("id {:?}: {:?} {:?}x{:?} {:?} ({:?} B)",
+                     id,
+                     allocation.tag,
+                     allocation.descriptor.width,
+                     allocation.descriptor.height,
+                     allocation.descriptor.format,
+                     allocation.descriptor.byte_size());
+        }
+    }
+}
+
+impl TextureDescriptor {
+    fn byte_size(&self) -> u64 {
+        self.width as u64 * self.height as u64 * self.format.bytes_per_pixel() as u64
+    }
+}
diff --git a/gpu/src/lib.rs b/gpu/src/lib.rs
index 34f7e3ec..440194ba 100644
--- a/gpu/src/lib.rs
+++ b/gpu/src/lib.rs
@@ -12,6 +12,10 @@
 
 #[macro_use]
 extern crate bitflags;
+#[macro_use]
+extern crate log;
+
+pub mod allocator;
 
 use half::f16;
 use image::ImageFormat;
@@ -21,11 +25,13 @@ use pathfinder_geometry::transform3d::Transform4F;
 use pathfinder_geometry::vector::{Vector2I, vec2i};
 use pathfinder_resources::ResourceLoader;
 use pathfinder_simd::default::{F32x2, F32x4, I32x2};
+use std::ops::Range;
 use std::os::raw::c_void;
 use std::time::Duration;
 
 pub trait Device: Sized {
     type Buffer;
+    type BufferDataReceiver;
     type Fence;
     type Framebuffer;
     type ImageParameter;
@@ -40,6 +46,8 @@ pub trait Device: Sized {
     type VertexArray;
     type VertexAttr;
 
+    fn backend_name(&self) -> &'static str;
+    fn device_name(&self) -> String;
     fn feature_level(&self) -> FeatureLevel;
     fn create_texture(&self, format: TextureFormat, size: Vector2I) -> Self::Texture;
     fn create_texture_from_data(&self, format: TextureFormat, size: Vector2I, data: TextureDataRef)
@@ -90,6 +98,8 @@ pub trait Device: Sized {
     fn upload_to_texture(&self, texture: &Self::Texture, rect: RectI, data: TextureDataRef);
     fn read_pixels(&self, target: &RenderTarget<Self>, viewport: RectI)
                    -> Self::TextureDataReceiver;
+    fn read_buffer(&self, buffer: &Self::Buffer, target: BufferTarget, range: Range<usize>)
+                   -> Self::BufferDataReceiver;
     fn begin_commands(&self);
     fn end_commands(&self);
     fn draw_arrays(&self, index_count: u32, render_state: &RenderState<Self>);
@@ -108,6 +118,8 @@ pub trait Device: Sized {
     fn recv_timer_query(&self, query: &Self::TimerQuery) -> Duration;
     fn try_recv_texture_data(&self, receiver: &Self::TextureDataReceiver) -> Option<TextureData>;
     fn recv_texture_data(&self, receiver: &Self::TextureDataReceiver) -> TextureData;
+    fn try_recv_buffer(&self, receiver: &Self::BufferDataReceiver) -> Option<Vec<u8>>;
+    fn recv_buffer(&self, receiver: &Self::BufferDataReceiver) -> Vec<u8>;
 
     fn create_texture_from_png(&self,
                                resources: &dyn ResourceLoader,
@@ -131,6 +143,30 @@ pub trait Device: Sized {
         }
     }
 
+    fn upload_png_to_texture(&self,
+                             resources: &dyn ResourceLoader,
+                             name: &str,
+                             texture: &Self::Texture,
+                             format: TextureFormat) {
+        let data = resources.slurp(&format!("textures/{}.png", name)).unwrap();
+        let image = image::load_from_memory_with_format(&data, ImageFormat::Png).unwrap();
+        match format {
+            TextureFormat::R8 => {
+                let image = image.to_luma();
+                let size = vec2i(image.width() as i32, image.height() as i32);
+                let rect = RectI::new(Vector2I::default(), size);
+                self.upload_to_texture(&texture, rect, TextureDataRef::U8(&image))
+            }
+            TextureFormat::RGBA8 => {
+                let image = image.to_rgba();
+                let size = vec2i(image.width() as i32, image.height() as i32);
+                let rect = RectI::new(Vector2I::default(), size);
+                self.upload_to_texture(&texture, rect, TextureDataRef::U8(&image))
+            }
+            _ => unimplemented!(),
+        }
+    }
+
     fn create_program_from_shader_names(
         &self,
         resources: &dyn ResourceLoader,
@@ -170,7 +206,7 @@ pub enum FeatureLevel {
     D3D11,
 }
 
-#[derive(Clone, Copy, Debug, PartialEq)]
+#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub enum TextureFormat {
     R8,
     R16F,
@@ -182,10 +218,11 @@ pub enum TextureFormat {
 #[derive(Clone, Copy, Debug)]
 pub enum VertexAttrType {
     F32,
-    I16,
     I8,
-    U16,
+    I16,
+    I32,
     U8,
+    U16,
 }
 
 #[derive(Clone, Copy, Debug)]
@@ -258,6 +295,7 @@ pub struct RenderState<'a, D> where D: Device {
     pub uniforms: &'a [UniformBinding<'a, D::Uniform>],
     pub textures: &'a [TextureBinding<'a, D::TextureParameter, D::Texture>],
     pub images: &'a [ImageBinding<'a, D::ImageParameter, D::Texture>],
+    pub storage_buffers: &'a [(&'a D::StorageBuffer, &'a D::Buffer)],
     pub viewport: RectI,
     pub options: RenderOptions,
 }