From 2bc0db01656a3d49f0e2dfb30ab4b4c0b5aac237 Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Wed, 15 Jun 2022 22:40:20 +0200 Subject: [PATCH 1/4] implement Queue::write_buffer_with --- wgpu-core/src/device/queue.rs | 240 ++++++++++++++++++++++++---------- wgpu-core/src/hub.rs | 6 +- wgpu-core/src/id.rs | 1 + wgpu-core/src/resource.rs | 18 +++ wgpu/src/backend/direct.rs | 57 ++++++++ wgpu/src/backend/mod.rs | 4 +- wgpu/src/backend/web.rs | 35 +++++ wgpu/src/lib.rs | 75 ++++++++++- 8 files changed, 363 insertions(+), 73 deletions(-) diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 6e0f589cae..94ec3b69aa 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -8,10 +8,10 @@ use crate::{ conv, device::{DeviceError, WaitIdleError}, get_lowest_common_denom, - hub::{Global, GlobalIdentityHandlerFactory, HalApi, Token}, + hub::{Global, GlobalIdentityHandlerFactory, HalApi, Input, Token}, id, init_tracker::{has_copy_partial_init_tracker_coverage, TextureInitRange}, - resource::{BufferAccessError, BufferMapState, TextureInner}, + resource::{BufferAccessError, BufferMapState, StagingBuffer, TextureInner}, track, FastHashSet, SubmissionIndex, }; @@ -86,28 +86,6 @@ pub struct WrappedSubmissionIndex { pub index: SubmissionIndex, } -struct StagingData { - buffer: A::Buffer, -} - -impl StagingData { - unsafe fn write( - &self, - device: &A::Device, - offset: wgt::BufferAddress, - data: &[u8], - ) -> Result<(), hal::DeviceError> { - let mapping = device.map_buffer(&self.buffer, offset..offset + data.len() as u64)?; - ptr::copy_nonoverlapping(data.as_ptr(), mapping.ptr.as_ptr(), data.len()); - if !mapping.is_coherent { - device - .flush_mapped_ranges(&self.buffer, iter::once(offset..offset + data.len() as u64)); - } - device.unmap_buffer(&self.buffer)?; - Ok(()) - } -} - #[derive(Debug)] pub enum TempResource { Buffer(A::Buffer), @@ -178,8 +156,8 @@ impl PendingWrites { self.temp_resources.push(resource); } - fn consume(&mut self, stage: StagingData) { - self.temp_resources.push(TempResource::Buffer(stage.buffer)); + fn consume(&mut self, buffer: StagingBuffer) { + self.temp_resources.push(TempResource::Buffer(buffer.raw)); } #[must_use] @@ -240,16 +218,38 @@ impl PendingWrites { } impl super::Device { - fn prepare_stage(&mut self, size: wgt::BufferAddress) -> Result, DeviceError> { - profiling::scope!("prepare_stage"); + fn prepare_staging_buffer( + &mut self, + size: wgt::BufferAddress, + ) -> Result<(StagingBuffer, *mut u8), DeviceError> { + profiling::scope!("prepare_staging_buffer"); let stage_desc = hal::BufferDescriptor { label: Some("(wgpu internal) Staging"), size, usage: hal::BufferUses::MAP_WRITE | hal::BufferUses::COPY_SRC, memory_flags: hal::MemoryFlags::TRANSIENT, }; + let buffer = unsafe { self.raw.create_buffer(&stage_desc)? 
}; - Ok(StagingData { buffer }) + let mapping = unsafe { self.raw.map_buffer(&buffer, 0..size) }?; + + let staging_buffer = StagingBuffer { + raw: buffer, + size, + is_coherent: mapping.is_coherent, + }; + + Ok((staging_buffer, mapping.ptr.as_ptr())) + } +} + +impl StagingBuffer { + unsafe fn flush(&self, device: &A::Device) -> Result<(), DeviceError> { + if !self.is_coherent { + device.flush_mapped_ranges(&self.raw, iter::once(0..self.size)); + } + device.unmap_buffer(&self.raw)?; + Ok(()) } } @@ -305,6 +305,8 @@ impl Global { .map_err(|_| DeviceError::Invalid)?; let (buffer_guard, _) = hub.buffers.read(&mut token); + let data_size = data.len() as wgt::BufferAddress; + #[cfg(feature = "trace")] if let Some(ref trace) = device.trace { let mut trace = trace.lock(); @@ -312,23 +314,132 @@ impl Global { trace.add(Action::WriteBuffer { id: buffer_id, data: data_path, - range: buffer_offset..buffer_offset + data.len() as wgt::BufferAddress, + range: buffer_offset..buffer_offset + data_size, queued: true, }); } - let data_size = data.len() as wgt::BufferAddress; if data_size == 0 { log::trace!("Ignoring write_buffer of size 0"); return Ok(()); } - let stage = device.prepare_stage(data_size)?; + let (staging_buffer, staging_buffer_ptr) = device.prepare_staging_buffer(data_size)?; + + profiling::scope!("copy"); unsafe { - profiling::scope!("copy"); - stage.write(&device.raw, 0, data) + ptr::copy_nonoverlapping(data.as_ptr(), staging_buffer_ptr, data.len()); + staging_buffer.flush(&device.raw)?; + }; + + let mut trackers = device.trackers.lock(); + let (dst, transition) = trackers + .buffers + .set_single(&*buffer_guard, buffer_id, hal::BufferUses::COPY_DST) + .ok_or(TransferError::InvalidBuffer(buffer_id))?; + let dst_raw = dst + .raw + .as_ref() + .ok_or(TransferError::InvalidBuffer(buffer_id))?; + if !dst.usage.contains(wgt::BufferUsages::COPY_DST) { + return Err(TransferError::MissingCopyDstUsageFlag(Some(buffer_id), None).into()); } - .map_err(DeviceError::from)?; + dst.life_guard.use_at(device.active_submission_index + 1); + + if data_size % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(TransferError::UnalignedCopySize(data_size).into()); + } + if buffer_offset % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(TransferError::UnalignedBufferOffset(buffer_offset).into()); + } + if buffer_offset + data_size > dst.size { + return Err(TransferError::BufferOverrun { + start_offset: buffer_offset, + end_offset: buffer_offset + data_size, + buffer_size: dst.size, + side: CopySide::Destination, + } + .into()); + } + + let region = wgt::BufferSize::new(data_size).map(|size| hal::BufferCopy { + src_offset: 0, + dst_offset: buffer_offset, + size, + }); + let barriers = iter::once(hal::BufferBarrier { + buffer: &staging_buffer.raw, + usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, + }) + .chain(transition.map(|pending| pending.into_hal(dst))); + let encoder = device.pending_writes.activate(); + unsafe { + encoder.transition_buffers(barriers); + encoder.copy_buffer_to_buffer(&staging_buffer.raw, dst_raw, region.into_iter()); + } + + device.pending_writes.consume(staging_buffer); + device.pending_writes.dst_buffers.insert(buffer_id); + + // Ensure the overwritten bytes are marked as initialized so they don't need to be nulled prior to mapping or binding. 
+ { + drop(buffer_guard); + let (mut buffer_guard, _) = hub.buffers.write(&mut token); + + let dst = buffer_guard.get_mut(buffer_id).unwrap(); + dst.initialization_status + .drain(buffer_offset..(buffer_offset + data_size)); + } + + Ok(()) + } + + pub fn queue_create_staging_buffer( + &self, + queue_id: id::QueueId, + buffer_size: wgt::BufferSize, + id_in: Input, + ) -> Result<(id::StagingBufferId, *mut u8), QueueWriteError> { + let hub = A::hub(self); + let mut token = Token::root(); + let fid = hub.staging_buffers.prepare(id_in); + let (mut device_guard, mut token) = hub.devices.write(&mut token); + let device = device_guard + .get_mut(queue_id) + .map_err(|_| DeviceError::Invalid)?; + + let data_size = buffer_size.get(); + + let (staging_buffer, staging_buffer_ptr) = device.prepare_staging_buffer(data_size)?; + + let id = fid.assign(staging_buffer, &mut token); + Ok((id.0, staging_buffer_ptr)) + } + + pub fn queue_write_staging_buffer( + &self, + queue_id: id::QueueId, + buffer_id: id::BufferId, + buffer_offset: wgt::BufferAddress, + staging_buffer: id::StagingBufferId, + ) -> Result<(), QueueWriteError> { + profiling::scope!("write_buffer_with", "Queue"); + + let hub = A::hub(self); + let mut token = Token::root(); + let (mut device_guard, mut token) = hub.devices.write(&mut token); + let device = device_guard + .get_mut(queue_id) + .map_err(|_| DeviceError::Invalid)?; + + let (src_buffer, _) = hub.staging_buffers.unregister(staging_buffer, &mut token); + let src_buffer = src_buffer.ok_or(TransferError::InvalidBuffer(buffer_id))?; + + let data_size = src_buffer.size; + + unsafe { src_buffer.flush(&device.raw)? }; + + let (buffer_guard, _) = hub.buffers.read(&mut token); let mut trackers = device.trackers.lock(); let (dst, transition) = trackers @@ -360,23 +471,23 @@ impl Global { .into()); } - let region = wgt::BufferSize::new(data.len() as u64).map(|size| hal::BufferCopy { + let region = wgt::BufferSize::new(data_size).map(|size| hal::BufferCopy { src_offset: 0, dst_offset: buffer_offset, size, }); let barriers = iter::once(hal::BufferBarrier { - buffer: &stage.buffer, + buffer: &src_buffer.raw, usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, }) .chain(transition.map(|pending| pending.into_hal(dst))); let encoder = device.pending_writes.activate(); unsafe { encoder.transition_buffers(barriers); - encoder.copy_buffer_to_buffer(&stage.buffer, dst_raw, region.into_iter()); + encoder.copy_buffer_to_buffer(&src_buffer.raw, dst_raw, region.into_iter()); } - device.pending_writes.consume(stage); + device.pending_writes.consume(src_buffer); device.pending_writes.dst_buffers.insert(buffer_id); // Ensure the overwritten bytes are marked as initialized so they don't need to be nulled prior to mapping or binding. 
@@ -469,7 +580,7 @@ impl Global { let block_rows_in_copy = (size.depth_or_array_layers - 1) * block_rows_per_image + height_blocks; let stage_size = stage_bytes_per_row as u64 * block_rows_in_copy as u64; - let stage = device.prepare_stage(stage_size)?; + let (staging_buffer, staging_buffer_ptr) = device.prepare_staging_buffer(stage_size)?; let dst = texture_guard.get_mut(destination.texture).unwrap(); if !dst.desc.usage.contains(wgt::TextureUsages::COPY_DST) { @@ -538,30 +649,30 @@ impl Global { width_blocks * format_desc.block_size as u32 }; - let mapping = unsafe { device.raw.map_buffer(&stage.buffer, 0..stage_size) } - .map_err(DeviceError::from)?; - unsafe { - if stage_bytes_per_row == bytes_per_row { - profiling::scope!("copy aligned"); - // Fast path if the data is already being aligned optimally. + if stage_bytes_per_row == bytes_per_row { + profiling::scope!("copy aligned"); + // Fast path if the data is already being aligned optimally. + unsafe { ptr::copy_nonoverlapping( data.as_ptr().offset(data_layout.offset as isize), - mapping.ptr.as_ptr(), + staging_buffer_ptr, stage_size as usize, ); - } else { - profiling::scope!("copy chunked"); - // Copy row by row into the optimal alignment. - let copy_bytes_per_row = stage_bytes_per_row.min(bytes_per_row) as usize; - for layer in 0..size.depth_or_array_layers { - let rows_offset = layer * block_rows_per_image; - for row in 0..height_blocks { + } + } else { + profiling::scope!("copy chunked"); + // Copy row by row into the optimal alignment. + let copy_bytes_per_row = stage_bytes_per_row.min(bytes_per_row) as usize; + for layer in 0..size.depth_or_array_layers { + let rows_offset = layer * block_rows_per_image; + for row in 0..height_blocks { + unsafe { ptr::copy_nonoverlapping( data.as_ptr().offset( data_layout.offset as isize + (rows_offset + row) as isize * bytes_per_row as isize, ), - mapping.ptr.as_ptr().offset( + staging_buffer_ptr.offset( (rows_offset + row) as isize * stage_bytes_per_row as isize, ), copy_bytes_per_row, @@ -570,17 +681,8 @@ impl Global { } } } - unsafe { - if !mapping.is_coherent { - device - .raw - .flush_mapped_ranges(&stage.buffer, iter::once(0..stage_size)); - } - device - .raw - .unmap_buffer(&stage.buffer) - .map_err(DeviceError::from)?; - } + + unsafe { staging_buffer.flush(&device.raw) }?; let regions = (0..array_layer_count).map(|rel_array_layer| { let mut texture_base = dst_base.clone(); @@ -598,7 +700,7 @@ impl Global { } }); let barrier = hal::BufferBarrier { - buffer: &stage.buffer, + buffer: &staging_buffer.raw, usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, }; @@ -611,10 +713,10 @@ impl Global { encoder .transition_textures(transition.map(|pending| pending.into_hal(dst)).into_iter()); encoder.transition_buffers(iter::once(barrier)); - encoder.copy_buffer_to_texture(&stage.buffer, dst_raw, regions); + encoder.copy_buffer_to_texture(&staging_buffer.raw, dst_raw, regions); } - device.pending_writes.consume(stage); + device.pending_writes.consume(staging_buffer); device .pending_writes .dst_textures diff --git a/wgpu-core/src/hub.rs b/wgpu-core/src/hub.rs index 4e8ddf78f6..4fd4263913 100644 --- a/wgpu-core/src/hub.rs +++ b/wgpu-core/src/hub.rs @@ -5,7 +5,7 @@ use crate::{ id, instance::{Adapter, HalSurface, Instance, Surface}, pipeline::{ComputePipeline, RenderPipeline, ShaderModule}, - resource::{Buffer, QuerySet, Sampler, Texture, TextureClearMode, TextureView}, + resource::{Buffer, QuerySet, Sampler, StagingBuffer, Texture, TextureClearMode, TextureView}, Epoch, Index, }; @@ -351,6 
+351,7 @@ impl Access> for CommandBuffer {} impl Access> for ComputePipeline {} impl Access> for RenderPipeline {} impl Access> for QuerySet {} +impl Access> for Device {} impl Access> for Root {} impl Access> for Device {} impl Access> for Buffer {} @@ -452,6 +453,7 @@ pub trait GlobalIdentityHandlerFactory: + IdentityHandlerFactory + IdentityHandlerFactory + IdentityHandlerFactory + + IdentityHandlerFactory + IdentityHandlerFactory + IdentityHandlerFactory + IdentityHandlerFactory @@ -639,6 +641,7 @@ pub struct Hub { pub compute_pipelines: Registry, id::ComputePipelineId, F>, pub query_sets: Registry, id::QuerySetId, F>, pub buffers: Registry, id::BufferId, F>, + pub staging_buffers: Registry, id::StagingBufferId, F>, pub textures: Registry, id::TextureId, F>, pub texture_views: Registry, id::TextureViewId, F>, pub samplers: Registry, id::SamplerId, F>, @@ -659,6 +662,7 @@ impl Hub { compute_pipelines: Registry::new(A::VARIANT, factory), query_sets: Registry::new(A::VARIANT, factory), buffers: Registry::new(A::VARIANT, factory), + staging_buffers: Registry::new(A::VARIANT, factory), textures: Registry::new(A::VARIANT, factory), texture_views: Registry::new(A::VARIANT, factory), samplers: Registry::new(A::VARIANT, factory), diff --git a/wgpu-core/src/id.rs b/wgpu-core/src/id.rs index ea28fec642..3f5ec0e3a0 100644 --- a/wgpu-core/src/id.rs +++ b/wgpu-core/src/id.rs @@ -196,6 +196,7 @@ pub type DeviceId = Id>; pub type QueueId = DeviceId; // Resource pub type BufferId = Id>; +pub type StagingBufferId = Id>; pub type TextureViewId = Id>; pub type TextureId = Id>; pub type SamplerId = Id>; diff --git a/wgpu-core/src/resource.rs b/wgpu-core/src/resource.rs index dffe52c0be..2fa02ff57b 100644 --- a/wgpu-core/src/resource.rs +++ b/wgpu-core/src/resource.rs @@ -184,6 +184,24 @@ impl Resource for Buffer { } } +pub struct StagingBuffer { + pub(crate) raw: A::Buffer, + pub(crate) size: wgt::BufferAddress, + pub(crate) is_coherent: bool, +} + +impl Resource for StagingBuffer { + const TYPE: &'static str = "StagingBuffer"; + + fn life_guard(&self) -> &LifeGuard { + unreachable!() + } + + fn label(&self) -> &str { + "" + } +} + pub type TextureDescriptor<'a> = wgt::TextureDescriptor>; #[derive(Debug)] diff --git a/wgpu/src/backend/direct.rs b/wgpu/src/backend/direct.rs index 1d9a32d9a6..193ebe3a40 100644 --- a/wgpu/src/backend/direct.rs +++ b/wgpu/src/backend/direct.rs @@ -2180,6 +2180,42 @@ impl crate::Context for Context { } } + fn queue_create_staging_buffer( + &self, + queue: &Self::QueueId, + size: wgt::BufferSize, + ) -> QueueWriteBuffer { + let global = &self.0; + match wgc::gfx_select!( + *queue => global.queue_create_staging_buffer(*queue, size, PhantomData) + ) { + Ok((buffer_id, ptr)) => QueueWriteBuffer { + buffer_id, + mapping: BufferMappedRange { + ptr, + size: size.get() as usize, + }, + }, + Err(err) => self.handle_error_fatal(err, "Queue::write_buffer_with"), + } + } + + fn queue_write_staging_buffer( + &self, + queue: &Self::QueueId, + buffer: &Self::BufferId, + offset: wgt::BufferAddress, + staging_buffer: &QueueWriteBuffer, + ) { + let global = &self.0; + match wgc::gfx_select!( + *queue => global.queue_write_staging_buffer(*queue, buffer.id, offset, staging_buffer.buffer_id) + ) { + Ok(()) => (), + Err(err) => self.handle_error_fatal(err, "Queue::write_buffer_with"), + } + } + fn queue_write_texture( &self, queue: &Self::QueueId, @@ -2312,6 +2348,27 @@ fn default_error_handler(err: crate::Error) { panic!("wgpu error: {}\n", err); } +#[derive(Debug)] +pub struct QueueWriteBuffer { + 
buffer_id: wgc::id::StagingBufferId, + mapping: BufferMappedRange, +} + +impl std::ops::Deref for QueueWriteBuffer { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + panic!("QueueWriteBuffer is write-only!"); + } +} + +impl std::ops::DerefMut for QueueWriteBuffer { + fn deref_mut(&mut self) -> &mut Self::Target { + use crate::BufferMappedRangeSlice; + self.mapping.slice_mut() + } +} + #[derive(Debug)] pub struct BufferMappedRange { ptr: *mut u8, diff --git a/wgpu/src/backend/mod.rs b/wgpu/src/backend/mod.rs index abd090e086..d3f88c6928 100644 --- a/wgpu/src/backend/mod.rs +++ b/wgpu/src/backend/mod.rs @@ -1,9 +1,9 @@ #[cfg(all(target_arch = "wasm32", not(feature = "webgl")))] mod web; #[cfg(all(target_arch = "wasm32", not(feature = "webgl")))] -pub(crate) use web::{BufferMappedRange, Context}; +pub(crate) use web::{BufferMappedRange, Context, QueueWriteBuffer}; #[cfg(any(not(target_arch = "wasm32"), feature = "webgl"))] mod direct; #[cfg(any(not(target_arch = "wasm32"), feature = "webgl"))] -pub(crate) use direct::{BufferMappedRange, Context}; +pub(crate) use direct::{BufferMappedRange, Context, QueueWriteBuffer}; diff --git a/wgpu/src/backend/web.rs b/wgpu/src/backend/web.rs index af496942c8..d5df5021c8 100644 --- a/wgpu/src/backend/web.rs +++ b/wgpu/src/backend/web.rs @@ -2175,6 +2175,24 @@ impl crate::Context for Context { ); } + fn queue_create_staging_buffer( + &self, + _queue: &Self::QueueId, + size: wgt::BufferSize, + ) -> QueueWriteBuffer { + QueueWriteBuffer(vec![0; size.get() as usize].into_boxed_slice()) + } + + fn queue_write_staging_buffer( + &self, + queue: &Self::QueueId, + buffer: &Self::BufferId, + offset: wgt::BufferAddress, + staging_buffer: &QueueWriteBuffer, + ) { + self.queue_write_buffer(queue, buffer, offset, staging_buffer) + } + fn queue_write_texture( &self, queue: &Self::QueueId, @@ -2240,6 +2258,23 @@ impl crate::Context for Context { pub(crate) type SurfaceOutputDetail = (); +#[derive(Debug)] +pub struct QueueWriteBuffer(Box<[u8]>); + +impl std::ops::Deref for QueueWriteBuffer { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + panic!("QueueWriteBuffer is write-only!"); + } +} + +impl std::ops::DerefMut for QueueWriteBuffer { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + #[derive(Debug)] pub struct BufferMappedRange { actual_mapping: js_sys::Uint8Array, diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index 3d163bb5cc..dd54761eb3 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -43,7 +43,7 @@ pub use wgt::{ QUERY_SIZE, VERTEX_STRIDE_ALIGNMENT, }; -use backend::{BufferMappedRange, Context as C}; +use backend::{BufferMappedRange, Context as C, QueueWriteBuffer}; /// Filter for error scopes. 
#[derive(Clone, Copy, Debug, PartialEq, PartialOrd)] @@ -481,6 +481,18 @@ trait Context: Debug + Send + Sized + Sync { offset: BufferAddress, data: &[u8], ); + fn queue_create_staging_buffer( + &self, + queue: &Self::QueueId, + size: BufferSize, + ) -> QueueWriteBuffer; + fn queue_write_staging_buffer( + &self, + queue: &Self::QueueId, + buffer: &Self::BufferId, + offset: BufferAddress, + staging_buffer: &QueueWriteBuffer, + ); fn queue_write_texture( &self, queue: &Self::QueueId, @@ -3355,6 +3367,40 @@ impl<'a> RenderBundleEncoder<'a> { } } +/// A write-only view into a staging buffer +pub struct QueueWriteBufferView<'a> { + queue: &'a Queue, + buffer: &'a Buffer, + offset: BufferAddress, + inner: QueueWriteBuffer, +} + +impl<'a> std::ops::Deref for QueueWriteBufferView<'a> { + type Target = [u8]; + + fn deref(&self) -> &Self::Target { + panic!("QueueWriteBufferView is write-only!"); + } +} + +impl<'a> std::ops::DerefMut for QueueWriteBufferView<'a> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.inner + } +} + +impl<'a> Drop for QueueWriteBufferView<'a> { + fn drop(&mut self) { + Context::queue_write_staging_buffer( + &*self.queue.context, + &self.queue.id, + &self.buffer.id, + self.offset, + &self.inner, + ); + } +} + impl Queue { /// Schedule a data write into `buffer` starting at `offset`. /// @@ -3367,6 +3413,33 @@ impl Queue { Context::queue_write_buffer(&*self.context, &self.id, &buffer.id, offset, data) } + /// Schedule a data write into `buffer` starting at `offset` via the returned [QueueWriteBufferView]. + /// + /// The returned value can be dereferenced to a `&mut [u8]`. + /// + /// Dropping the returned value fails if `size` is greater than the size of `buffer` starting at `offset`. + /// + /// Dereferencing the returned value to a `&[u8]` panics! + /// + /// This method is intended to have low performance costs. + /// As such, the write is not immediately submitted, and instead enqueued + /// internally to happen at the start of the next `submit()` call. + #[must_use] + pub fn write_buffer_with<'a>( + &'a self, + buffer: &'a Buffer, + offset: BufferAddress, + size: BufferSize, + ) -> QueueWriteBufferView<'a> { + let staging_buffer = Context::queue_create_staging_buffer(&*self.context, &self.id, size); + QueueWriteBufferView { + queue: self, + buffer, + offset, + inner: staging_buffer, + } + } + /// Schedule a data write into `texture`. /// /// This method is intended to have low performance costs. 
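
The API introduced in this patch is consumed entirely through `wgpu::Queue`. A minimal usage sketch, assuming an existing `queue` and a `buffer` created with `BufferUsages::COPY_DST` (the 16-byte payload is illustrative):

```rust
// Sketch only: `queue` and `buffer` are assumed to already exist, and `buffer`
// must have been created with wgpu::BufferUsages::COPY_DST.
let data = [1u8; 16];
let size = wgpu::BufferSize::new(data.len() as u64).unwrap();

// Borrow a write-only view of an internal staging buffer; `offset` and `size`
// must be multiples of wgpu::COPY_BUFFER_ALIGNMENT and fit inside `buffer`.
let mut view = queue.write_buffer_with(&buffer, 0, size);
view.copy_from_slice(&data); // DerefMut yields &mut [u8]; plain Deref panics by design

// Dropping the view enqueues the copy; it lands at the start of the next submit().
drop(view);
queue.submit(std::iter::empty());
```

Compared with `write_buffer`, this avoids one CPU-side copy: the caller writes directly into the mapped staging allocation instead of handing wgpu a slice to copy from.
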
From 75913fd32078249dabe8c51d1d50a721b4bcdaa6 Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Thu, 23 Jun 2022 13:58:11 +0200 Subject: [PATCH 2/4] address comments --- wgpu-core/src/device/queue.rs | 213 ++++++++++++++++++---------------- wgpu/src/backend/direct.rs | 16 +++ wgpu/src/backend/web.rs | 10 ++ wgpu/src/lib.rs | 8 ++ 4 files changed, 144 insertions(+), 103 deletions(-) diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 94ec3b69aa..0d831430d0 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -298,12 +298,12 @@ impl Global { profiling::scope!("write_buffer", "Queue"); let hub = A::hub(self); - let mut token = Token::root(); - let (mut device_guard, mut token) = hub.devices.write(&mut token); + let root_token = &mut Token::root(); + + let (mut device_guard, ref mut device_token) = hub.devices.write(root_token); let device = device_guard .get_mut(queue_id) .map_err(|_| DeviceError::Invalid)?; - let (buffer_guard, _) = hub.buffers.read(&mut token); let data_size = data.len() as wgt::BufferAddress; @@ -332,66 +332,13 @@ impl Global { staging_buffer.flush(&device.raw)?; }; - let mut trackers = device.trackers.lock(); - let (dst, transition) = trackers - .buffers - .set_single(&*buffer_guard, buffer_id, hal::BufferUses::COPY_DST) - .ok_or(TransferError::InvalidBuffer(buffer_id))?; - let dst_raw = dst - .raw - .as_ref() - .ok_or(TransferError::InvalidBuffer(buffer_id))?; - if !dst.usage.contains(wgt::BufferUsages::COPY_DST) { - return Err(TransferError::MissingCopyDstUsageFlag(Some(buffer_id), None).into()); - } - dst.life_guard.use_at(device.active_submission_index + 1); - - if data_size % wgt::COPY_BUFFER_ALIGNMENT != 0 { - return Err(TransferError::UnalignedCopySize(data_size).into()); - } - if buffer_offset % wgt::COPY_BUFFER_ALIGNMENT != 0 { - return Err(TransferError::UnalignedBufferOffset(buffer_offset).into()); - } - if buffer_offset + data_size > dst.size { - return Err(TransferError::BufferOverrun { - start_offset: buffer_offset, - end_offset: buffer_offset + data_size, - buffer_size: dst.size, - side: CopySide::Destination, - } - .into()); - } - - let region = wgt::BufferSize::new(data_size).map(|size| hal::BufferCopy { - src_offset: 0, - dst_offset: buffer_offset, - size, - }); - let barriers = iter::once(hal::BufferBarrier { - buffer: &staging_buffer.raw, - usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, - }) - .chain(transition.map(|pending| pending.into_hal(dst))); - let encoder = device.pending_writes.activate(); - unsafe { - encoder.transition_buffers(barriers); - encoder.copy_buffer_to_buffer(&staging_buffer.raw, dst_raw, region.into_iter()); - } - - device.pending_writes.consume(staging_buffer); - device.pending_writes.dst_buffers.insert(buffer_id); - - // Ensure the overwritten bytes are marked as initialized so they don't need to be nulled prior to mapping or binding. 
- { - drop(buffer_guard); - let (mut buffer_guard, _) = hub.buffers.write(&mut token); - - let dst = buffer_guard.get_mut(buffer_id).unwrap(); - dst.initialization_status - .drain(buffer_offset..(buffer_offset + data_size)); - } - - Ok(()) + self.queue_write_staging_buffer_impl( + device, + device_token, + staging_buffer, + buffer_id, + buffer_offset, + ) } pub fn queue_create_staging_buffer( @@ -401,18 +348,19 @@ impl Global { id_in: Input, ) -> Result<(id::StagingBufferId, *mut u8), QueueWriteError> { let hub = A::hub(self); - let mut token = Token::root(); - let fid = hub.staging_buffers.prepare(id_in); - let (mut device_guard, mut token) = hub.devices.write(&mut token); + let root_token = &mut Token::root(); + + let (mut device_guard, ref mut device_token) = hub.devices.write(root_token); let device = device_guard .get_mut(queue_id) .map_err(|_| DeviceError::Invalid)?; - let data_size = buffer_size.get(); + let (staging_buffer, staging_buffer_ptr) = + device.prepare_staging_buffer(buffer_size.get())?; - let (staging_buffer, staging_buffer_ptr) = device.prepare_staging_buffer(data_size)?; + let fid = hub.staging_buffers.prepare(id_in); + let id = fid.assign(staging_buffer, device_token); - let id = fid.assign(staging_buffer, &mut token); Ok((id.0, staging_buffer_ptr)) } @@ -421,83 +369,142 @@ impl Global { queue_id: id::QueueId, buffer_id: id::BufferId, buffer_offset: wgt::BufferAddress, - staging_buffer: id::StagingBufferId, + staging_buffer_id: id::StagingBufferId, ) -> Result<(), QueueWriteError> { profiling::scope!("write_buffer_with", "Queue"); let hub = A::hub(self); - let mut token = Token::root(); - let (mut device_guard, mut token) = hub.devices.write(&mut token); + let root_token = &mut Token::root(); + + let (mut device_guard, ref mut device_token) = hub.devices.write(root_token); let device = device_guard .get_mut(queue_id) .map_err(|_| DeviceError::Invalid)?; - let (src_buffer, _) = hub.staging_buffers.unregister(staging_buffer, &mut token); - let src_buffer = src_buffer.ok_or(TransferError::InvalidBuffer(buffer_id))?; + let staging_buffer = hub + .staging_buffers + .unregister(staging_buffer_id, device_token) + .0 + .ok_or(TransferError::InvalidBuffer(buffer_id))?; + + unsafe { staging_buffer.flush(&device.raw)? }; + + self.queue_write_staging_buffer_impl( + device, + device_token, + staging_buffer, + buffer_id, + buffer_offset, + ) + } + + pub fn queue_validate_write_buffer( + &self, + _queue_id: id::QueueId, + buffer_id: id::BufferId, + buffer_offset: u64, + buffer_size: u64, + ) -> Result<(), QueueWriteError> { + let hub = A::hub(self); + let root_token = &mut Token::root(); - let data_size = src_buffer.size; + let (_, ref mut device_token) = hub.devices.read(root_token); - unsafe { src_buffer.flush(&device.raw)? 
}; + let buffer_guard = hub.buffers.read(device_token).0; + let buffer = buffer_guard + .get(buffer_id) + .map_err(|_| TransferError::InvalidBuffer(buffer_id))?; - let (buffer_guard, _) = hub.buffers.read(&mut token); + self.queue_validate_write_buffer_impl(buffer, buffer_id, buffer_offset, buffer_size)?; + + Ok(()) + } + + fn queue_validate_write_buffer_impl( + &self, + buffer: &super::resource::Buffer, + buffer_id: id::BufferId, + buffer_offset: u64, + buffer_size: u64, + ) -> Result<(), TransferError> { + if !buffer.usage.contains(wgt::BufferUsages::COPY_DST) { + return Err(TransferError::MissingCopyDstUsageFlag( + Some(buffer_id), + None, + )); + } + if buffer_size % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(TransferError::UnalignedCopySize(buffer_size)); + } + if buffer_offset % wgt::COPY_BUFFER_ALIGNMENT != 0 { + return Err(TransferError::UnalignedBufferOffset(buffer_offset)); + } + if buffer_offset + buffer_size > buffer.size { + return Err(TransferError::BufferOverrun { + start_offset: buffer_offset, + end_offset: buffer_offset + buffer_size, + buffer_size: buffer.size, + side: CopySide::Destination, + }); + } + + Ok(()) + } + + fn queue_write_staging_buffer_impl( + &self, + device: &mut super::Device, + device_token: &mut Token>, + staging_buffer: StagingBuffer, + buffer_id: id::BufferId, + buffer_offset: u64, + ) -> Result<(), QueueWriteError> { + let hub = A::hub(self); + + let buffer_guard = hub.buffers.read(device_token).0; let mut trackers = device.trackers.lock(); let (dst, transition) = trackers .buffers - .set_single(&*buffer_guard, buffer_id, hal::BufferUses::COPY_DST) + .set_single(&buffer_guard, buffer_id, hal::BufferUses::COPY_DST) .ok_or(TransferError::InvalidBuffer(buffer_id))?; let dst_raw = dst .raw .as_ref() .ok_or(TransferError::InvalidBuffer(buffer_id))?; - if !dst.usage.contains(wgt::BufferUsages::COPY_DST) { - return Err(TransferError::MissingCopyDstUsageFlag(Some(buffer_id), None).into()); - } - dst.life_guard.use_at(device.active_submission_index + 1); - if data_size % wgt::COPY_BUFFER_ALIGNMENT != 0 { - return Err(TransferError::UnalignedCopySize(data_size).into()); - } - if buffer_offset % wgt::COPY_BUFFER_ALIGNMENT != 0 { - return Err(TransferError::UnalignedBufferOffset(buffer_offset).into()); - } - if buffer_offset + data_size > dst.size { - return Err(TransferError::BufferOverrun { - start_offset: buffer_offset, - end_offset: buffer_offset + data_size, - buffer_size: dst.size, - side: CopySide::Destination, - } - .into()); - } + let src_buffer_size = staging_buffer.size; + self.queue_validate_write_buffer_impl(dst, buffer_id, buffer_offset, src_buffer_size)?; + + dst.life_guard.use_at(device.active_submission_index + 1); - let region = wgt::BufferSize::new(data_size).map(|size| hal::BufferCopy { + let region = wgt::BufferSize::new(src_buffer_size).map(|size| hal::BufferCopy { src_offset: 0, dst_offset: buffer_offset, size, }); let barriers = iter::once(hal::BufferBarrier { - buffer: &src_buffer.raw, + buffer: &staging_buffer.raw, usage: hal::BufferUses::MAP_WRITE..hal::BufferUses::COPY_SRC, }) .chain(transition.map(|pending| pending.into_hal(dst))); let encoder = device.pending_writes.activate(); unsafe { encoder.transition_buffers(barriers); - encoder.copy_buffer_to_buffer(&src_buffer.raw, dst_raw, region.into_iter()); + encoder.copy_buffer_to_buffer(&staging_buffer.raw, dst_raw, region.into_iter()); } - device.pending_writes.consume(src_buffer); + device.pending_writes.consume(staging_buffer); device.pending_writes.dst_buffers.insert(buffer_id); 
// Ensure the overwritten bytes are marked as initialized so they don't need to be nulled prior to mapping or binding. { drop(buffer_guard); - let (mut buffer_guard, _) = hub.buffers.write(&mut token); + let mut buffer_guard = hub.buffers.write(device_token).0; let dst = buffer_guard.get_mut(buffer_id).unwrap(); dst.initialization_status - .drain(buffer_offset..(buffer_offset + data_size)); + .drain(buffer_offset..(buffer_offset + src_buffer_size)); } Ok(()) diff --git a/wgpu/src/backend/direct.rs b/wgpu/src/backend/direct.rs index 193ebe3a40..e63852546c 100644 --- a/wgpu/src/backend/direct.rs +++ b/wgpu/src/backend/direct.rs @@ -2180,6 +2180,22 @@ impl crate::Context for Context { } } + fn queue_validate_write_buffer( + &self, + queue: &Self::QueueId, + buffer: &Self::BufferId, + offset: wgt::BufferAddress, + size: wgt::BufferSize, + ) { + let global = &self.0; + match wgc::gfx_select!( + *queue => global.queue_validate_write_buffer(*queue, buffer.id, offset, size.get()) + ) { + Ok(()) => (), + Err(err) => self.handle_error_fatal(err, "Queue::write_buffer_with"), + } + } + fn queue_create_staging_buffer( &self, queue: &Self::QueueId, diff --git a/wgpu/src/backend/web.rs b/wgpu/src/backend/web.rs index d5df5021c8..2658de6632 100644 --- a/wgpu/src/backend/web.rs +++ b/wgpu/src/backend/web.rs @@ -2175,6 +2175,16 @@ impl crate::Context for Context { ); } + fn queue_validate_write_buffer( + &self, + _queue: &Self::QueueId, + _buffer: &Self::BufferId, + _offset: wgt::BufferAddress, + _size: wgt::BufferSize, + ) { + // TODO + } + fn queue_create_staging_buffer( &self, _queue: &Self::QueueId, diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index dd54761eb3..d108775e83 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -481,6 +481,13 @@ trait Context: Debug + Send + Sized + Sync { offset: BufferAddress, data: &[u8], ); + fn queue_validate_write_buffer( + &self, + queue: &Self::QueueId, + buffer: &Self::BufferId, + offset: wgt::BufferAddress, + size: wgt::BufferSize, + ); fn queue_create_staging_buffer( &self, queue: &Self::QueueId, @@ -3431,6 +3438,7 @@ impl Queue { offset: BufferAddress, size: BufferSize, ) -> QueueWriteBufferView<'a> { + Context::queue_validate_write_buffer(&*self.context, &self.id, &buffer.id, offset, size); let staging_buffer = Context::queue_create_staging_buffer(&*self.context, &self.id, size); QueueWriteBufferView { queue: self, From 3bfd1249db9b5eb935d9ceb473f63f6dc44944ab Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Thu, 23 Jun 2022 14:10:24 +0200 Subject: [PATCH 3/4] update doc --- wgpu/src/lib.rs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index d108775e83..8d6c2e2712 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -3422,15 +3422,13 @@ impl Queue { /// Schedule a data write into `buffer` starting at `offset` via the returned [QueueWriteBufferView]. /// - /// The returned value can be dereferenced to a `&mut [u8]`. - /// - /// Dropping the returned value fails if `size` is greater than the size of `buffer` starting at `offset`. - /// - /// Dereferencing the returned value to a `&[u8]` panics! + /// The returned value can be dereferenced to a `&mut [u8]`; dereferencing it to a `&[u8]` panics! /// /// This method is intended to have low performance costs. /// As such, the write is not immediately submitted, and instead enqueued /// internally to happen at the start of the next `submit()` call. 
+    ///
+    /// This method fails if `size` is greater than the size of `buffer` starting at `offset`.
     #[must_use]
     pub fn write_buffer_with<'a>(
         &'a self,

From 9b7435a7a707df6aa02a03e917c3c0c6ac4f9e84 Mon Sep 17 00:00:00 2001
From: Connor Fitzgerald
Date: Tue, 28 Jun 2022 14:21:49 -0400
Subject: [PATCH 4/4] Fix copy span location

---
 wgpu-core/src/device/queue.rs | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs
index 0d831430d0..dc1a3b196a 100644
--- a/wgpu-core/src/device/queue.rs
+++ b/wgpu-core/src/device/queue.rs
@@ -326,8 +326,8 @@ impl<G: GlobalIdentityHandlerFactory> Global<G> {
 
         let (staging_buffer, staging_buffer_ptr) = device.prepare_staging_buffer(data_size)?;
 
-        profiling::scope!("copy");
         unsafe {
+            profiling::scope!("copy");
             ptr::copy_nonoverlapping(data.as_ptr(), staging_buffer_ptr, data.len());
             staging_buffer.flush(&device.raw)?;
        };
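
The final patch is subtle: `profiling::scope!` (from the `profiling` crate used by wgpu-core) expands to a RAII guard that is dropped when the enclosing block exits. Declared before the `unsafe` block, the "copy" span stayed open to the end of the function, so the tracking and command-encoding work below was also attributed to it; moving it inside the block confines the span to the memcpy and flush. An illustrative sketch of the mechanism, with hypothetical function and variable names:

```rust
// Illustrative only; assumes the `profiling` crate. The guard created by
// profiling::scope! ends its span when the *enclosing block* exits.
fn staged_write(data: &[u8], staging_ptr: *mut u8) {
    unsafe {
        profiling::scope!("copy"); // span opens here...
        std::ptr::copy_nonoverlapping(data.as_ptr(), staging_ptr, data.len());
    } // ...and closes here, so only the copy is measured

    // Work placed after the block (tracking, barriers, encoding)
    // is no longer attributed to the "copy" span.
}
```
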