Skip to content

Commit 5cb139a

Browse files
committed
.
1 parent 1bf2725 commit 5cb139a

4 files changed

Lines changed: 173 additions & 58 deletions

File tree

crates/processing_ffi/src/lib.rs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1912,11 +1912,6 @@ pub extern "C" fn processing_buffer_size(buf_id: u64) -> u64 {
19121912

19131913
/// Read buffer contents into a caller-provided buffer.
19141914
///
1915-
/// Returns the buffer's byte length. If the returned size is `<= out_len`, the
1916-
/// data has been written to `out`; otherwise `out` is left untouched and the
1917-
/// caller should reallocate and retry. Returns 0 if the buffer does not exist
1918-
/// or the GPU readback failed (in which case the error is set).
1919-
///
19201915
/// # Safety
19211916
/// - `out` must be valid for writes of `out_len` bytes (may be null if
19221917
/// `out_len == 0`, in which case this acts as a size query).

crates/processing_render/src/compute.rs

Lines changed: 45 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use std::collections::BTreeSet;
1+
use std::collections::{BTreeSet, HashMap};
22

33
use bevy::asset::RenderAssetUsages;
44
use bevy::reflect::PartialReflect;
@@ -30,6 +30,10 @@ pub struct Buffer {
3030
pub handle: Handle<ShaderBuffer>,
3131
pub readback_buffer: WgpuBuffer,
3232
pub size: u64,
33+
/// True when `ShaderBuffer.data` reflects current GPU contents. Cleared
34+
/// when a pipeline that may write to the buffer runs; the next read or
35+
/// write must readback first.
36+
pub synced: bool,
3337
}
3438

3539
fn readback_buffer(device: &RenderDevice, size: u64) -> WgpuBuffer {
@@ -47,15 +51,16 @@ pub fn create_buffer(
4751
mut buffers: ResMut<Assets<ShaderBuffer>>,
4852
render_device: Res<RenderDevice>,
4953
) -> Entity {
50-
let handle = buffers.add(ShaderBuffer::with_size(
51-
size as usize,
54+
let handle = buffers.add(ShaderBuffer::new(
55+
&vec![0u8; size as usize],
5256
RenderAssetUsages::all(),
5357
));
5458
commands
5559
.spawn(Buffer {
5660
handle,
5761
readback_buffer: readback_buffer(&render_device, size),
5862
size,
63+
synced: true,
5964
})
6065
.id()
6166
}
@@ -73,30 +78,37 @@ pub fn create_buffer_with_data(
7378
handle,
7479
readback_buffer: readback_buffer(&render_device, size),
7580
size,
81+
synced: true,
7682
})
7783
.id()
7884
}
7985

80-
pub fn write_buffer_gpu(
86+
/// Mutate the CPU-side data of a `ShaderBuffer` in place. Fires
87+
/// `AssetEvent::Modified` so Bevy's render-asset extract uploads the new
88+
/// contents to the GPU at the next sync point.
89+
pub fn write_buffer_cpu(
8190
In((handle, offset, data)): In<(Handle<ShaderBuffer>, u64, Vec<u8>)>,
82-
gpu_buffers: Res<RenderAssets<GpuShaderBuffer>>,
83-
render_queue: Res<RenderQueue>,
91+
mut buffers: ResMut<Assets<ShaderBuffer>>,
8492
) -> Result<()> {
85-
let gpu_buffer = &gpu_buffers
86-
.get(&handle)
87-
.ok_or(ProcessingError::BufferNotFound)?
88-
.buffer;
89-
render_queue.write_buffer(gpu_buffer, offset, &data);
93+
let mut asset = buffers
94+
.get_mut(&handle)
95+
.ok_or(ProcessingError::BufferNotFound)?;
96+
let dst = asset
97+
.data
98+
.as_mut()
99+
.ok_or(ProcessingError::BufferNotFound)?;
100+
let start = offset as usize;
101+
let end = start + data.len();
102+
dst[start..end].copy_from_slice(&data);
90103
Ok(())
91104
}
92105

106+
/// Copy the GPU buffer back to CPU and return its full contents. Runs in the
107+
/// render world; the caller is responsible for writing the bytes back into
108+
/// `ShaderBuffer.data` via `Assets::get_mut_untracked` (avoiding spurious
109+
/// `AssetEvent::Modified`s, since this is a readback, not a stage-for-upload).
93110
pub fn read_buffer_gpu(
94-
In((handle, readback_buffer, src_offset, len)): In<(
95-
Handle<ShaderBuffer>,
96-
WgpuBuffer,
97-
u64,
98-
u64,
99-
)>,
111+
In((handle, readback_buffer, size)): In<(Handle<ShaderBuffer>, WgpuBuffer, u64)>,
100112
gpu_buffers: Res<RenderAssets<GpuShaderBuffer>>,
101113
render_device: Res<RenderDevice>,
102114
render_queue: Res<RenderQueue>,
@@ -107,10 +119,10 @@ pub fn read_buffer_gpu(
107119
.buffer;
108120

109121
let mut encoder = render_device.create_command_encoder(&CommandEncoderDescriptor::default());
110-
encoder.copy_buffer_to_buffer(gpu_buffer, src_offset, &readback_buffer, 0, len);
122+
encoder.copy_buffer_to_buffer(gpu_buffer, 0, &readback_buffer, 0, size);
111123
render_queue.submit(std::iter::once(encoder.finish()));
112124

113-
let buffer_slice = readback_buffer.slice(0..len);
125+
let buffer_slice = readback_buffer.slice(0..size);
114126
let (s, r) = crossbeam_channel::bounded(1);
115127
buffer_slice.map_async(MapMode::Read, move |result| {
116128
let _ = s.send(result);
@@ -122,10 +134,9 @@ pub fn read_buffer_gpu(
122134
.map_err(|e| ProcessingError::BufferMapError(format!("map channel closed: {e}")))?
123135
.map_err(|e| ProcessingError::BufferMapError(format!("map failed: {e}")))?;
124136

125-
let data = buffer_slice.get_mapped_range().to_vec();
137+
let bytes = buffer_slice.get_mapped_range().to_vec();
126138
readback_buffer.unmap();
127-
128-
Ok(data)
139+
Ok(bytes)
129140
}
130141

131142
pub fn destroy_buffer(In(entity): In<Entity>, mut commands: Commands) -> Result<()> {
@@ -139,6 +150,11 @@ pub struct Compute {
139150
pub entry_point: String,
140151
pub pipeline_id: CachedComputePipelineId,
141152
pub bind_group_layout_descriptors: Vec<(u32, BindGroupLayoutDescriptor)>,
153+
/// Buffer entities bound to this compute on a `read_write` storage param.
154+
/// Their CPU view of GPU data is invalidated after each dispatch so the
155+
/// next read/write does a readback. Read-only bindings don't need this
156+
/// since the dispatch can't mutate them.
157+
pub rw_buffers: HashMap<String, Entity>,
142158
}
143159

144160
fn queue_pipeline(
@@ -240,6 +256,7 @@ pub fn create_compute(app: &mut App, shader_entity: Entity) -> Result<Entity> {
240256
entry_point,
241257
pipeline_id,
242258
bind_group_layout_descriptors,
259+
rw_buffers: HashMap::new(),
243260
})
244261
.id());
245262
}
@@ -267,11 +284,16 @@ pub fn set_compute_property(
267284
.ok_or_else(|| ProcessingError::UnknownShaderProperty(name.clone()))?;
268285

269286
match (&value, category) {
270-
(ShaderValue::Buffer(buf_entity), ParameterCategory::Storage { .. }) => {
287+
(ShaderValue::Buffer(buf_entity), ParameterCategory::Storage { read_only }) => {
271288
let buffer = p_buffers
272289
.get(*buf_entity)
273290
.map_err(|_| ProcessingError::BufferNotFound)?;
274291
compute.shader.insert(&name, buffer.handle.clone());
292+
if read_only {
293+
compute.rw_buffers.remove(&name);
294+
} else {
295+
compute.rw_buffers.insert(name.clone(), *buf_entity);
296+
}
275297
Ok(())
276298
}
277299
(ShaderValue::Texture(img_entity), ParameterCategory::Texture)

crates/processing_render/src/graphics.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -453,6 +453,36 @@ pub fn flush(app: &mut App, entity: Entity) -> Result<()> {
453453
Ok(())
454454
}
455455

456+
/// Flush all graphics with pending commands and run a frame so any other
457+
/// pending GPU state (asset writes, etc.) is extracted and uploaded. Used as
458+
/// a sync boundary before operations like compute dispatch that may bind
459+
/// graphics targets or recently-mutated assets.
460+
pub fn flush_all(app: &mut App) {
461+
let mut to_flush = Vec::new();
462+
let world = app.world_mut();
463+
let mut q = world.query::<(Entity, &CommandBuffer, &Graphics)>();
464+
for (e, cb, _) in q.iter(world) {
465+
if !cb.commands.is_empty() {
466+
to_flush.push(e);
467+
}
468+
}
469+
470+
for e in &to_flush {
471+
if let Ok(mut em) = world.get_entity_mut(*e) {
472+
em.insert(Flush);
473+
}
474+
}
475+
476+
app.update();
477+
478+
let world = app.world_mut();
479+
for e in &to_flush {
480+
if let Ok(mut em) = world.get_entity_mut(*e) {
481+
em.remove::<Flush>();
482+
}
483+
}
484+
}
485+
456486
pub fn present(app: &mut App, entity: Entity) -> Result<()> {
457487
graphics_mut!(app, entity)
458488
.get_mut::<Camera>()

crates/processing_render/src/lib.rs

Lines changed: 98 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1676,6 +1676,49 @@ pub fn buffer_write_element(entity: Entity, offset: u64, data: Vec<u8>) -> error
16761676
buffer_write_range(entity, offset, data, false)
16771677
}
16781678

1679+
/// Ensure `ShaderBuffer.data` reflects current GPU contents by reading the
1680+
/// buffer back into the asset if it was invalidated by a prior dispatch.
1681+
/// Subsequent reads/writes can then operate on the in-asset bytes directly.
1682+
fn ensure_buffer_synced(app: &mut App, entity: Entity) -> error::Result<()> {
1683+
let (handle, readback_buffer, size, synced) = {
1684+
let buf = app
1685+
.world()
1686+
.get::<compute::Buffer>(entity)
1687+
.ok_or(error::ProcessingError::BufferNotFound)?;
1688+
(
1689+
buf.handle.clone(),
1690+
buf.readback_buffer.clone(),
1691+
buf.size,
1692+
buf.synced,
1693+
)
1694+
};
1695+
if synced {
1696+
return Ok(());
1697+
}
1698+
let bytes = app
1699+
.sub_app_mut(bevy::render::RenderApp)
1700+
.world_mut()
1701+
.run_system_cached_with(
1702+
compute::read_buffer_gpu,
1703+
(handle.clone(), readback_buffer, size),
1704+
)
1705+
.unwrap()?;
1706+
1707+
let world = app.world_mut();
1708+
let mut buffers = world.resource_mut::<Assets<bevy::render::storage::ShaderBuffer>>();
1709+
let asset = buffers
1710+
.get_mut_untracked(handle.id())
1711+
.ok_or(error::ProcessingError::BufferNotFound)?;
1712+
asset.data = Some(bytes);
1713+
drop(buffers);
1714+
1715+
let mut buf = world
1716+
.get_mut::<compute::Buffer>(entity)
1717+
.ok_or(error::ProcessingError::BufferNotFound)?;
1718+
buf.synced = true;
1719+
Ok(())
1720+
}
1721+
16791722
fn buffer_write_range(
16801723
entity: Entity,
16811724
offset: u64,
@@ -1706,9 +1749,9 @@ fn buffer_write_range(
17061749
data.len()
17071750
)));
17081751
}
1709-
app.sub_app_mut(bevy::render::RenderApp)
1710-
.world_mut()
1711-
.run_system_cached_with(compute::write_buffer_gpu, (handle, offset, data))
1752+
ensure_buffer_synced(app, entity)?;
1753+
app.world_mut()
1754+
.run_system_cached_with(compute::write_buffer_cpu, (handle, offset, data))
17121755
.unwrap()
17131756
})
17141757
}
@@ -1724,13 +1767,11 @@ pub fn buffer_read(entity: Entity) -> error::Result<Vec<u8>> {
17241767

17251768
fn buffer_read_range(entity: Entity, offset: u64, len: u64) -> error::Result<Vec<u8>> {
17261769
app_mut(|app| {
1727-
let (handle, readback_buffer, size) = {
1728-
let buf = app
1729-
.world()
1730-
.get::<compute::Buffer>(entity)
1731-
.ok_or(error::ProcessingError::BufferNotFound)?;
1732-
(buf.handle.clone(), buf.readback_buffer.clone(), buf.size)
1733-
};
1770+
let size = app
1771+
.world()
1772+
.get::<compute::Buffer>(entity)
1773+
.ok_or(error::ProcessingError::BufferNotFound)?
1774+
.size;
17341775
let end = offset.checked_add(len).ok_or_else(|| {
17351776
error::ProcessingError::InvalidArgument("offset + len overflow".to_string())
17361777
})?;
@@ -1739,13 +1780,21 @@ fn buffer_read_range(entity: Entity, offset: u64, len: u64) -> error::Result<Vec
17391780
"buffer read out of bounds: offset {offset} + len {len} > size {size}"
17401781
)));
17411782
}
1742-
app.sub_app_mut(bevy::render::RenderApp)
1743-
.world_mut()
1744-
.run_system_cached_with(
1745-
compute::read_buffer_gpu,
1746-
(handle, readback_buffer, offset, len),
1747-
)
1748-
.unwrap()
1783+
ensure_buffer_synced(app, entity)?;
1784+
let handle = app
1785+
.world()
1786+
.get::<compute::Buffer>(entity)
1787+
.ok_or(error::ProcessingError::BufferNotFound)?
1788+
.handle
1789+
.clone();
1790+
let buffers = app
1791+
.world()
1792+
.resource::<Assets<bevy::render::storage::ShaderBuffer>>();
1793+
let data = buffers
1794+
.get(&handle)
1795+
.and_then(|a| a.data.as_ref())
1796+
.ok_or(error::ProcessingError::BufferNotFound)?;
1797+
Ok(data[offset as usize..(offset + len) as usize].to_vec())
17491798
})
17501799
}
17511800

@@ -1775,22 +1824,41 @@ pub fn compute_set(
17751824

17761825
pub fn compute_dispatch(entity: Entity, x: u32, y: u32, z: u32) -> error::Result<()> {
17771826
app_mut(|app| {
1778-
let c = app
1779-
.world()
1780-
.get::<compute::Compute>(entity)
1781-
.ok_or(error::ProcessingError::ComputeNotFound)?;
1782-
let args = (
1783-
c.pipeline_id,
1784-
c.bind_group_layout_descriptors.clone(),
1785-
c.shader.clone(),
1786-
x,
1787-
y,
1788-
z,
1789-
);
1827+
// Flush any pending graphics work and let Bevy's render-asset extract
1828+
// upload any CPU-side buffer mutations to the GPU before the dispatch
1829+
// runs. This is the sync boundary for compute inputs.
1830+
crate::graphics::flush_all(app);
1831+
1832+
let (args, rw_entities) = {
1833+
let c = app
1834+
.world()
1835+
.get::<compute::Compute>(entity)
1836+
.ok_or(error::ProcessingError::ComputeNotFound)?;
1837+
let args = (
1838+
c.pipeline_id,
1839+
c.bind_group_layout_descriptors.clone(),
1840+
c.shader.clone(),
1841+
x,
1842+
y,
1843+
z,
1844+
);
1845+
let rw_entities: Vec<Entity> = c.rw_buffers.values().copied().collect();
1846+
(args, rw_entities)
1847+
};
17901848
app.sub_app_mut(bevy::render::RenderApp)
17911849
.world_mut()
17921850
.run_system_cached_with(compute::dispatch, args)
1793-
.unwrap()
1851+
.unwrap()?;
1852+
1853+
// Invalidate the CPU view of any buffer the dispatch could have
1854+
// written. The next read or write on those buffers will readback first.
1855+
let world = app.world_mut();
1856+
for e in rw_entities {
1857+
if let Some(mut buf) = world.get_mut::<compute::Buffer>(e) {
1858+
buf.synced = false;
1859+
}
1860+
}
1861+
Ok(())
17941862
})
17951863
}
17961864

0 commit comments

Comments
 (0)