Skip to content

Commit e29c44c

Browse files
committed
.
1 parent e8811df commit e29c44c

4 files changed

Lines changed: 173 additions & 58 deletions

File tree

crates/processing_ffi/src/lib.rs

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1912,11 +1912,6 @@ pub extern "C" fn processing_buffer_size(buf_id: u64) -> u64 {
19121912

19131913
/// Read buffer contents into a caller-provided buffer.
19141914
///
1915-
/// Returns the buffer's byte length. If the returned size is `<= out_len`, the
1916-
/// data has been written to `out`; otherwise `out` is left untouched and the
1917-
/// caller should reallocate and retry. Returns 0 if the buffer does not exist
1918-
/// or the GPU readback failed (in which case the error is set).
1919-
///
19201915
/// # Safety
19211916
/// - `out` must be valid for writes of `out_len` bytes (may be null if
19221917
/// `out_len == 0`, in which case this acts as a size query).

crates/processing_render/src/compute.rs

Lines changed: 45 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use std::collections::BTreeSet;
1+
use std::collections::{BTreeSet, HashMap};
22

33
use bevy::asset::RenderAssetUsages;
44
use bevy::reflect::PartialReflect;
@@ -30,6 +30,10 @@ pub struct Buffer {
3030
pub handle: Handle<ShaderBuffer>,
3131
pub readback_buffer: WgpuBuffer,
3232
pub size: u64,
33+
/// True when `ShaderBuffer.data` reflects current GPU contents. Cleared
34+
/// when a pipeline that may write to the buffer runs; the next read or
35+
/// write must readback first.
36+
pub synced: bool,
3337
}
3438

3539
fn readback_buffer(device: &RenderDevice, size: u64) -> WgpuBuffer {
@@ -47,15 +51,16 @@ pub fn create_buffer(
4751
mut buffers: ResMut<Assets<ShaderBuffer>>,
4852
render_device: Res<RenderDevice>,
4953
) -> Entity {
50-
let handle = buffers.add(ShaderBuffer::with_size(
51-
size as usize,
54+
let handle = buffers.add(ShaderBuffer::new(
55+
&vec![0u8; size as usize],
5256
RenderAssetUsages::all(),
5357
));
5458
commands
5559
.spawn(Buffer {
5660
handle,
5761
readback_buffer: readback_buffer(&render_device, size),
5862
size,
63+
synced: true,
5964
})
6065
.id()
6166
}
@@ -73,30 +78,37 @@ pub fn create_buffer_with_data(
7378
handle,
7479
readback_buffer: readback_buffer(&render_device, size),
7580
size,
81+
synced: true,
7682
})
7783
.id()
7884
}
7985

80-
pub fn write_buffer_gpu(
86+
/// Mutate the CPU-side data of a `ShaderBuffer` in place. Fires
87+
/// `AssetEvent::Modified` so Bevy's render-asset extract uploads the new
88+
/// contents to the GPU at the next sync point.
89+
pub fn write_buffer_cpu(
8190
In((handle, offset, data)): In<(Handle<ShaderBuffer>, u64, Vec<u8>)>,
82-
gpu_buffers: Res<RenderAssets<GpuShaderBuffer>>,
83-
render_queue: Res<RenderQueue>,
91+
mut buffers: ResMut<Assets<ShaderBuffer>>,
8492
) -> Result<()> {
85-
let gpu_buffer = &gpu_buffers
86-
.get(&handle)
87-
.ok_or(ProcessingError::BufferNotFound)?
88-
.buffer;
89-
render_queue.write_buffer(gpu_buffer, offset, &data);
93+
let mut asset = buffers
94+
.get_mut(&handle)
95+
.ok_or(ProcessingError::BufferNotFound)?;
96+
let dst = asset
97+
.data
98+
.as_mut()
99+
.ok_or(ProcessingError::BufferNotFound)?;
100+
let start = offset as usize;
101+
let end = start + data.len();
102+
dst[start..end].copy_from_slice(&data);
90103
Ok(())
91104
}
92105

106+
/// Copy the GPU buffer back to CPU and return its full contents. Runs in the
107+
/// render world; the caller is responsible for writing the bytes back into
108+
/// `ShaderBuffer.data` via `Assets::get_mut_untracked` (avoiding spurious
109+
/// `AssetEvent::Modified`s, since this is a readback, not a stage-for-upload).
93110
pub fn read_buffer_gpu(
94-
In((handle, readback_buffer, src_offset, len)): In<(
95-
Handle<ShaderBuffer>,
96-
WgpuBuffer,
97-
u64,
98-
u64,
99-
)>,
111+
In((handle, readback_buffer, size)): In<(Handle<ShaderBuffer>, WgpuBuffer, u64)>,
100112
gpu_buffers: Res<RenderAssets<GpuShaderBuffer>>,
101113
render_device: Res<RenderDevice>,
102114
render_queue: Res<RenderQueue>,
@@ -107,10 +119,10 @@ pub fn read_buffer_gpu(
107119
.buffer;
108120

109121
let mut encoder = render_device.create_command_encoder(&CommandEncoderDescriptor::default());
110-
encoder.copy_buffer_to_buffer(gpu_buffer, src_offset, &readback_buffer, 0, len);
122+
encoder.copy_buffer_to_buffer(gpu_buffer, 0, &readback_buffer, 0, size);
111123
render_queue.submit(std::iter::once(encoder.finish()));
112124

113-
let buffer_slice = readback_buffer.slice(0..len);
125+
let buffer_slice = readback_buffer.slice(0..size);
114126
let (s, r) = crossbeam_channel::bounded(1);
115127
buffer_slice.map_async(MapMode::Read, move |result| {
116128
let _ = s.send(result);
@@ -122,10 +134,9 @@ pub fn read_buffer_gpu(
122134
.map_err(|e| ProcessingError::BufferMapError(format!("map channel closed: {e}")))?
123135
.map_err(|e| ProcessingError::BufferMapError(format!("map failed: {e}")))?;
124136

125-
let data = buffer_slice.get_mapped_range().to_vec();
137+
let bytes = buffer_slice.get_mapped_range().to_vec();
126138
readback_buffer.unmap();
127-
128-
Ok(data)
139+
Ok(bytes)
129140
}
130141

131142
pub fn destroy_buffer(In(entity): In<Entity>, mut commands: Commands) -> Result<()> {
@@ -139,6 +150,11 @@ pub struct Compute {
139150
pub entry_point: String,
140151
pub pipeline_id: CachedComputePipelineId,
141152
pub bind_group_layout_descriptors: Vec<(u32, BindGroupLayoutDescriptor)>,
153+
/// Buffer entities bound to this compute on a `read_write` storage param.
154+
/// Their CPU view of GPU data is invalidated after each dispatch so the
155+
/// next read/write does a readback. Read-only bindings don't need this
156+
/// since the dispatch can't mutate them.
157+
pub rw_buffers: HashMap<String, Entity>,
142158
}
143159

144160
fn queue_pipeline(
@@ -240,6 +256,7 @@ pub fn create_compute(app: &mut App, shader_entity: Entity) -> Result<Entity> {
240256
entry_point,
241257
pipeline_id,
242258
bind_group_layout_descriptors,
259+
rw_buffers: HashMap::new(),
243260
})
244261
.id());
245262
}
@@ -267,11 +284,16 @@ pub fn set_compute_property(
267284
.ok_or_else(|| ProcessingError::UnknownShaderProperty(name.clone()))?;
268285

269286
match (&value, category) {
270-
(ShaderValue::Buffer(buf_entity), ParameterCategory::Storage { .. }) => {
287+
(ShaderValue::Buffer(buf_entity), ParameterCategory::Storage { read_only }) => {
271288
let buffer = p_buffers
272289
.get(*buf_entity)
273290
.map_err(|_| ProcessingError::BufferNotFound)?;
274291
compute.shader.insert(&name, buffer.handle.clone());
292+
if read_only {
293+
compute.rw_buffers.remove(&name);
294+
} else {
295+
compute.rw_buffers.insert(name.clone(), *buf_entity);
296+
}
275297
Ok(())
276298
}
277299
(ShaderValue::Texture(img_entity), ParameterCategory::Texture)

crates/processing_render/src/graphics.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -461,6 +461,36 @@ pub fn flush(app: &mut App, entity: Entity) -> Result<()> {
461461
Ok(())
462462
}
463463

464+
/// Flush all graphics with pending commands and run a frame so any other
465+
/// pending GPU state (asset writes, etc.) is extracted and uploaded. Used as
466+
/// a sync boundary before operations like compute dispatch that may bind
467+
/// graphics targets or recently-mutated assets.
468+
pub fn flush_all(app: &mut App) {
469+
let mut to_flush = Vec::new();
470+
let world = app.world_mut();
471+
let mut q = world.query::<(Entity, &CommandBuffer, &Graphics)>();
472+
for (e, cb, _) in q.iter(world) {
473+
if !cb.commands.is_empty() {
474+
to_flush.push(e);
475+
}
476+
}
477+
478+
for e in &to_flush {
479+
if let Ok(mut em) = world.get_entity_mut(*e) {
480+
em.insert(Flush);
481+
}
482+
}
483+
484+
app.update();
485+
486+
let world = app.world_mut();
487+
for e in &to_flush {
488+
if let Ok(mut em) = world.get_entity_mut(*e) {
489+
em.remove::<Flush>();
490+
}
491+
}
492+
}
493+
464494
pub fn present(app: &mut App, entity: Entity) -> Result<()> {
465495
graphics_mut!(app, entity)
466496
.get_mut::<Camera>()

crates/processing_render/src/lib.rs

Lines changed: 98 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1522,6 +1522,49 @@ pub fn buffer_write_element(entity: Entity, offset: u64, data: Vec<u8>) -> error
15221522
buffer_write_range(entity, offset, data, false)
15231523
}
15241524

1525+
/// Ensure `ShaderBuffer.data` reflects current GPU contents by reading the
1526+
/// buffer back into the asset if it was invalidated by a prior dispatch.
1527+
/// Subsequent reads/writes can then operate on the in-asset bytes directly.
1528+
fn ensure_buffer_synced(app: &mut App, entity: Entity) -> error::Result<()> {
1529+
let (handle, readback_buffer, size, synced) = {
1530+
let buf = app
1531+
.world()
1532+
.get::<compute::Buffer>(entity)
1533+
.ok_or(error::ProcessingError::BufferNotFound)?;
1534+
(
1535+
buf.handle.clone(),
1536+
buf.readback_buffer.clone(),
1537+
buf.size,
1538+
buf.synced,
1539+
)
1540+
};
1541+
if synced {
1542+
return Ok(());
1543+
}
1544+
let bytes = app
1545+
.sub_app_mut(bevy::render::RenderApp)
1546+
.world_mut()
1547+
.run_system_cached_with(
1548+
compute::read_buffer_gpu,
1549+
(handle.clone(), readback_buffer, size),
1550+
)
1551+
.unwrap()?;
1552+
1553+
let world = app.world_mut();
1554+
let mut buffers = world.resource_mut::<Assets<bevy::render::storage::ShaderBuffer>>();
1555+
let asset = buffers
1556+
.get_mut_untracked(handle.id())
1557+
.ok_or(error::ProcessingError::BufferNotFound)?;
1558+
asset.data = Some(bytes);
1559+
drop(buffers);
1560+
1561+
let mut buf = world
1562+
.get_mut::<compute::Buffer>(entity)
1563+
.ok_or(error::ProcessingError::BufferNotFound)?;
1564+
buf.synced = true;
1565+
Ok(())
1566+
}
1567+
15251568
fn buffer_write_range(
15261569
entity: Entity,
15271570
offset: u64,
@@ -1552,9 +1595,9 @@ fn buffer_write_range(
15521595
data.len()
15531596
)));
15541597
}
1555-
app.sub_app_mut(bevy::render::RenderApp)
1556-
.world_mut()
1557-
.run_system_cached_with(compute::write_buffer_gpu, (handle, offset, data))
1598+
ensure_buffer_synced(app, entity)?;
1599+
app.world_mut()
1600+
.run_system_cached_with(compute::write_buffer_cpu, (handle, offset, data))
15581601
.unwrap()
15591602
})
15601603
}
@@ -1570,13 +1613,11 @@ pub fn buffer_read(entity: Entity) -> error::Result<Vec<u8>> {
15701613

15711614
fn buffer_read_range(entity: Entity, offset: u64, len: u64) -> error::Result<Vec<u8>> {
15721615
app_mut(|app| {
1573-
let (handle, readback_buffer, size) = {
1574-
let buf = app
1575-
.world()
1576-
.get::<compute::Buffer>(entity)
1577-
.ok_or(error::ProcessingError::BufferNotFound)?;
1578-
(buf.handle.clone(), buf.readback_buffer.clone(), buf.size)
1579-
};
1616+
let size = app
1617+
.world()
1618+
.get::<compute::Buffer>(entity)
1619+
.ok_or(error::ProcessingError::BufferNotFound)?
1620+
.size;
15801621
let end = offset.checked_add(len).ok_or_else(|| {
15811622
error::ProcessingError::InvalidArgument("offset + len overflow".to_string())
15821623
})?;
@@ -1585,13 +1626,21 @@ fn buffer_read_range(entity: Entity, offset: u64, len: u64) -> error::Result<Vec
15851626
"buffer read out of bounds: offset {offset} + len {len} > size {size}"
15861627
)));
15871628
}
1588-
app.sub_app_mut(bevy::render::RenderApp)
1589-
.world_mut()
1590-
.run_system_cached_with(
1591-
compute::read_buffer_gpu,
1592-
(handle, readback_buffer, offset, len),
1593-
)
1594-
.unwrap()
1629+
ensure_buffer_synced(app, entity)?;
1630+
let handle = app
1631+
.world()
1632+
.get::<compute::Buffer>(entity)
1633+
.ok_or(error::ProcessingError::BufferNotFound)?
1634+
.handle
1635+
.clone();
1636+
let buffers = app
1637+
.world()
1638+
.resource::<Assets<bevy::render::storage::ShaderBuffer>>();
1639+
let data = buffers
1640+
.get(&handle)
1641+
.and_then(|a| a.data.as_ref())
1642+
.ok_or(error::ProcessingError::BufferNotFound)?;
1643+
Ok(data[offset as usize..(offset + len) as usize].to_vec())
15951644
})
15961645
}
15971646

@@ -1621,22 +1670,41 @@ pub fn compute_set(
16211670

16221671
pub fn compute_dispatch(entity: Entity, x: u32, y: u32, z: u32) -> error::Result<()> {
16231672
app_mut(|app| {
1624-
let c = app
1625-
.world()
1626-
.get::<compute::Compute>(entity)
1627-
.ok_or(error::ProcessingError::ComputeNotFound)?;
1628-
let args = (
1629-
c.pipeline_id,
1630-
c.bind_group_layout_descriptors.clone(),
1631-
c.shader.clone(),
1632-
x,
1633-
y,
1634-
z,
1635-
);
1673+
// Flush any pending graphics work and let Bevy's render-asset extract
1674+
// upload any CPU-side buffer mutations to the GPU before the dispatch
1675+
// runs. This is the sync boundary for compute inputs.
1676+
crate::graphics::flush_all(app);
1677+
1678+
let (args, rw_entities) = {
1679+
let c = app
1680+
.world()
1681+
.get::<compute::Compute>(entity)
1682+
.ok_or(error::ProcessingError::ComputeNotFound)?;
1683+
let args = (
1684+
c.pipeline_id,
1685+
c.bind_group_layout_descriptors.clone(),
1686+
c.shader.clone(),
1687+
x,
1688+
y,
1689+
z,
1690+
);
1691+
let rw_entities: Vec<Entity> = c.rw_buffers.values().copied().collect();
1692+
(args, rw_entities)
1693+
};
16361694
app.sub_app_mut(bevy::render::RenderApp)
16371695
.world_mut()
16381696
.run_system_cached_with(compute::dispatch, args)
1639-
.unwrap()
1697+
.unwrap()?;
1698+
1699+
// Invalidate the CPU view of any buffer the dispatch could have
1700+
// written. The next read or write on those buffers will readback first.
1701+
let world = app.world_mut();
1702+
for e in rw_entities {
1703+
if let Some(mut buf) = world.get_mut::<compute::Buffer>(e) {
1704+
buf.synced = false;
1705+
}
1706+
}
1707+
Ok(())
16401708
})
16411709
}
16421710

0 commit comments

Comments
 (0)