Skip to content

Commit 85b366a

Browse files
authored
Cache MeshInputUniform indices in each RenderBin. (#17772)
Currently, every frame we look up each `MeshInputUniform` index in a hash table that maps the main entity ID to the index. This is inefficient, cache unfriendly, and unnecessary, as the `MeshInputUniform` index for an entity remains the same from frame to frame (even if the input uniform changes). This commit changes the `IndexSet` in the `RenderBin` to an `IndexMap` that maps the `MainEntity` to its `InputUniformIndex` (a new type that this patch adds for more type safety). On Caldera with parallel `batch_and_prepare_binned_render_phase`, this patch improves that function from 3.18 ms to 2.42 ms, a 31% speedup.
1 parent ce43395 commit 85b366a

File tree

13 files changed

+99
-61
lines changed

13 files changed

+99
-61
lines changed

crates/bevy_pbr/src/material.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1002,6 +1002,7 @@ pub fn queue_material_meshes<M: Material>(
10021002
batch_set_key,
10031003
bin_key,
10041004
(*render_entity, *visible_entity),
1005+
mesh_instance.current_uniform_index,
10051006
BinnedRenderPhaseType::mesh(
10061007
mesh_instance.should_batch(),
10071008
&gpu_preprocessing_support,
@@ -1025,6 +1026,7 @@ pub fn queue_material_meshes<M: Material>(
10251026
batch_set_key,
10261027
bin_key,
10271028
(*render_entity, *visible_entity),
1029+
mesh_instance.current_uniform_index,
10281030
BinnedRenderPhaseType::mesh(
10291031
mesh_instance.should_batch(),
10301032
&gpu_preprocessing_support,

crates/bevy_pbr/src/prepass/mod.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1145,6 +1145,7 @@ pub fn queue_prepass_material_meshes<M: Material>(
11451145
asset_id: mesh_instance.mesh_asset_id.into(),
11461146
},
11471147
(*render_entity, *visible_entity),
1148+
mesh_instance.current_uniform_index,
11481149
BinnedRenderPhaseType::mesh(
11491150
mesh_instance.should_batch(),
11501151
&gpu_preprocessing_support,
@@ -1169,6 +1170,7 @@ pub fn queue_prepass_material_meshes<M: Material>(
11691170
asset_id: mesh_instance.mesh_asset_id.into(),
11701171
},
11711172
(*render_entity, *visible_entity),
1173+
mesh_instance.current_uniform_index,
11721174
BinnedRenderPhaseType::mesh(
11731175
mesh_instance.should_batch(),
11741176
&gpu_preprocessing_support,
@@ -1195,6 +1197,7 @@ pub fn queue_prepass_material_meshes<M: Material>(
11951197
batch_set_key,
11961198
bin_key,
11971199
(*render_entity, *visible_entity),
1200+
mesh_instance.current_uniform_index,
11981201
BinnedRenderPhaseType::mesh(
11991202
mesh_instance.should_batch(),
12001203
&gpu_preprocessing_support,
@@ -1218,6 +1221,7 @@ pub fn queue_prepass_material_meshes<M: Material>(
12181221
batch_set_key,
12191222
bin_key,
12201223
(*render_entity, *visible_entity),
1224+
mesh_instance.current_uniform_index,
12211225
BinnedRenderPhaseType::mesh(
12221226
mesh_instance.should_batch(),
12231227
&gpu_preprocessing_support,

crates/bevy_pbr/src/render/light.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1946,6 +1946,7 @@ pub fn queue_shadows<M: Material>(
19461946
asset_id: mesh_instance.mesh_asset_id.into(),
19471947
},
19481948
(entity, main_entity),
1949+
mesh_instance.current_uniform_index,
19491950
BinnedRenderPhaseType::mesh(
19501951
mesh_instance.should_batch(),
19511952
&gpu_preprocessing_support,

crates/bevy_pbr/src/render/mesh.rs

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ use bevy_render::{
3030
primitives::Aabb,
3131
render_asset::RenderAssets,
3232
render_phase::{
33-
BinnedRenderPhasePlugin, PhaseItem, PhaseItemExtraIndex, RenderCommand,
33+
BinnedRenderPhasePlugin, InputUniformIndex, PhaseItem, PhaseItemExtraIndex, RenderCommand,
3434
RenderCommandResult, SortedRenderPhasePlugin, TrackedRenderPass,
3535
},
3636
render_resource::*,
@@ -958,6 +958,7 @@ impl RenderMeshInstancesCpu {
958958
.map(|render_mesh_instance| RenderMeshQueueData {
959959
shared: &render_mesh_instance.shared,
960960
translation: render_mesh_instance.transforms.world_from_local.translation,
961+
current_uniform_index: InputUniformIndex::default(),
961962
})
962963
}
963964

@@ -981,6 +982,9 @@ impl RenderMeshInstancesGpu {
981982
.map(|render_mesh_instance| RenderMeshQueueData {
982983
shared: &render_mesh_instance.shared,
983984
translation: render_mesh_instance.translation,
985+
current_uniform_index: InputUniformIndex(
986+
render_mesh_instance.current_uniform_index.into(),
987+
),
984988
})
985989
}
986990

@@ -1281,6 +1285,9 @@ pub struct RenderMeshQueueData<'a> {
12811285
pub shared: &'a RenderMeshInstanceShared,
12821286
/// The translation of the mesh instance.
12831287
pub translation: Vec3,
1288+
/// The index of the [`MeshInputUniform`] in the GPU buffer for this mesh
1289+
/// instance.
1290+
pub current_uniform_index: InputUniformIndex,
12841291
}
12851292

12861293
/// A [`SystemSet`] that encompasses both [`extract_meshes_for_cpu_building`]
@@ -1945,15 +1952,15 @@ impl GetFullBatchData for MeshPipeline {
19451952
}
19461953

19471954
fn write_batch_indirect_parameters_metadata(
1948-
mesh_index: u32,
1955+
mesh_index: InputUniformIndex,
19491956
indexed: bool,
19501957
base_output_index: u32,
19511958
batch_set_index: Option<NonMaxU32>,
19521959
indirect_parameters_buffer: &mut IndirectParametersBuffers,
19531960
indirect_parameters_offset: u32,
19541961
) {
19551962
let indirect_parameters = IndirectParametersMetadata {
1956-
mesh_index,
1963+
mesh_index: *mesh_index,
19571964
base_output_index,
19581965
batch_set_index: match batch_set_index {
19591966
Some(batch_set_index) => u32::from(batch_set_index),

crates/bevy_render/src/batching/gpu_preprocessing.rs

Lines changed: 17 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,9 @@ use crate::{
2424
experimental::occlusion_culling::OcclusionCulling,
2525
render_phase::{
2626
BinnedPhaseItem, BinnedRenderPhaseBatch, BinnedRenderPhaseBatchSet,
27-
BinnedRenderPhaseBatchSets, CachedRenderPipelinePhaseItem, PhaseItemBatchSetKey as _,
28-
PhaseItemExtraIndex, SortedPhaseItem, SortedRenderPhase, UnbatchableBinnedEntityIndices,
29-
ViewBinnedRenderPhases, ViewSortedRenderPhases,
27+
BinnedRenderPhaseBatchSets, CachedRenderPipelinePhaseItem, InputUniformIndex,
28+
PhaseItemBatchSetKey as _, PhaseItemExtraIndex, SortedPhaseItem, SortedRenderPhase,
29+
UnbatchableBinnedEntityIndices, ViewBinnedRenderPhases, ViewSortedRenderPhases,
3030
},
3131
render_resource::{Buffer, BufferVec, GpuArrayBufferable, RawBufferVec, UninitBufferVec},
3232
renderer::{RenderAdapter, RenderDevice, RenderQueue},
@@ -1271,7 +1271,7 @@ pub fn batch_and_prepare_sorted_render_phase<I, GFBD>(
12711271
// Start a new batch.
12721272
if let Some(indirect_parameters_index) = indirect_parameters_index {
12731273
GFBD::write_batch_indirect_parameters_metadata(
1274-
current_input_index.into(),
1274+
InputUniformIndex(current_input_index.into()),
12751275
item_is_indexed,
12761276
output_index,
12771277
None,
@@ -1382,12 +1382,7 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
13821382
let first_output_index = data_buffer.len() as u32;
13831383
let mut batch: Option<BinnedRenderPhaseBatch> = None;
13841384

1385-
for main_entity in bin.entities() {
1386-
let Some(input_index) =
1387-
GFBD::get_binned_index(&system_param_item, *main_entity)
1388-
else {
1389-
continue;
1390-
};
1385+
for (&main_entity, &input_index) in bin.entities() {
13911386
let output_index = data_buffer.add() as u32;
13921387

13931388
match batch {
@@ -1397,7 +1392,7 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
13971392
work_item_buffer.push(
13981393
batch_set_key.indexed(),
13991394
PreprocessWorkItem {
1400-
input_index: input_index.into(),
1395+
input_index: *input_index,
14011396
output_index: first_output_index,
14021397
indirect_parameters_index: match batch.extra_index {
14031398
PhaseItemExtraIndex::IndirectParametersIndex {
@@ -1419,7 +1414,7 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
14191414
.get_next_batch_set_index(batch_set_key.indexed());
14201415

14211416
GFBD::write_batch_indirect_parameters_metadata(
1422-
input_index.into(),
1417+
input_index,
14231418
batch_set_key.indexed(),
14241419
output_index,
14251420
batch_set_index,
@@ -1429,13 +1424,13 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
14291424
work_item_buffer.push(
14301425
batch_set_key.indexed(),
14311426
PreprocessWorkItem {
1432-
input_index: input_index.into(),
1427+
input_index: *input_index,
14331428
output_index: first_output_index,
14341429
indirect_parameters_index,
14351430
},
14361431
);
14371432
batch = Some(BinnedRenderPhaseBatch {
1438-
representative_entity: (Entity::PLACEHOLDER, *main_entity),
1433+
representative_entity: (Entity::PLACEHOLDER, main_entity),
14391434
instance_range: output_index..output_index + 1,
14401435
extra_index: PhaseItemExtraIndex::maybe_indirect_parameters_index(
14411436
NonMaxU32::new(indirect_parameters_index),
@@ -1481,11 +1476,7 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
14811476
let first_output_index = data_buffer.len() as u32;
14821477

14831478
let mut batch: Option<BinnedRenderPhaseBatch> = None;
1484-
for main_entity in phase.batchable_mesh_values[key].entities() {
1485-
let Some(input_index) = GFBD::get_binned_index(&system_param_item, *main_entity)
1486-
else {
1487-
continue;
1488-
};
1479+
for (&main_entity, &input_index) in phase.batchable_mesh_values[key].entities() {
14891480
let output_index = data_buffer.add() as u32;
14901481

14911482
match batch {
@@ -1502,7 +1493,7 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
15021493
work_item_buffer.push(
15031494
key.0.indexed(),
15041495
PreprocessWorkItem {
1505-
input_index: input_index.into(),
1496+
input_index: *input_index,
15061497
output_index: if no_indirect_drawing {
15071498
output_index
15081499
} else {
@@ -1528,7 +1519,7 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
15281519
indirect_parameters_buffers.get_next_batch_set_index(key.0.indexed());
15291520

15301521
GFBD::write_batch_indirect_parameters_metadata(
1531-
input_index.into(),
1522+
input_index,
15321523
key.0.indexed(),
15331524
output_index,
15341525
batch_set_index,
@@ -1538,13 +1529,13 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
15381529
work_item_buffer.push(
15391530
key.0.indexed(),
15401531
PreprocessWorkItem {
1541-
input_index: input_index.into(),
1532+
input_index: *input_index,
15421533
output_index: first_output_index,
15431534
indirect_parameters_index,
15441535
},
15451536
);
15461537
batch = Some(BinnedRenderPhaseBatch {
1547-
representative_entity: (Entity::PLACEHOLDER, *main_entity),
1538+
representative_entity: (Entity::PLACEHOLDER, main_entity),
15481539
instance_range: output_index..output_index + 1,
15491540
extra_index: PhaseItemExtraIndex::IndirectParametersIndex {
15501541
range: indirect_parameters_index..(indirect_parameters_index + 1),
@@ -1558,13 +1549,13 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
15581549
work_item_buffer.push(
15591550
key.0.indexed(),
15601551
PreprocessWorkItem {
1561-
input_index: input_index.into(),
1552+
input_index: *input_index,
15621553
output_index,
15631554
indirect_parameters_index: 0,
15641555
},
15651556
);
15661557
batch = Some(BinnedRenderPhaseBatch {
1567-
representative_entity: (Entity::PLACEHOLDER, *main_entity),
1558+
representative_entity: (Entity::PLACEHOLDER, main_entity),
15681559
instance_range: output_index..output_index + 1,
15691560
extra_index: PhaseItemExtraIndex::None,
15701561
});
@@ -1627,7 +1618,7 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
16271618
// We're in indirect mode, so add an indirect parameters
16281619
// index.
16291620
GFBD::write_batch_indirect_parameters_metadata(
1630-
input_index.into(),
1621+
InputUniformIndex(input_index.into()),
16311622
key.0.indexed(),
16321623
output_index,
16331624
None,

crates/bevy_render/src/batching/mod.rs

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,14 +7,17 @@ use bytemuck::Pod;
77
use nonmax::NonMaxU32;
88

99
use self::gpu_preprocessing::IndirectParametersBuffers;
10-
use crate::{render_phase::PhaseItemExtraIndex, sync_world::MainEntity};
1110
use crate::{
1211
render_phase::{
1312
BinnedPhaseItem, CachedRenderPipelinePhaseItem, DrawFunctionId, SortedPhaseItem,
1413
SortedRenderPhase, ViewBinnedRenderPhases,
1514
},
1615
render_resource::{CachedRenderPipelineId, GpuArrayBufferable},
1716
};
17+
use crate::{
18+
render_phase::{InputUniformIndex, PhaseItemExtraIndex},
19+
sync_world::MainEntity,
20+
};
1821

1922
pub mod gpu_preprocessing;
2023
pub mod no_gpu_preprocessing;
@@ -132,12 +135,17 @@ pub trait GetFullBatchData: GetBatchData {
132135
) -> Option<(NonMaxU32, Option<Self::CompareData>)>;
133136

134137
/// Returns the index of the [`GetFullBatchData::BufferInputData`] that the
135-
/// GPU preprocessing phase will use, for the binning path.
138+
/// GPU preprocessing phase will use.
136139
///
137140
/// We already inserted the [`GetFullBatchData::BufferInputData`] during the
138141
/// extraction phase before we got here, so this function shouldn't need to
139-
/// look up any render data. If CPU instance buffer building is in use, this
140-
/// function will never be called.
142+
/// look up any render data.
143+
///
144+
/// This function is currently only called for unbatchable entities when GPU
145+
/// instance buffer building is in use. For batchable entities, the uniform
146+
/// index is written during queuing (e.g. in `queue_material_meshes`). In
147+
/// the case of CPU instance buffer building, the CPU writes the uniforms,
148+
/// so there's no index to return.
141149
fn get_binned_index(
142150
param: &SystemParamItem<Self::Param>,
143151
query_item: MainEntity,
@@ -167,7 +175,7 @@ pub trait GetFullBatchData: GetBatchData {
167175
/// * `indirect_parameters_offset` is the index in that buffer at which to
168176
/// write the metadata.
169177
fn write_batch_indirect_parameters_metadata(
170-
mesh_index: u32,
178+
mesh_index: InputUniformIndex,
171179
indexed: bool,
172180
base_output_index: u32,
173181
batch_set_index: Option<NonMaxU32>,

crates/bevy_render/src/batching/no_gpu_preprocessing.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,7 @@ pub fn batch_and_prepare_binned_render_phase<BPI, GFBD>(
110110

111111
for key in &phase.batchable_mesh_keys {
112112
let mut batch_set: SmallVec<[BinnedRenderPhaseBatch; 1]> = smallvec![];
113-
for main_entity in phase.batchable_mesh_values[key].entities() {
113+
for main_entity in phase.batchable_mesh_values[key].entities().keys() {
114114
let Some(buffer_data) =
115115
GFBD::get_binned_batch_data(&system_param_item, *main_entity)
116116
else {

0 commit comments

Comments
 (0)