Skip to content

Commit 4a91c7f

Browse files
Ensure the limit for storage binding size honors alignment (#8719)
1 parent 6c73d46 commit 4a91c7f

File tree

11 files changed

+135
-96
lines changed

11 files changed

+135
-96
lines changed

cts_runner/test.lst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ webgpu:api,validation,buffer,create:*
3434
webgpu:api,validation,buffer,destroy:*
3535
fails-if(dx12) webgpu:api,validation,capability_checks,limits,maxBindGroups:setBindGroup,*
3636
webgpu:api,validation,createBindGroup:buffer,effective_buffer_binding_size:*
37+
webgpu:api,validation,createBindGroup:buffer,resource_binding_size:*
3738
// Fails because we coerce a size of 0 in `GPUDevice.createBindGroup(…)` to `buffer.size - offset`.
3839
// FAIL webgpu:api,validation,createBindGroup:buffer_offset_and_size_for_bind_groups_match:*
3940
webgpu:api,validation,encoding,beginComputePass:*

wgpu-core/src/binding_model.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ pub enum CreateBindGroupError {
168168
#[error("Binding declared as a single item, but bind group is using it as an array")]
169169
SingleBindingExpected,
170170
#[error("Effective buffer binding size {size} for storage buffers is expected to align to {alignment}, but size is {size}")]
171-
UnalignedEffectiveBufferBindingSizeForStorage { alignment: u8, size: u64 },
171+
UnalignedEffectiveBufferBindingSizeForStorage { alignment: u32, size: u64 },
172172
#[error("Buffer offset {0} does not respect device's requested `{1}` limit {2}")]
173173
UnalignedBufferOffset(wgt::BufferAddress, &'static str, u32),
174174
#[error(

wgpu-core/src/device/resource.rs

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2796,16 +2796,13 @@ impl Device {
27962796

27972797
let (bb, bind_size) = buffer.binding(bb.offset, bb.size, snatch_guard)?;
27982798

2799-
if matches!(binding_ty, wgt::BufferBindingType::Storage { .. }) {
2800-
let storage_buf_size_alignment = 4;
2801-
2802-
let aligned = bind_size % u64::from(storage_buf_size_alignment) == 0;
2803-
if !aligned {
2804-
return Err(Error::UnalignedEffectiveBufferBindingSizeForStorage {
2805-
alignment: storage_buf_size_alignment,
2806-
size: bind_size,
2807-
});
2808-
}
2799+
if matches!(binding_ty, wgt::BufferBindingType::Storage { .. })
2800+
&& bind_size % u64::from(wgt::STORAGE_BINDING_SIZE_ALIGNMENT) != 0
2801+
{
2802+
return Err(Error::UnalignedEffectiveBufferBindingSizeForStorage {
2803+
alignment: wgt::STORAGE_BINDING_SIZE_ALIGNMENT,
2804+
size: bind_size,
2805+
});
28092806
}
28102807

28112808
let bind_end = bb.offset + bind_size;

wgpu-hal/build.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ fn main() {
2323
) },
2424
metal: { all(target_vendor = "apple", feature = "metal") },
2525
vulkan: { all(not(target_arch = "wasm32"), feature = "vulkan") },
26+
any_backend: { any(dx12, metal, vulkan, gles) },
2627
// ⚠️ Keep in sync with target.cfg() definition in Cargo.toml and cfg_alias in `wgpu` crate ⚠️
2728
static_dxc: { all(target_os = "windows", feature = "static-dxc", not(target_arch = "aarch64"), target_env = "msvc") },
2829
supports_64bit_atomics: { target_has_atomic = "64" },

wgpu-hal/src/auxil/mod.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -126,3 +126,21 @@ impl crate::TextureCopy {
126126
self.size = self.size.min(&max_src_size).min(&max_dst_size);
127127
}
128128
}
129+
130+
/// Clamp the limits in `limits` to honor any HAL-imposed maximums.
131+
///
132+
/// Limits that do not have a HAL-defined maximum are left unchanged.
133+
#[cfg_attr(not(any_backend), allow(dead_code))]
134+
pub(crate) fn apply_hal_limits(mut limits: wgt::Limits) -> wgt::Limits {
135+
// The Metal backend wants to have its own consistent view of the limits, so
136+
// it may duplicate some of these limits.
137+
limits.max_bind_groups = limits.max_bind_groups.min(crate::MAX_BIND_GROUPS as u32);
138+
limits.max_storage_buffer_binding_size &= !(wgt::STORAGE_BINDING_SIZE_ALIGNMENT - 1);
139+
limits.max_vertex_buffers = limits
140+
.max_vertex_buffers
141+
.min(crate::MAX_VERTEX_BUFFERS as u32);
142+
limits.max_color_attachments = limits
143+
.max_color_attachments
144+
.min(crate::MAX_COLOR_ATTACHMENTS as u32);
145+
limits
146+
}

wgpu-hal/src/dx12/adapter.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -695,7 +695,7 @@ impl super::Adapter {
695695
info,
696696
features,
697697
capabilities: crate::Capabilities {
698-
limits: wgt::Limits {
698+
limits: auxil::apply_hal_limits(wgt::Limits {
699699
max_texture_dimension_1d: Direct3D12::D3D12_REQ_TEXTURE1D_U_DIMENSION,
700700
max_texture_dimension_2d: Direct3D12::D3D12_REQ_TEXTURE2D_U_OR_V_DIMENSION
701701
.min(Direct3D12::D3D12_REQ_TEXTURECUBE_DIMENSION),
@@ -724,8 +724,7 @@ impl super::Adapter {
724724
max_uniform_buffer_binding_size:
725725
Direct3D12::D3D12_REQ_CONSTANT_BUFFER_ELEMENT_COUNT * 16,
726726
max_storage_buffer_binding_size: auxil::MAX_I32_BINDING_SIZE,
727-
max_vertex_buffers: Direct3D12::D3D12_VS_INPUT_REGISTER_COUNT
728-
.min(crate::MAX_VERTEX_BUFFERS as u32),
727+
max_vertex_buffers: Direct3D12::D3D12_VS_INPUT_REGISTER_COUNT,
729728
max_vertex_attributes: Direct3D12::D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT,
730729
max_vertex_buffer_array_stride: Direct3D12::D3D12_SO_BUFFER_MAX_STRIDE_IN_BYTES,
731730
// The immediates are part of the root signature which
@@ -806,7 +805,7 @@ impl super::Adapter {
806805
},
807806

808807
max_multiview_view_count,
809-
},
808+
}),
810809
alignments: crate::Alignments {
811810
buffer_copy_offset: wgt::BufferSize::new(
812811
Direct3D12::D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT as u64,

wgpu-hal/src/gles/adapter.rs

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -680,15 +680,14 @@ impl super::Adapter {
680680

681681
let max_color_attachments = unsafe {
682682
gl.get_parameter_i32(glow::MAX_COLOR_ATTACHMENTS)
683-
.min(gl.get_parameter_i32(glow::MAX_DRAW_BUFFERS))
684-
.min(crate::MAX_COLOR_ATTACHMENTS as i32) as u32
683+
.min(gl.get_parameter_i32(glow::MAX_DRAW_BUFFERS)) as u32
685684
};
686685

687686
// 16 bytes per sample is the maximum size of a color attachment.
688687
let max_color_attachment_bytes_per_sample =
689688
max_color_attachments * wgt::TextureFormat::MAX_TARGET_PIXEL_BYTE_COST;
690689

691-
let limits = wgt::Limits {
690+
let limits = crate::auxil::apply_hal_limits(wgt::Limits {
692691
max_texture_dimension_1d: max_texture_size,
693692
max_texture_dimension_2d: max_texture_size,
694693
max_texture_dimension_3d: max_texture_3d_size,
@@ -720,8 +719,7 @@ impl super::Adapter {
720719
(unsafe { gl.get_parameter_i32(glow::MAX_VERTEX_ATTRIB_BINDINGS) } as u32)
721720
} else {
722721
16 // should this be different?
723-
}
724-
.min(crate::MAX_VERTEX_BUFFERS as u32),
722+
},
725723
max_vertex_attributes: (unsafe { gl.get_parameter_i32(glow::MAX_VERTEX_ATTRIBS) }
726724
as u32)
727725
.min(super::MAX_VERTEX_ATTRIBUTES as u32),
@@ -815,7 +813,7 @@ impl super::Adapter {
815813
max_acceleration_structures_per_shader_stage: 0,
816814

817815
max_multiview_view_count: 0,
818-
};
816+
});
819817

820818
let mut workarounds = super::Workarounds::empty();
821819

wgpu-hal/src/metal/adapter.rs

Lines changed: 84 additions & 64 deletions
Original file line numberDiff line numberDiff line change
@@ -1136,72 +1136,92 @@ impl super::PrivateCapabilities {
11361136
downlevel
11371137
.flags
11381138
.set(wgt::DownlevelFlags::ANISOTROPIC_FILTERING, true);
1139+
11391140
let base = wgt::Limits::default();
1140-
crate::Capabilities {
1141-
limits: wgt::Limits {
1142-
max_texture_dimension_1d: self.max_texture_size as u32,
1143-
max_texture_dimension_2d: self.max_texture_size as u32,
1144-
max_texture_dimension_3d: self.max_texture_3d_size as u32,
1145-
max_texture_array_layers: self.max_texture_layers as u32,
1146-
max_bind_groups: 8,
1147-
max_bindings_per_bind_group: 65535,
1148-
max_dynamic_uniform_buffers_per_pipeline_layout: base
1149-
.max_dynamic_uniform_buffers_per_pipeline_layout,
1150-
max_dynamic_storage_buffers_per_pipeline_layout: base
1151-
.max_dynamic_storage_buffers_per_pipeline_layout,
1152-
max_sampled_textures_per_shader_stage: self.max_textures_per_stage,
1153-
max_samplers_per_shader_stage: self.max_samplers_per_stage,
1154-
max_storage_buffers_per_shader_stage: self.max_buffers_per_stage,
1155-
max_storage_textures_per_shader_stage: self.max_textures_per_stage,
1156-
max_uniform_buffers_per_shader_stage: self.max_buffers_per_stage,
1157-
max_binding_array_elements_per_shader_stage: self.max_binding_array_elements,
1158-
max_binding_array_sampler_elements_per_shader_stage: self
1159-
.max_sampler_binding_array_elements,
1160-
max_uniform_buffer_binding_size: self.max_buffer_size.min(!0u32 as u64) as u32,
1161-
max_storage_buffer_binding_size: self.max_buffer_size.min(!0u32 as u64) as u32,
1162-
max_vertex_buffers: self.max_vertex_buffers,
1163-
max_vertex_attributes: 31,
1164-
max_vertex_buffer_array_stride: base.max_vertex_buffer_array_stride,
1165-
max_immediate_size: 0x1000,
1166-
min_uniform_buffer_offset_alignment: self.buffer_alignment as u32,
1167-
min_storage_buffer_offset_alignment: self.buffer_alignment as u32,
1168-
max_inter_stage_shader_components: self.max_varying_components,
1169-
max_color_attachments: (self.max_color_render_targets as u32)
1170-
.min(crate::MAX_COLOR_ATTACHMENTS as u32),
1171-
max_color_attachment_bytes_per_sample: self.max_color_attachment_bytes_per_sample
1172-
as u32,
1173-
max_compute_workgroup_storage_size: self.max_total_threadgroup_memory,
1174-
max_compute_invocations_per_workgroup: self.max_threads_per_group,
1175-
max_compute_workgroup_size_x: self.max_threads_per_group,
1176-
max_compute_workgroup_size_y: self.max_threads_per_group,
1177-
max_compute_workgroup_size_z: self.max_threads_per_group,
1178-
max_compute_workgroups_per_dimension: 0xFFFF,
1179-
max_buffer_size: self.max_buffer_size,
1180-
max_non_sampler_bindings: u32::MAX,
1181-
1182-
// See https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf, Maximum threadgroups per mesh shader grid
1183-
max_task_workgroup_total_count: 1024,
1184-
max_task_workgroups_per_dimension: 1024,
1185-
max_mesh_multiview_view_count: 0,
1186-
max_mesh_output_layers: self.max_texture_layers as u32,
1187-
1188-
max_blas_primitive_count: 0, // When added: 2^28 from https://developer.apple.com/documentation/metal/mtlaccelerationstructureusage/extendedlimits
1189-
max_blas_geometry_count: 0, // When added: 2^24
1190-
max_tlas_instance_count: 0, // When added: 2^24
1191-
// Unsure what this will be when added: acceleration structures count as a buffer so
1192-
// it may be worth using argument buffers for this all acceleration structures, then
1193-
// there will be no limit.
1194-
// From 2.17.7 in https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf
1195-
// > [Acceleration structures] are opaque objects that can be bound directly using
1196-
// buffer binding points or via argument buffers
1197-
max_acceleration_structures_per_shader_stage: 0,
1198-
1199-
max_multiview_view_count: if self.supported_vertex_amplification_factor > 1 {
1200-
self.supported_vertex_amplification_factor
1201-
} else {
1202-
0
1203-
},
1141+
1142+
// Be careful adjusting limits here. The `AdapterShared` stores the
1143+
// original `PrivateCapabilities`, so code could accidentally use
1144+
// the wrong value.
1145+
1146+
let limits = wgt::Limits {
1147+
max_texture_dimension_1d: self.max_texture_size as u32,
1148+
max_texture_dimension_2d: self.max_texture_size as u32,
1149+
max_texture_dimension_3d: self.max_texture_3d_size as u32,
1150+
max_texture_array_layers: self.max_texture_layers as u32,
1151+
max_bind_groups: 8,
1152+
max_bindings_per_bind_group: 65535,
1153+
max_dynamic_uniform_buffers_per_pipeline_layout: base
1154+
.max_dynamic_uniform_buffers_per_pipeline_layout,
1155+
max_dynamic_storage_buffers_per_pipeline_layout: base
1156+
.max_dynamic_storage_buffers_per_pipeline_layout,
1157+
max_sampled_textures_per_shader_stage: self.max_textures_per_stage,
1158+
max_samplers_per_shader_stage: self.max_samplers_per_stage,
1159+
max_storage_buffers_per_shader_stage: self.max_buffers_per_stage,
1160+
max_storage_textures_per_shader_stage: self.max_textures_per_stage,
1161+
max_uniform_buffers_per_shader_stage: self.max_buffers_per_stage,
1162+
max_binding_array_elements_per_shader_stage: self.max_binding_array_elements,
1163+
max_binding_array_sampler_elements_per_shader_stage: self
1164+
.max_sampler_binding_array_elements,
1165+
// Note: any adjustment here will not be reflected in the stored `PrivateCapabilities`.
1166+
max_uniform_buffer_binding_size: self.max_buffer_size.min(!0u32 as u64) as u32,
1167+
max_storage_buffer_binding_size: self.max_buffer_size.min(!0u32 as u64) as u32
1168+
& !(wgt::STORAGE_BINDING_SIZE_ALIGNMENT - 1),
1169+
max_vertex_buffers: self.max_vertex_buffers,
1170+
max_vertex_attributes: 31,
1171+
max_vertex_buffer_array_stride: base.max_vertex_buffer_array_stride,
1172+
max_immediate_size: 0x1000,
1173+
min_uniform_buffer_offset_alignment: self.buffer_alignment as u32,
1174+
min_storage_buffer_offset_alignment: self.buffer_alignment as u32,
1175+
max_inter_stage_shader_components: self.max_varying_components,
1176+
max_color_attachments: self.max_color_render_targets as u32,
1177+
max_color_attachment_bytes_per_sample: self.max_color_attachment_bytes_per_sample
1178+
as u32,
1179+
max_compute_workgroup_storage_size: self.max_total_threadgroup_memory,
1180+
max_compute_invocations_per_workgroup: self.max_threads_per_group,
1181+
max_compute_workgroup_size_x: self.max_threads_per_group,
1182+
max_compute_workgroup_size_y: self.max_threads_per_group,
1183+
max_compute_workgroup_size_z: self.max_threads_per_group,
1184+
max_compute_workgroups_per_dimension: 0xFFFF,
1185+
max_buffer_size: self.max_buffer_size,
1186+
max_non_sampler_bindings: u32::MAX,
1187+
1188+
// See https://developer.apple.com/metal/Metal-Feature-Set-Tables.pdf, Maximum threadgroups per mesh shader grid
1189+
max_task_workgroup_total_count: 1024,
1190+
max_task_workgroups_per_dimension: 1024,
1191+
max_mesh_multiview_view_count: 0,
1192+
max_mesh_output_layers: self.max_texture_layers as u32,
1193+
1194+
max_blas_primitive_count: 0, // When added: 2^28 from https://developer.apple.com/documentation/metal/mtlaccelerationstructureusage/extendedlimits
1195+
max_blas_geometry_count: 0, // When added: 2^24
1196+
max_tlas_instance_count: 0, // When added: 2^24
1197+
// Unsure what this will be when added: acceleration structures count as a buffer so
1198+
// it may be worth using argument buffers for this all acceleration structures, then
1199+
// there will be no limit.
1200+
// From 2.17.7 in https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf
1201+
// > [Acceleration structures] are opaque objects that can be bound directly using
1202+
// buffer binding points or via argument buffers
1203+
max_acceleration_structures_per_shader_stage: 0,
1204+
1205+
max_multiview_view_count: if self.supported_vertex_amplification_factor > 1 {
1206+
self.supported_vertex_amplification_factor
1207+
} else {
1208+
0
12041209
},
1210+
};
1211+
1212+
// Since a bunch of the limits are duplicated between `Limits` and
1213+
// `PrivateCapabilities`, reducing the limits at this point could make
1214+
// things inconsistent and lead to confusion. Make sure that doesn't
1215+
// happen.
1216+
debug_assert!(
1217+
crate::auxil::apply_hal_limits(limits.clone()) == limits,
1218+
"Limits were modified by apply_hal_limits\nOriginal:\n{:#?}\nModified:\n{:#?}",
1219+
limits,
1220+
crate::auxil::apply_hal_limits(limits.clone())
1221+
);
1222+
1223+
crate::Capabilities {
1224+
limits,
12051225
alignments: crate::Alignments {
12061226
buffer_copy_offset: wgt::BufferSize::new(self.buffer_alignment).unwrap(),
12071227
buffer_copy_pitch: wgt::BufferSize::new(4).unwrap(),

wgpu-hal/src/metal/mod.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,8 @@ bitflags!(
202202
}
203203
);
204204

205+
// TODO(https://github.com/gfx-rs/wgpu/issues/8715): Eliminate duplication with
206+
// `wgt::Limits`. Keeping multiple sets of limits creates a risk of confusion.
205207
#[allow(dead_code)]
206208
#[derive(Clone, Debug)]
207209
struct PrivateCapabilities {
@@ -277,6 +279,11 @@ struct PrivateCapabilities {
277279
max_binding_array_elements: ResourceIndex,
278280
max_sampler_binding_array_elements: ResourceIndex,
279281
buffer_alignment: u64,
282+
283+
/// Platform-reported maximum buffer size
284+
///
285+
/// This value is clamped to `u32::MAX` for `wgt::Limits`, so you probably
286+
/// shouldn't be looking at this copy.
280287
max_buffer_size: u64,
281288
max_texture_size: u64,
282289
max_texture_3d_size: u64,

wgpu-hal/src/vulkan/adapter.rs

Lines changed: 5 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1328,14 +1328,12 @@ impl PhysicalDeviceProperties {
13281328
.map(|a| a.max_multiview_view_count.min(32))
13291329
.unwrap_or(0);
13301330

1331-
wgt::Limits {
1331+
crate::auxil::apply_hal_limits(wgt::Limits {
13321332
max_texture_dimension_1d: limits.max_image_dimension1_d,
13331333
max_texture_dimension_2d: limits.max_image_dimension2_d,
13341334
max_texture_dimension_3d: limits.max_image_dimension3_d,
13351335
max_texture_array_layers: limits.max_image_array_layers,
1336-
max_bind_groups: limits
1337-
.max_bound_descriptor_sets
1338-
.min(crate::MAX_BIND_GROUPS as u32),
1336+
max_bind_groups: limits.max_bound_descriptor_sets,
13391337
max_bindings_per_bind_group: wgt::Limits::default().max_bindings_per_bind_group,
13401338
max_dynamic_uniform_buffers_per_pipeline_layout: limits
13411339
.max_descriptor_set_uniform_buffers_dynamic,
@@ -1354,9 +1352,7 @@ impl PhysicalDeviceProperties {
13541352
max_storage_buffer_binding_size: limits
13551353
.max_storage_buffer_range
13561354
.min(crate::auxil::MAX_I32_BINDING_SIZE),
1357-
max_vertex_buffers: limits
1358-
.max_vertex_input_bindings
1359-
.min(crate::MAX_VERTEX_BUFFERS as u32),
1355+
max_vertex_buffers: limits.max_vertex_input_bindings,
13601356
max_vertex_attributes: limits.max_vertex_input_attributes,
13611357
max_vertex_buffer_array_stride: limits.max_vertex_input_binding_stride,
13621358
max_immediate_size: limits.max_push_constants_size,
@@ -1365,9 +1361,7 @@ impl PhysicalDeviceProperties {
13651361
max_inter_stage_shader_components: limits
13661362
.max_vertex_output_components
13671363
.min(limits.max_fragment_input_components),
1368-
max_color_attachments: limits
1369-
.max_color_attachments
1370-
.min(crate::MAX_COLOR_ATTACHMENTS as u32),
1364+
max_color_attachments: limits.max_color_attachments,
13711365
max_color_attachment_bytes_per_sample,
13721366
max_compute_workgroup_storage_size: limits.max_compute_shared_memory_size,
13731367
max_compute_invocations_per_workgroup: limits.max_compute_work_group_invocations,
@@ -1389,7 +1383,7 @@ impl PhysicalDeviceProperties {
13891383
max_acceleration_structures_per_shader_stage,
13901384

13911385
max_multiview_view_count,
1392-
}
1386+
})
13931387
}
13941388

13951389
/// Return a `wgpu_hal::Alignments` structure describing this adapter.

0 commit comments

Comments
 (0)