From 5f8093457e366ab9a7d6e990a604c8cc7dcb6770 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Sun, 27 Apr 2025 16:29:12 +0200 Subject: [PATCH 01/45] Introduce BlockSize concept --- cpp/dolfinx/common/types.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/cpp/dolfinx/common/types.h b/cpp/dolfinx/common/types.h index 318b1c86060..9ac581fca5f 100644 --- a/cpp/dolfinx/common/types.h +++ b/cpp/dolfinx/common/types.h @@ -41,4 +41,35 @@ using scalar_value_t = typename scalar_value::type; /// @private mdspan/mdarray namespace namespace md = MDSPAN_IMPL_STANDARD_NAMESPACE; +/// @private Constant of maximum compile time optimized block sizes. +constexpr int MaxOptimizedBlockSize = 3; + +/// @private Concept capturing both compile time defined block sizes and runtime +/// ones. +template +concept BlockSize + = std::is_same_v || (requires { + typename T::value_type; + requires std::is_same_v; + requires T::value >= 1 && T::value <= MaxOptimizedBlockSize; + }); + +/// @private Check if block size is a compile time constant. +template +constexpr bool is_compile_time_v = !std::is_same_v; + +/// @private Check if block size is a run time constant. +template +constexpr bool is_runtime_v = std::is_same_v; + +/// @private Retrieves the integral block size of a runtime or compile time +/// block size. +int block_size(BlockSize auto bs) +{ + if constexpr (is_compile_time_v) + return decltype(bs)::value; + + return bs; +} + } // namespace dolfinx From 65ff61f72128762b985ff18cdbe6b4fafecabb83 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Sun, 27 Apr 2025 16:47:03 +0200 Subject: [PATCH 02/45] Use BlockSize in packing --- cpp/dolfinx/fem/pack.h | 40 +++++++++++++++------------------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/cpp/dolfinx/fem/pack.h b/cpp/dolfinx/fem/pack.h index 7d8a30f7ff6..2033bbb100d 100644 --- a/cpp/dolfinx/fem/pack.h +++ b/cpp/dolfinx/fem/pack.h @@ -12,6 +12,7 @@ #include "Form.h" #include "Function.h" #include "FunctionSpace.h" +#include "dolfinx/common/types.h" #include "traits.h" #include #include @@ -51,29 +52,19 @@ get_cell_orientation_info(const Function& coefficient) } /// Pack a single coefficient for a single cell -template -void pack_impl(std::span coeffs, std::int32_t cell, int bs, +template +void pack_impl(std::span coeffs, std::int32_t cell, BlockSize auto _bs, std::span v, std::span cell_info, const DofMap& dofmap, auto transform) { std::span dofs = dofmap.cell_dofs(cell); for (std::size_t i = 0; i < dofs.size(); ++i) { - if constexpr (_bs < 0) - { - const int pos_c = bs * i; - const int pos_v = bs * dofs[i]; - for (int k = 0; k < bs; ++k) - coeffs[pos_c + k] = v[pos_v + k]; - } - else - { - assert(_bs == bs); - const int pos_c = _bs * i; - const int pos_v = _bs * dofs[i]; - for (int k = 0; k < _bs; ++k) - coeffs[pos_c + k] = v[pos_v + k]; - } + int bs = block_size(_bs); + const int pos_c = bs * i; + const int pos_v = bs * dofs[i]; + for (int k = 0; k < bs; ++k) + coeffs[pos_c + k] = v[pos_v + k]; } transform(coeffs, cell_info, cell, 1); @@ -117,8 +108,8 @@ void pack_coefficient_entity(std::span c, int cstride, if (std::int32_t cell = cells(e); cell >= 0) { auto cell_coeff = c.subspan(e * cstride + offset, space_dim); - pack_impl<1>(cell_coeff, cell, bs, v, cell_info, dofmap, - transformation); + pack_impl(cell_coeff, cell, std::integral_constant(), v, + cell_info, dofmap, transformation); } } break; @@ -128,8 +119,8 @@ void pack_coefficient_entity(std::span c, int cstride, if (std::int32_t cell = cells(e); cell >= 0) { auto cell_coeff = c.subspan(e * cstride + offset, space_dim); - pack_impl<2>(cell_coeff, cell, bs, v, cell_info, dofmap, - transformation); + pack_impl(cell_coeff, cell, std::integral_constant(), v, + cell_info, dofmap, transformation); } } break; @@ -139,8 +130,8 @@ void pack_coefficient_entity(std::span c, int cstride, if (std::int32_t cell = cells(e); cell >= 0) { auto cell_coeff = c.subspan(e * cstride + offset, space_dim); - pack_impl<3>(cell_coeff, cell, bs, v, cell_info, dofmap, - transformation); + pack_impl(cell_coeff, cell, std::integral_constant(), v, + cell_info, dofmap, transformation); } } break; @@ -150,8 +141,7 @@ void pack_coefficient_entity(std::span c, int cstride, if (std::int32_t cell = cells(e); cell >= 0) { auto cell_coeff = c.subspan(e * cstride + offset, space_dim); - pack_impl<-1>(cell_coeff, cell, bs, v, cell_info, dofmap, - transformation); + pack_impl(cell_coeff, cell, bs, v, cell_info, dofmap, transformation); } } break; From a708e7d82b6973cfcf141e8d33ce5a872831a0b1 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Sun, 27 Apr 2025 17:13:37 +0200 Subject: [PATCH 03/45] Use BlockSize in vector assembly --- cpp/dolfinx/fem/assemble_vector_impl.h | 234 ++++++++++--------------- 1 file changed, 88 insertions(+), 146 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 0e32e12fc23..d9ccc003bcf 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -10,6 +10,7 @@ #include "DirichletBC.h" #include "DofMap.h" #include "Form.h" +#include "dolfinx/common/types.h" #include "traits.h" #include "utils.h" #include @@ -23,6 +24,7 @@ #include #include #include +#include #include namespace dolfinx::fem @@ -76,16 +78,16 @@ using mdspan2_t = md::mdspan>; /// conditions applied. /// @param[in] x0 Vector used in the lifting. /// @param[in] alpha Scaling to apply. -template +template void _lift_bc_cells( std::span b, mdspan2_t x_dofmap, md::mdspan, md::extents> x, FEkernel auto kernel, std::span cells, - std::tuple> dofmap0, + std::tuple> dofmap0, fem::DofTransformKernel auto P0, - std::tuple> dofmap1, + std::tuple> dofmap1, fem::DofTransformKernel auto P1T, std::span constants, md::mdspan> coeffs, std::span cell_info0, @@ -95,10 +97,11 @@ void _lift_bc_cells( if (cells.empty()) return; - const auto [dmap0, bs0, cells0] = dofmap0; - const auto [dmap1, bs1, cells1] = dofmap1; - assert(_bs0 < 0 or _bs0 == bs0); - assert(_bs1 < 0 or _bs1 == bs1); + const auto [dmap0, _bs0, cells0] = dofmap0; + const auto [dmap1, _bs1, cells1] = dofmap1; + + int bs0 = block_size(_bs0); + int bs1 = block_size(_bs1); // Data structures used in bc application std::vector> cdofs(3 * x_dofmap.extent(1)); @@ -120,28 +123,13 @@ void _lift_bc_cells( bool has_bc = false; for (std::size_t j = 0; j < dofs1.size(); ++j) { - if constexpr (_bs1 > 0) - { - for (int k = 0; k < _bs1; ++k) - { - assert(_bs1 * dofs1[j] + k < (int)bc_markers1.size()); - if (bc_markers1[_bs1 * dofs1[j] + k]) - { - has_bc = true; - break; - } - } - } - else + for (int k = 0; k < bs1; ++k) { - for (int k = 0; k < bs1; ++k) + assert(bs1 * dofs1[j] + k < (int)bc_markers1.size()); + if (bc_markers1[bs1 * dofs1[j] + k]) { - assert(bs1 * dofs1[j] + k < (int)bc_markers1.size()); - if (bc_markers1[bs1 * dofs1[j] + k]) - { - has_bc = true; - break; - } + has_bc = true; + break; } } } @@ -172,53 +160,27 @@ void _lift_bc_cells( std::ranges::fill(be, 0); for (std::size_t j = 0; j < dofs1.size(); ++j) { - if constexpr (_bs1 > 0) - { - for (int k = 0; k < _bs1; ++k) - { - const std::int32_t jj = _bs1 * dofs1[j] + k; - assert(jj < (int)bc_markers1.size()); - if (bc_markers1[jj]) - { - const T bc = bc_values1[jj]; - const T _x0 = x0.empty() ? 0 : x0[jj]; - // const T _x0 = 0; - // be -= Ae.col(bs1 * j + k) * alpha * (bc - _x0); - for (int m = 0; m < num_rows; ++m) - be[m] -= Ae[m * num_cols + _bs1 * j + k] * alpha * (bc - _x0); - } - } - } - else + + for (int k = 0; k < bs1; ++k) { - for (int k = 0; k < bs1; ++k) + const std::int32_t jj = bs1 * dofs1[j] + k; + assert(jj < (int)bc_markers1.size()); + if (bc_markers1[jj]) { - const std::int32_t jj = bs1 * dofs1[j] + k; - assert(jj < (int)bc_markers1.size()); - if (bc_markers1[jj]) - { - const T bc = bc_values1[jj]; - const T _x0 = x0.empty() ? 0 : x0[jj]; - // be -= Ae.col(bs1 * j + k) * alpha * (bc - _x0); - for (int m = 0; m < num_rows; ++m) - be[m] -= Ae[m * num_cols + bs1 * j + k] * alpha * (bc - _x0); - } + const T bc = bc_values1[jj]; + const T _x0 = x0.empty() ? 0 : x0[jj]; + // be -= Ae.col(bs1 * j + k) * alpha * (bc - _x0); + for (int m = 0; m < num_rows; ++m) + be[m] -= Ae[m * num_cols + bs1 * j + k] * alpha * (bc - _x0); } } } for (std::size_t i = 0; i < dofs0.size(); ++i) { - if constexpr (_bs0 > 0) - { - for (int k = 0; k < _bs0; ++k) - b[_bs0 * dofs0[i] + k] += be[_bs0 * i + k]; - } - else - { - for (int k = 0; k < bs0; ++k) - b[bs0 * dofs0[i] + k] += be[bs0 * i + k]; - } + + for (int k = 0; k < bs0; ++k) + b[bs0 * dofs0[i] + k] += be[bs0 * i + k]; } } } @@ -638,14 +600,14 @@ void _lift_bc_interior_facets( /// coefficient for cell `i`. /// @param[in] cell_info0 Cell permutation information for the test /// function mesh. -template +template void assemble_cells( fem::DofTransformKernel auto P0, std::span b, mdspan2_t x_dofmap, md::mdspan, md::extents> x, std::span cells, - std::tuple> dofmap, + std::tuple> dofmap, FEkernel auto kernel, std::span constants, md::mdspan> coeffs, std::span cell_info0) @@ -653,8 +615,8 @@ void assemble_cells( if (cells.empty()) return; - const auto [dmap, bs, cells0] = dofmap; - assert(_bs < 0 or _bs == bs); + const auto [dmap, _bs, cells0] = dofmap; + int bs = block_size(_bs); // Create data structures used in assembly std::vector> cdofs(3 * x_dofmap.extent(1)); @@ -681,18 +643,9 @@ void assemble_cells( // Scatter cell vector to 'global' vector array auto dofs = md::submdspan(dmap, c0, md::full_extent); - if constexpr (_bs > 0) - { - for (std::size_t i = 0; i < dofs.size(); ++i) - for (int k = 0; k < _bs; ++k) - b[_bs * dofs[i] + k] += be[_bs * i + k]; - } - else - { - for (std::size_t i = 0; i < dofs.size(); ++i) - for (int k = 0; k < bs; ++k) - b[bs * dofs[i] + k] += be[bs * i + k]; - } + for (std::size_t i = 0; i < dofs.size(); ++i) + for (int k = 0; k < bs; ++k) + b[bs * dofs[i] + k] += be[bs * i + k]; } } @@ -720,7 +673,7 @@ void assemble_cells( /// function mesh. /// @param[in] perms Facet permutation integer. Empty if facet /// permutations are not required. -template +template void assemble_exterior_facets( fem::DofTransformKernel auto P0, std::span b, mdspan2_t x_dofmap, md::mdspan, @@ -729,7 +682,7 @@ void assemble_exterior_facets( md::mdspan> facets, - std::tuple>> dofmap, @@ -741,8 +694,8 @@ void assemble_exterior_facets( if (facets.empty()) return; - const auto [dmap, bs, facets0] = dofmap; - assert(_bs < 0 or _bs == bs); + const auto [dmap, _bs, facets0] = dofmap; + int bs = block_size(_bs); // Create data structures used in assembly const int num_dofs = dmap.extent(1); @@ -775,18 +728,9 @@ void assemble_exterior_facets( // Add element vector to global vector auto dofs = md::submdspan(dmap, cell0, md::full_extent); - if constexpr (_bs > 0) - { - for (std::size_t i = 0; i < dofs.size(); ++i) - for (int k = 0; k < _bs; ++k) - b[_bs * dofs[i] + k] += be[_bs * i + k]; - } - else - { - for (std::size_t i = 0; i < dofs.size(); ++i) - for (int k = 0; k < bs; ++k) - b[bs * dofs[i] + k] += be[bs * i + k]; - } + for (std::size_t i = 0; i < dofs.size(); ++i) + for (int k = 0; k < bs; ++k) + b[bs * dofs[i] + k] += be[bs * i + k]; } } @@ -813,7 +757,7 @@ void assemble_exterior_facets( /// function mesh. /// @param[in] perms Facet permutation integer. Empty if facet /// permutations are not required. -template +template void assemble_interior_facets( fem::DofTransformKernel auto P0, std::span b, mdspan2_t x_dofmap, md::mdspan, @@ -822,7 +766,7 @@ void assemble_interior_facets( md::mdspan> facets, - std::tuple>> dofmap, @@ -836,8 +780,8 @@ void assemble_interior_facets( if (facets.empty()) return; - const auto [dmap, bs, facets0] = dofmap; - assert(_bs < 0 or _bs == bs); + const auto [dmap, _bs, facets0] = dofmap; + int bs = block_size(_bs); // Create data structures used in assembly using X = scalar_value_t; @@ -886,24 +830,12 @@ void assemble_interior_facets( P0(sub_be, cell_info0, cells0[1], 1); // Add element vector to global vector - if constexpr (_bs > 0) - { - for (std::size_t i = 0; i < dmap0.size(); ++i) - for (int k = 0; k < _bs; ++k) - b[_bs * dmap0[i] + k] += be[_bs * i + k]; - for (std::size_t i = 0; i < dmap1.size(); ++i) - for (int k = 0; k < _bs; ++k) - b[_bs * dmap1[i] + k] += be[_bs * (i + dmap0.size()) + k]; - } - else - { - for (std::size_t i = 0; i < dmap0.size(); ++i) - for (int k = 0; k < bs; ++k) - b[bs * dmap0[i] + k] += be[bs * i + k]; - for (std::size_t i = 0; i < dmap1.size(); ++i) - for (int k = 0; k < bs; ++k) - b[bs * dmap1[i] + k] += be[bs * (i + dmap0.size()) + k]; - } + for (std::size_t i = 0; i < dmap0.size(); ++i) + for (int k = 0; k < bs; ++k) + b[bs * dmap0[i] + k] += be[bs * i + k]; + for (std::size_t i = 0; i < dmap1.size(); ++i) + for (int k = 0; k < bs; ++k) + b[bs * dmap1[i] + k] += be[bs * (i + dmap0.size()) + k]; } } @@ -988,23 +920,28 @@ void lift_bc(std::span b, const Form& a, mdspan2_t x_dofmap, auto coeffs = md::mdspan(_coeffs.data(), cells.size(), cstride); if (bs0 == 1 and bs1 == 1) { - _lift_bc_cells( - b, x_dofmap, x, kernel, cells, {dofmap0, bs0, cells0}, P0, - {dofmap1, bs1, cells1}, P1T, constants, coeffs, cell_info0, - cell_info1, bc_values1, bc_markers1, x0, alpha); + _lift_bc_cells, + std::integral_constant>( + b, x_dofmap, x, kernel, cells, + {dofmap0, std::integral_constant(), cells0}, P0, + {dofmap1, std::integral_constant(), cells1}, P1T, constants, + coeffs, cell_info0, cell_info1, bc_values1, bc_markers1, x0, alpha); } else if (bs0 == 3 and bs1 == 3) { - _lift_bc_cells( - b, x_dofmap, x, kernel, cells, {dofmap0, bs0, cells0}, P0, - {dofmap1, bs1, cells1}, P1T, constants, coeffs, cell_info0, - cell_info1, bc_values1, bc_markers1, x0, alpha); + _lift_bc_cells, + std::integral_constant>( + b, x_dofmap, x, kernel, cells, + {dofmap0, std::integral_constant(), cells0}, P0, + {dofmap1, std::integral_constant(), cells1}, P1T, constants, + coeffs, cell_info0, cell_info1, bc_values1, bc_markers1, x0, alpha); } else { - _lift_bc_cells(b, x_dofmap, x, kernel, cells, {dofmap0, bs0, cells0}, P0, - {dofmap1, bs1, cells1}, P1T, constants, coeffs, cell_info0, - cell_info1, bc_values1, bc_markers1, x0, alpha); + _lift_bc_cells( + b, x_dofmap, x, kernel, cells, {dofmap0, bs0, cells0}, P0, + {dofmap1, bs1, cells1}, P1T, constants, coeffs, cell_info0, + cell_info1, bc_values1, bc_markers1, x0, alpha); } } @@ -1221,19 +1158,21 @@ void assemble_vector( assert(cells.size() * cstride == coeffs.size()); if (bs == 1) { - impl::assemble_cells( - P0, b, x_dofmap, x, cells, {dofs, bs, cells0}, fn, constants, + impl::assemble_cells>( + P0, b, x_dofmap, x, cells, + {dofs, std::integral_constant(), cells0}, fn, constants, md::mdspan(coeffs.data(), cells.size(), cstride), cell_info0); } else if (bs == 3) { - impl::assemble_cells( - P0, b, x_dofmap, x, cells, {dofs, bs, cells0}, fn, constants, + impl::assemble_cells>( + P0, b, x_dofmap, x, cells, + {dofs, std::integral_constant(), cells0}, fn, constants, md::mdspan(coeffs.data(), cells.size(), cstride), cell_info0); } else { - impl::assemble_cells( + impl::assemble_cells( P0, b, x_dofmap, x, cells, {dofs, bs, cells0}, fn, constants, md::mdspan(coeffs.data(), cells.size(), cstride), cell_info0); } @@ -1269,21 +1208,24 @@ void assemble_vector( assert((facets.size() / 2) * cstride == coeffs.size()); if (bs == 1) { - impl::assemble_exterior_facets( - P0, b, x_dofmap, x, facets, {dofs, bs, facets1}, fn, constants, + + impl::assemble_exterior_facets>( + P0, b, x_dofmap, x, facets, + {dofs, std::integral_constant(), facets1}, fn, constants, md::mdspan(coeffs.data(), facets.extent(0), cstride), cell_info0, perms); } else if (bs == 3) { - impl::assemble_exterior_facets( - P0, b, x_dofmap, x, facets, {dofs, bs, facets1}, fn, constants, + impl::assemble_exterior_facets>( + P0, b, x_dofmap, x, facets, + {dofs, std::integral_constant(), facets1}, fn, constants, md::mdspan(coeffs.data(), facets.size() / 2, cstride), cell_info0, perms); } else { - impl::assemble_exterior_facets( + impl::assemble_exterior_facets( P0, b, x_dofmap, x, facets, {dofs, bs, facets1}, fn, constants, md::mdspan(coeffs.data(), facets.size() / 2, cstride), cell_info0, perms); @@ -1308,10 +1250,10 @@ void assemble_vector( assert((facets.size() / 4) * 2 * cstride == coeffs.size()); if (bs == 1) { - impl::assemble_interior_facets( + impl::assemble_interior_facets>( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), - {*dofmap, bs, + {*dofmap, std::integral_constant(), mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)}, fn, constants, mdspanx2x_t(coeffs.data(), facets.size() / 4, 2, cstride), @@ -1319,10 +1261,10 @@ void assemble_vector( } else if (bs == 3) { - impl::assemble_interior_facets( + impl::assemble_interior_facets>( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), - {*dofmap, bs, + {*dofmap, std::integral_constant(), mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)}, fn, constants, mdspanx2x_t(coeffs.data(), facets.size() / 4, 2, cstride), @@ -1330,7 +1272,7 @@ void assemble_vector( } else { - impl::assemble_interior_facets( + impl::assemble_interior_facets( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), {*dofmap, bs, From 5b65ad8cab96e44b307ecb01721a0a6245175b0d Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Sun, 27 Apr 2025 17:50:55 +0200 Subject: [PATCH 04/45] Adapt demo --- cpp/demo/custom_kernel/main.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/cpp/demo/custom_kernel/main.cpp b/cpp/demo/custom_kernel/main.cpp index b49da2ca416..8df8b56d24a 100644 --- a/cpp/demo/custom_kernel/main.cpp +++ b/cpp/demo/custom_kernel/main.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -167,9 +168,10 @@ double assemble_vector1(const mesh::Geometry& g, const fem::DofMap& dofmap, md::mdspan> x( g.x().data(), g.x().size() / 3, 3); common::Timer timer("Assembler1 lambda (vector)"); - fem::impl::assemble_cells([](auto, auto, auto, auto) {}, - b.mutable_array(), g.dofmap(), x, cells, - {dofmap.map(), 1, cells}, kernel, {}, {}, {}); + fem::impl::assemble_cells>( + [](auto, auto, auto, auto) {}, b.mutable_array(), g.dofmap(), x, cells, + {dofmap.map(), std::integral_constant(), cells}, kernel, {}, {}, + {}); b.scatter_rev(std::plus()); return la::squared_norm(b); } From 29a1219246bcba03ec7fa4aa4da8dd5488c5640a Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Sun, 27 Apr 2025 19:44:36 +0200 Subject: [PATCH 05/45] Introduce BS<> alias --- cpp/demo/custom_kernel/main.cpp | 5 ++- cpp/dolfinx/common/types.h | 4 +++ cpp/dolfinx/fem/assemble_vector_impl.h | 48 +++++++++++--------------- cpp/dolfinx/fem/pack.h | 12 +++---- 4 files changed, 32 insertions(+), 37 deletions(-) diff --git a/cpp/demo/custom_kernel/main.cpp b/cpp/demo/custom_kernel/main.cpp index 8df8b56d24a..caf35e2ba44 100644 --- a/cpp/demo/custom_kernel/main.cpp +++ b/cpp/demo/custom_kernel/main.cpp @@ -168,10 +168,9 @@ double assemble_vector1(const mesh::Geometry& g, const fem::DofMap& dofmap, md::mdspan> x( g.x().data(), g.x().size() / 3, 3); common::Timer timer("Assembler1 lambda (vector)"); - fem::impl::assemble_cells>( + fem::impl::assemble_cells>( [](auto, auto, auto, auto) {}, b.mutable_array(), g.dofmap(), x, cells, - {dofmap.map(), std::integral_constant(), cells}, kernel, {}, {}, - {}); + {dofmap.map(), BS<1>(), cells}, kernel, {}, {}, {}); b.scatter_rev(std::plus()); return la::squared_norm(b); } diff --git a/cpp/dolfinx/common/types.h b/cpp/dolfinx/common/types.h index 9ac581fca5f..afa2d725b06 100644 --- a/cpp/dolfinx/common/types.h +++ b/cpp/dolfinx/common/types.h @@ -54,6 +54,10 @@ concept BlockSize requires T::value >= 1 && T::value <= MaxOptimizedBlockSize; }); +/// @private Short notation for a compile time block size. +template +using BS = std::integral_constant; + /// @private Check if block size is a compile time constant. template constexpr bool is_compile_time_v = !std::is_same_v; diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index d9ccc003bcf..2df8ee1a78c 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -920,21 +920,17 @@ void lift_bc(std::span b, const Form& a, mdspan2_t x_dofmap, auto coeffs = md::mdspan(_coeffs.data(), cells.size(), cstride); if (bs0 == 1 and bs1 == 1) { - _lift_bc_cells, - std::integral_constant>( - b, x_dofmap, x, kernel, cells, - {dofmap0, std::integral_constant(), cells0}, P0, - {dofmap1, std::integral_constant(), cells1}, P1T, constants, - coeffs, cell_info0, cell_info1, bc_values1, bc_markers1, x0, alpha); + _lift_bc_cells, BS<1>>( + b, x_dofmap, x, kernel, cells, {dofmap0, BS<1>(), cells0}, P0, + {dofmap1, BS<1>(), cells1}, P1T, constants, coeffs, cell_info0, + cell_info1, bc_values1, bc_markers1, x0, alpha); } else if (bs0 == 3 and bs1 == 3) { - _lift_bc_cells, - std::integral_constant>( - b, x_dofmap, x, kernel, cells, - {dofmap0, std::integral_constant(), cells0}, P0, - {dofmap1, std::integral_constant(), cells1}, P1T, constants, - coeffs, cell_info0, cell_info1, bc_values1, bc_markers1, x0, alpha); + _lift_bc_cells, BS<3>>( + b, x_dofmap, x, kernel, cells, {dofmap0, BS<3>(), cells0}, P0, + {dofmap1, BS<3>(), cells1}, P1T, constants, coeffs, cell_info0, + cell_info1, bc_values1, bc_markers1, x0, alpha); } else { @@ -1158,16 +1154,14 @@ void assemble_vector( assert(cells.size() * cstride == coeffs.size()); if (bs == 1) { - impl::assemble_cells>( - P0, b, x_dofmap, x, cells, - {dofs, std::integral_constant(), cells0}, fn, constants, + impl::assemble_cells>( + P0, b, x_dofmap, x, cells, {dofs, BS<1>(), cells0}, fn, constants, md::mdspan(coeffs.data(), cells.size(), cstride), cell_info0); } else if (bs == 3) { - impl::assemble_cells>( - P0, b, x_dofmap, x, cells, - {dofs, std::integral_constant(), cells0}, fn, constants, + impl::assemble_cells>( + P0, b, x_dofmap, x, cells, {dofs, BS<3>(), cells0}, fn, constants, md::mdspan(coeffs.data(), cells.size(), cstride), cell_info0); } else @@ -1209,17 +1203,15 @@ void assemble_vector( if (bs == 1) { - impl::assemble_exterior_facets>( - P0, b, x_dofmap, x, facets, - {dofs, std::integral_constant(), facets1}, fn, constants, + impl::assemble_exterior_facets>( + P0, b, x_dofmap, x, facets, {dofs, BS<1>(), facets1}, fn, constants, md::mdspan(coeffs.data(), facets.extent(0), cstride), cell_info0, perms); } else if (bs == 3) { - impl::assemble_exterior_facets>( - P0, b, x_dofmap, x, facets, - {dofs, std::integral_constant(), facets1}, fn, constants, + impl::assemble_exterior_facets>( + P0, b, x_dofmap, x, facets, {dofs, BS<3>(), facets1}, fn, constants, md::mdspan(coeffs.data(), facets.size() / 2, cstride), cell_info0, perms); } @@ -1250,10 +1242,10 @@ void assemble_vector( assert((facets.size() / 4) * 2 * cstride == coeffs.size()); if (bs == 1) { - impl::assemble_interior_facets>( + impl::assemble_interior_facets>( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), - {*dofmap, std::integral_constant(), + {*dofmap, BS<1>(), mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)}, fn, constants, mdspanx2x_t(coeffs.data(), facets.size() / 4, 2, cstride), @@ -1261,10 +1253,10 @@ void assemble_vector( } else if (bs == 3) { - impl::assemble_interior_facets>( + impl::assemble_interior_facets>( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), - {*dofmap, std::integral_constant(), + {*dofmap, BS<3>(), mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)}, fn, constants, mdspanx2x_t(coeffs.data(), facets.size() / 4, 2, cstride), diff --git a/cpp/dolfinx/fem/pack.h b/cpp/dolfinx/fem/pack.h index 2033bbb100d..63e82ba0e07 100644 --- a/cpp/dolfinx/fem/pack.h +++ b/cpp/dolfinx/fem/pack.h @@ -108,8 +108,8 @@ void pack_coefficient_entity(std::span c, int cstride, if (std::int32_t cell = cells(e); cell >= 0) { auto cell_coeff = c.subspan(e * cstride + offset, space_dim); - pack_impl(cell_coeff, cell, std::integral_constant(), v, - cell_info, dofmap, transformation); + pack_impl(cell_coeff, cell, BS<1>(), v, cell_info, dofmap, + transformation); } } break; @@ -119,8 +119,8 @@ void pack_coefficient_entity(std::span c, int cstride, if (std::int32_t cell = cells(e); cell >= 0) { auto cell_coeff = c.subspan(e * cstride + offset, space_dim); - pack_impl(cell_coeff, cell, std::integral_constant(), v, - cell_info, dofmap, transformation); + pack_impl(cell_coeff, cell, BS<2>(), v, cell_info, dofmap, + transformation); } } break; @@ -130,8 +130,8 @@ void pack_coefficient_entity(std::span c, int cstride, if (std::int32_t cell = cells(e); cell >= 0) { auto cell_coeff = c.subspan(e * cstride + offset, space_dim); - pack_impl(cell_coeff, cell, std::integral_constant(), v, - cell_info, dofmap, transformation); + pack_impl(cell_coeff, cell, BS<3>(), v, cell_info, dofmap, + transformation); } } break; From 10cc79c1bdea96017d666caa7b1ea41577e5cd82 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Sun, 27 Apr 2025 19:46:06 +0200 Subject: [PATCH 06/45] Use BlockSize in spmv --- cpp/dolfinx/la/MatrixCSR.h | 17 +++++++++-------- cpp/dolfinx/la/matrix_csr_impl.h | 23 +++++++---------------- 2 files changed, 16 insertions(+), 24 deletions(-) diff --git a/cpp/dolfinx/la/MatrixCSR.h b/cpp/dolfinx/la/MatrixCSR.h index 104fb8edb25..3bfa269684e 100644 --- a/cpp/dolfinx/la/MatrixCSR.h +++ b/cpp/dolfinx/la/MatrixCSR.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -788,13 +789,13 @@ void MatrixCSR::mult(la::Vector& x, // yi[0] += Ai[0] * xi[0] if (_bs[1] == 1) { - impl::spmv(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x, _y, - _bs[0], 1); + impl::spmv>(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x, + _y, _bs[0], BS<1>()); } else { - impl::spmv(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x, _y, - _bs[0], _bs[1]); + impl::spmv(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x, + _y, _bs[0], _bs[1]); } // finalize ghost update @@ -804,13 +805,13 @@ void MatrixCSR::mult(la::Vector& x, // yi[0] += Ai[1] * xi[1] if (_bs[1] == 1) { - impl::spmv(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, _y, - _bs[0], 1); + impl::spmv>(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, + _y, _bs[0], BS<1>()); } else { - impl::spmv(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, _y, - _bs[0], _bs[1]); + impl::spmv(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, _y, + _bs[0], _bs[1]); } } diff --git a/cpp/dolfinx/la/matrix_csr_impl.h b/cpp/dolfinx/la/matrix_csr_impl.h index 143b6c0d4f8..29f9ffde6fa 100644 --- a/cpp/dolfinx/la/matrix_csr_impl.h +++ b/cpp/dolfinx/la/matrix_csr_impl.h @@ -6,6 +6,7 @@ #pragma once +#include "dolfinx/common/types.h" #include #include #include @@ -222,12 +223,13 @@ void insert_nonblocked_csr(U&& data, const V& cols, const W& row_ptr, /// @param y /// @param bs0 /// @param bs1 -template +template void spmv(std::span values, std::span row_begin, std::span row_end, std::span indices, std::span x, - std::span y, int bs0, int bs1) + std::span y, int bs0, BS1 _bs1) { + int bs1 = block_size(_bs1); assert(row_begin.size() == row_end.size()); for (int k0 = 0; k0 < bs0; ++k0) { @@ -236,21 +238,10 @@ void spmv(std::span values, std::span row_begin, T vi{0}; for (std::int32_t j = row_begin[i]; j < row_end[i]; j++) { - if constexpr (BS1 == -1) + for (int k1 = 0; k1 < bs1; ++k1) { - for (int k1 = 0; k1 < bs1; ++k1) - { - vi += values[j * bs1 * bs0 + k1 * bs0 + k0] - * x[indices[j] * bs1 + k1]; - } - } - else - { - for (int k1 = 0; k1 < BS1; ++k1) - { - vi += values[j * BS1 * bs0 + k1 * bs0 + k0] - * x[indices[j] * BS1 + k1]; - } + vi += values[j * bs1 * bs0 + k1 * bs0 + k0] + * x[indices[j] * bs1 + k1]; } } From 1fb65d474a7e7d4f947776372504bf707614008b Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Sun, 27 Apr 2025 20:44:19 +0200 Subject: [PATCH 07/45] doc --- cpp/dolfinx/la/matrix_csr_impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/dolfinx/la/matrix_csr_impl.h b/cpp/dolfinx/la/matrix_csr_impl.h index 29f9ffde6fa..3a1d625b611 100644 --- a/cpp/dolfinx/la/matrix_csr_impl.h +++ b/cpp/dolfinx/la/matrix_csr_impl.h @@ -222,7 +222,7 @@ void insert_nonblocked_csr(U&& data, const V& cols, const W& row_ptr, /// @param x /// @param y /// @param bs0 -/// @param bs1 +/// @param _bs1 template void spmv(std::span values, std::span row_begin, std::span row_end, From dcfef33dd03c229f7b85445c3156543f0bcb1a17 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Wed, 30 Apr 2025 01:38:46 +0200 Subject: [PATCH 08/45] Introduce generic ConstexprType --- cpp/dolfinx/common/types.h | 39 ++++++++++++++++++++++++++------------ 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/cpp/dolfinx/common/types.h b/cpp/dolfinx/common/types.h index afa2d725b06..dc66cd0cc19 100644 --- a/cpp/dolfinx/common/types.h +++ b/cpp/dolfinx/common/types.h @@ -44,33 +44,48 @@ namespace md = MDSPAN_IMPL_STANDARD_NAMESPACE; /// @private Constant of maximum compile time optimized block sizes. constexpr int MaxOptimizedBlockSize = 3; +/// @private Concept defining a variadic compile time or runtime variable. T +/// indicates the type that is stored and V the value. Either V equals T, i.e. +/// it is a runtime variable or V defines a compile time value V::value of type +/// T. +template +concept ConstexprType = std::is_same_v || (requires { + typename V::value_type; + requires std::is_same_v; + }); + +/// @private Check if ConstexprType holds a compile time constant. +template + requires ConstexprType +constexpr bool is_compile_time_v = !std::is_same_v; + +/// @private Check if ConstexprType holds a run time variable. +template + requires ConstexprType +constexpr bool is_runtime_v = std::is_same_v; + /// @private Concept capturing both compile time defined block sizes and runtime /// ones. -template -concept BlockSize - = std::is_same_v || (requires { - typename T::value_type; - requires std::is_same_v; - requires T::value >= 1 && T::value <= MaxOptimizedBlockSize; - }); +template +concept BlockSize = ConstexprType; /// @private Short notation for a compile time block size. template using BS = std::integral_constant; /// @private Check if block size is a compile time constant. -template -constexpr bool is_compile_time_v = !std::is_same_v; +template +constexpr bool is_compile_time_bs_v = is_compile_time_v; /// @private Check if block size is a run time constant. -template -constexpr bool is_runtime_v = std::is_same_v; +template +constexpr bool is_runtime_bs_v = is_runtime_v; /// @private Retrieves the integral block size of a runtime or compile time /// block size. int block_size(BlockSize auto bs) { - if constexpr (is_compile_time_v) + if constexpr (is_compile_time_bs_v) return decltype(bs)::value; return bs; From b8b0f9025a500fdbb84f003ddde35c841aebde68 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Wed, 30 Apr 2025 02:05:09 +0200 Subject: [PATCH 09/45] value() --- cpp/dolfinx/common/types.h | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/cpp/dolfinx/common/types.h b/cpp/dolfinx/common/types.h index dc66cd0cc19..9bff16698fa 100644 --- a/cpp/dolfinx/common/types.h +++ b/cpp/dolfinx/common/types.h @@ -64,6 +64,16 @@ template requires ConstexprType constexpr bool is_runtime_v = std::is_same_v; +template + requires ConstexprType +T value(V container) +{ + if constexpr (is_compile_time_v) + return V::value; + + return container; +} + /// @private Concept capturing both compile time defined block sizes and runtime /// ones. template @@ -85,10 +95,7 @@ constexpr bool is_runtime_bs_v = is_runtime_v; /// block size. int block_size(BlockSize auto bs) { - if constexpr (is_compile_time_bs_v) - return decltype(bs)::value; - - return bs; + return value(bs); } } // namespace dolfinx From 152e8d058bb40faa885648af7e6d959a80704603 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Wed, 30 Apr 2025 02:05:25 +0200 Subject: [PATCH 10/45] Add test case --- cpp/test/CMakeLists.txt | 1 + cpp/test/common/constexpr_type.cpp | 38 ++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 cpp/test/common/constexpr_type.cpp diff --git a/cpp/test/CMakeLists.txt b/cpp/test/CMakeLists.txt index 353681c32cb..bb8db312bb6 100644 --- a/cpp/test/CMakeLists.txt +++ b/cpp/test/CMakeLists.txt @@ -50,6 +50,7 @@ add_executable( matrix.cpp io.cpp common/CIFailure.cpp + common/constexpr_type.cpp common/sub_systems_manager.cpp common/index_map.cpp common/sort.cpp diff --git a/cpp/test/common/constexpr_type.cpp b/cpp/test/common/constexpr_type.cpp new file mode 100644 index 00000000000..5c91a97eef1 --- /dev/null +++ b/cpp/test/common/constexpr_type.cpp @@ -0,0 +1,38 @@ +// Copyright (C) 2025 Paul T. Kühner +// +// This file is part of DOLFINx (https://www.fenicsproject.org) +// +// SPDX-License-Identifier: LGPL-3.0-or-later + +#include + +#include +#include +#include +#include + +namespace +{ +template +void test() +{ + using V_runtime = T; + static_assert(!dolfinx::is_compile_time_v); + static_assert(dolfinx::is_runtime_v); + assert((dolfinx::value(V_runtime(1)) == T(1))); + + using V_compile_time = std::integral_constant; + static_assert(dolfinx::is_compile_time_v); + static_assert(!dolfinx::is_runtime_v); + assert((dolfinx::value(V_compile_time()) == T(1))); +} +} // namespace + +TEST_CASE("Test constexpr type", "[constexpr_type]") +{ + test(); + test(); + test(); + test(); + test(); +} \ No newline at end of file From 0e2ad158fc18538d7dcb3d915d90b89c8c6b0647 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Wed, 30 Apr 2025 02:07:05 +0200 Subject: [PATCH 11/45] format --- cpp/dolfinx/common/types.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/cpp/dolfinx/common/types.h b/cpp/dolfinx/common/types.h index 9bff16698fa..3774d1bb4a8 100644 --- a/cpp/dolfinx/common/types.h +++ b/cpp/dolfinx/common/types.h @@ -93,9 +93,6 @@ constexpr bool is_runtime_bs_v = is_runtime_v; /// @private Retrieves the integral block size of a runtime or compile time /// block size. -int block_size(BlockSize auto bs) -{ - return value(bs); -} +int block_size(BlockSize auto bs) { return value(bs); } } // namespace dolfinx From 31a146d98e0ac587d3c3c2d504fd1dadbcbeeb34 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Wed, 30 Apr 2025 17:22:45 +0200 Subject: [PATCH 12/45] constexpr value access --- cpp/dolfinx/common/types.h | 10 +++++++--- cpp/test/common/constexpr_type.cpp | 2 +- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/cpp/dolfinx/common/types.h b/cpp/dolfinx/common/types.h index 3774d1bb4a8..1e57e6872c1 100644 --- a/cpp/dolfinx/common/types.h +++ b/cpp/dolfinx/common/types.h @@ -66,11 +66,15 @@ constexpr bool is_runtime_v = std::is_same_v; template requires ConstexprType -T value(V container) +constexpr T value(V container, typename std::enable_if_t>* = 0) { - if constexpr (is_compile_time_v) - return V::value; + return V::value; +} +template + requires ConstexprType +T value(V container, typename std::enable_if_t>* = 0) +{ return container; } diff --git a/cpp/test/common/constexpr_type.cpp b/cpp/test/common/constexpr_type.cpp index 5c91a97eef1..196d5ab1209 100644 --- a/cpp/test/common/constexpr_type.cpp +++ b/cpp/test/common/constexpr_type.cpp @@ -24,7 +24,7 @@ void test() using V_compile_time = std::integral_constant; static_assert(dolfinx::is_compile_time_v); static_assert(!dolfinx::is_runtime_v); - assert((dolfinx::value(V_compile_time()) == T(1))); + static_assert((dolfinx::value(V_compile_time()) == T(1))); } } // namespace From 6a4d5b59d03fe3318ad142da4c45894cd2830490 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Wed, 30 Apr 2025 17:24:12 +0200 Subject: [PATCH 13/45] format --- cpp/dolfinx/common/types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/dolfinx/common/types.h b/cpp/dolfinx/common/types.h index 1e57e6872c1..509c9944650 100644 --- a/cpp/dolfinx/common/types.h +++ b/cpp/dolfinx/common/types.h @@ -66,7 +66,8 @@ constexpr bool is_runtime_v = std::is_same_v; template requires ConstexprType -constexpr T value(V container, typename std::enable_if_t>* = 0) +constexpr T value(V container, + typename std::enable_if_t>* = 0) { return V::value; } From 2652fb5b20e195c0c5e3c260b4a27d18d4267307 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Wed, 30 Apr 2025 22:40:48 +0200 Subject: [PATCH 14/45] Bump PETSc/SLEPc --- docker/Dockerfile.redhat | 2 +- docker/Dockerfile.test-env | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile.redhat b/docker/Dockerfile.redhat index a07786255eb..8b512027d71 100644 --- a/docker/Dockerfile.redhat +++ b/docker/Dockerfile.redhat @@ -8,7 +8,7 @@ FROM rockylinux/rockylinux:9 ARG BUILD_NP=4 ARG HDF5_VERSION=1.14.6 -ARG PETSC_VERSION=3.23.0 +ARG PETSC_VERSION=3.23.1 ARG MPICH_VERSION=4.2.3 WORKDIR /tmp diff --git a/docker/Dockerfile.test-env b/docker/Dockerfile.test-env index 96203cf1fff..9b622e1839b 100644 --- a/docker/Dockerfile.test-env +++ b/docker/Dockerfile.test-env @@ -22,8 +22,8 @@ ARG KAHIP_VERSION=3.18 # the most recent Numba release, see # https://numba.readthedocs.io/en/stable/user/installing.html#version-support-information ARG NUMPY_VERSION=2.1.3 -ARG PETSC_VERSION=3.23.0 -ARG SLEPC_VERSION=3.23.0 +ARG PETSC_VERSION=3.23.1 +ARG SLEPC_VERSION=3.23.1 ARG SPDLOG_VERSION=1.15.1 ARG MPICH_VERSION=4.2.3 From c762822f69604dfe476397f7752963e1cdc9478a Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Wed, 30 Apr 2025 22:43:15 +0200 Subject: [PATCH 15/45] Revert "Bump PETSc/SLEPc" This reverts commit 2652fb5b20e195c0c5e3c260b4a27d18d4267307. --- docker/Dockerfile.redhat | 2 +- docker/Dockerfile.test-env | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/docker/Dockerfile.redhat b/docker/Dockerfile.redhat index 8b512027d71..a07786255eb 100644 --- a/docker/Dockerfile.redhat +++ b/docker/Dockerfile.redhat @@ -8,7 +8,7 @@ FROM rockylinux/rockylinux:9 ARG BUILD_NP=4 ARG HDF5_VERSION=1.14.6 -ARG PETSC_VERSION=3.23.1 +ARG PETSC_VERSION=3.23.0 ARG MPICH_VERSION=4.2.3 WORKDIR /tmp diff --git a/docker/Dockerfile.test-env b/docker/Dockerfile.test-env index 9b622e1839b..96203cf1fff 100644 --- a/docker/Dockerfile.test-env +++ b/docker/Dockerfile.test-env @@ -22,8 +22,8 @@ ARG KAHIP_VERSION=3.18 # the most recent Numba release, see # https://numba.readthedocs.io/en/stable/user/installing.html#version-support-information ARG NUMPY_VERSION=2.1.3 -ARG PETSC_VERSION=3.23.1 -ARG SLEPC_VERSION=3.23.1 +ARG PETSC_VERSION=3.23.0 +ARG SLEPC_VERSION=3.23.0 ARG SPDLOG_VERSION=1.15.1 ARG MPICH_VERSION=4.2.3 From 796725cf935e29c382d81f6831e5186eb4f4ed8b Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Wed, 30 Apr 2025 23:20:40 +0200 Subject: [PATCH 16/45] Tidy up --- cpp/dolfinx/common/CMakeLists.txt | 1 + cpp/dolfinx/common/constexpr_type.h | 53 ++++++++++++++++++++++++ cpp/dolfinx/common/types.h | 62 +++++++---------------------- cpp/test/common/constexpr_type.cpp | 16 ++++---- 4 files changed, 78 insertions(+), 54 deletions(-) create mode 100644 cpp/dolfinx/common/constexpr_type.h diff --git a/cpp/dolfinx/common/CMakeLists.txt b/cpp/dolfinx/common/CMakeLists.txt index 3a5c77044e0..20d486e56fc 100644 --- a/cpp/dolfinx/common/CMakeLists.txt +++ b/cpp/dolfinx/common/CMakeLists.txt @@ -1,5 +1,6 @@ set(HEADERS_common ${CMAKE_CURRENT_SOURCE_DIR}/defines.h + ${CMAKE_CURRENT_SOURCE_DIR}/constexpr_type.h ${CMAKE_CURRENT_SOURCE_DIR}/dolfinx_common.h ${CMAKE_CURRENT_SOURCE_DIR}/dolfinx_doc.h ${CMAKE_CURRENT_SOURCE_DIR}/IndexMap.h diff --git a/cpp/dolfinx/common/constexpr_type.h b/cpp/dolfinx/common/constexpr_type.h new file mode 100644 index 00000000000..4e7e656bb7e --- /dev/null +++ b/cpp/dolfinx/common/constexpr_type.h @@ -0,0 +1,53 @@ +// Copyright (C) 2025 Paul T. Kühner +// +// This file is part of DOLFINx (https://www.fenicsproject.org) +// +// SPDX-License-Identifier: LGPL-3.0-or-later + +#pragma once + +#include + +namespace dolfinx::common +{ +/// @private Concept defining a variadic compile time or runtime variable. T +/// indicates the type that is stored and V the value. Either V equals T, i.e. +/// it is a runtime variable or V defines a compile time value V::value of type +/// T. +/// @tparam T type of the value to be stored. +/// @tparam V container type. Usually T for a runtime variable or a +/// std::integral_constant for a compile time constant. +template +concept ConstexprType = std::is_same_v || (requires { + typename V::value_type; + requires std::is_same_v; + }); + +/// @private Check if ConstexprType holds a compile time constant. +template + requires ConstexprType +constexpr bool is_compile_time_v = !std::is_same_v; + +/// @private Check if ConstexprType holds a run time variable. +template + requires ConstexprType +constexpr bool is_runtime_v = std::is_same_v; + +/// @private Retrieve value of a compile time constant form a ConstexprType. +template + requires ConstexprType +constexpr T value(V container, + typename std::enable_if_t>* = 0) +{ + return V::value; +} + +/// @private Retrieve value of runtime variable form a ConstexprType. +template + requires ConstexprType +T value(V container, typename std::enable_if_t>* = 0) +{ + return container; +} + +} // namespace dolfinx::common diff --git a/cpp/dolfinx/common/types.h b/cpp/dolfinx/common/types.h index 509c9944650..8bbcdf5a208 100644 --- a/cpp/dolfinx/common/types.h +++ b/cpp/dolfinx/common/types.h @@ -9,6 +9,7 @@ #include #include #include +#include #include namespace dolfinx @@ -41,63 +42,30 @@ using scalar_value_t = typename scalar_value::type; /// @private mdspan/mdarray namespace namespace md = MDSPAN_IMPL_STANDARD_NAMESPACE; -/// @private Constant of maximum compile time optimized block sizes. -constexpr int MaxOptimizedBlockSize = 3; - -/// @private Concept defining a variadic compile time or runtime variable. T -/// indicates the type that is stored and V the value. Either V equals T, i.e. -/// it is a runtime variable or V defines a compile time value V::value of type -/// T. -template -concept ConstexprType = std::is_same_v || (requires { - typename V::value_type; - requires std::is_same_v; - }); - -/// @private Check if ConstexprType holds a compile time constant. -template - requires ConstexprType -constexpr bool is_compile_time_v = !std::is_same_v; - -/// @private Check if ConstexprType holds a run time variable. -template - requires ConstexprType -constexpr bool is_runtime_v = std::is_same_v; - -template - requires ConstexprType -constexpr T value(V container, - typename std::enable_if_t>* = 0) -{ - return V::value; -} - -template - requires ConstexprType -T value(V container, typename std::enable_if_t>* = 0) -{ - return container; -} - /// @private Concept capturing both compile time defined block sizes and runtime /// ones. template -concept BlockSize = ConstexprType; +concept BlockSize = common::ConstexprType; /// @private Short notation for a compile time block size. template using BS = std::integral_constant; -/// @private Check if block size is a compile time constant. +/// @private Retrieves the integral block size of a compile time block size. template -constexpr bool is_compile_time_bs_v = is_compile_time_v; +constexpr int +block_size(V bs, + typename std::enable_if_t>* = 0) +{ + return common::value(bs); +} -/// @private Check if block size is a run time constant. +/// @private Retrieves the integral block size of a runtime block size. template -constexpr bool is_runtime_bs_v = is_runtime_v; - -/// @private Retrieves the integral block size of a runtime or compile time -/// block size. -int block_size(BlockSize auto bs) { return value(bs); } +int block_size(V bs, + typename std::enable_if_t>* = 0) +{ + return common::value(bs); +} } // namespace dolfinx diff --git a/cpp/test/common/constexpr_type.cpp b/cpp/test/common/constexpr_type.cpp index 196d5ab1209..c2bd6189a1a 100644 --- a/cpp/test/common/constexpr_type.cpp +++ b/cpp/test/common/constexpr_type.cpp @@ -8,23 +8,25 @@ #include #include -#include +#include #include +using namespace dolfinx::common; + namespace { template void test() { using V_runtime = T; - static_assert(!dolfinx::is_compile_time_v); - static_assert(dolfinx::is_runtime_v); - assert((dolfinx::value(V_runtime(1)) == T(1))); + static_assert(!is_compile_time_v); + static_assert(is_runtime_v); + assert((value(V_runtime(1)) == T(1))); using V_compile_time = std::integral_constant; - static_assert(dolfinx::is_compile_time_v); - static_assert(!dolfinx::is_runtime_v); - static_assert((dolfinx::value(V_compile_time()) == T(1))); + static_assert(is_compile_time_v); + static_assert(!is_runtime_v); + static_assert((value(V_compile_time()) == T(1))); } } // namespace From 460b35034d296dc748cbd85182b7d38cdc7240bd Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Thu, 1 May 2025 01:51:50 +0200 Subject: [PATCH 17/45] Compiler limitation for floating point values --- cpp/dolfinx/common/constexpr_type.h | 2 +- cpp/test/common/constexpr_type.cpp | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/cpp/dolfinx/common/constexpr_type.h b/cpp/dolfinx/common/constexpr_type.h index 4e7e656bb7e..4051ef54034 100644 --- a/cpp/dolfinx/common/constexpr_type.h +++ b/cpp/dolfinx/common/constexpr_type.h @@ -36,7 +36,7 @@ constexpr bool is_runtime_v = std::is_same_v; /// @private Retrieve value of a compile time constant form a ConstexprType. template requires ConstexprType -constexpr T value(V container, +constexpr T value(V /* container */, typename std::enable_if_t>* = 0) { return V::value; diff --git a/cpp/test/common/constexpr_type.cpp b/cpp/test/common/constexpr_type.cpp index c2bd6189a1a..e88314ef730 100644 --- a/cpp/test/common/constexpr_type.cpp +++ b/cpp/test/common/constexpr_type.cpp @@ -35,6 +35,11 @@ TEST_CASE("Test constexpr type", "[constexpr_type]") test(); test(); test(); + +// is C++ 20, but some compilers do not fully support, see +// https://en.cppreference.com/w/cpp/compiler_support/20#cpp_nontype_template_args_201911L +#if defined(__cpp_nontype_template_args) test(); test(); +#endif } \ No newline at end of file From 5c1d722bd145b42cd9abe028571da507c3ed6d5b Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Thu, 1 May 2025 02:13:52 +0200 Subject: [PATCH 18/45] Misses year code --- cpp/test/common/constexpr_type.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/test/common/constexpr_type.cpp b/cpp/test/common/constexpr_type.cpp index e88314ef730..b9a4fc66bcf 100644 --- a/cpp/test/common/constexpr_type.cpp +++ b/cpp/test/common/constexpr_type.cpp @@ -38,7 +38,8 @@ TEST_CASE("Test constexpr type", "[constexpr_type]") // is C++ 20, but some compilers do not fully support, see // https://en.cppreference.com/w/cpp/compiler_support/20#cpp_nontype_template_args_201911L -#if defined(__cpp_nontype_template_args) +#if defined(__cpp_nontype_template_args) \ + && __cpp_nontype_template_args >= 201911L test(); test(); #endif From 5f3d563409485f9870ed153ac6b34e371bc7c00e Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Mon, 2 Jun 2025 21:40:09 +0200 Subject: [PATCH 19/45] Even better --- cpp/dolfinx/common/constexpr_type.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/dolfinx/common/constexpr_type.h b/cpp/dolfinx/common/constexpr_type.h index 4051ef54034..4ddf7ade352 100644 --- a/cpp/dolfinx/common/constexpr_type.h +++ b/cpp/dolfinx/common/constexpr_type.h @@ -36,7 +36,7 @@ constexpr bool is_runtime_v = std::is_same_v; /// @private Retrieve value of a compile time constant form a ConstexprType. template requires ConstexprType -constexpr T value(V /* container */, +consteval T value(V /* container */, typename std::enable_if_t>* = 0) { return V::value; From 6f6cb9bd56ebf05971c4199f918460deb596e8dd Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Mon, 2 Jun 2025 21:47:43 +0200 Subject: [PATCH 20/45] Missed one --- cpp/dolfinx/common/types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/dolfinx/common/types.h b/cpp/dolfinx/common/types.h index 8bbcdf5a208..2ed7ed57924 100644 --- a/cpp/dolfinx/common/types.h +++ b/cpp/dolfinx/common/types.h @@ -53,7 +53,7 @@ using BS = std::integral_constant; /// @private Retrieves the integral block size of a compile time block size. template -constexpr int +consteval int block_size(V bs, typename std::enable_if_t>* = 0) { From c50ce564bf304dd30bd9231a544daadf94aadbcc Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Tue, 3 Jun 2025 18:47:28 +0200 Subject: [PATCH 21/45] Revert to constexpr --- cpp/dolfinx/common/constexpr_type.h | 2 +- cpp/dolfinx/common/types.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/dolfinx/common/constexpr_type.h b/cpp/dolfinx/common/constexpr_type.h index 4ddf7ade352..4051ef54034 100644 --- a/cpp/dolfinx/common/constexpr_type.h +++ b/cpp/dolfinx/common/constexpr_type.h @@ -36,7 +36,7 @@ constexpr bool is_runtime_v = std::is_same_v; /// @private Retrieve value of a compile time constant form a ConstexprType. template requires ConstexprType -consteval T value(V /* container */, +constexpr T value(V /* container */, typename std::enable_if_t>* = 0) { return V::value; diff --git a/cpp/dolfinx/common/types.h b/cpp/dolfinx/common/types.h index 2ed7ed57924..8bbcdf5a208 100644 --- a/cpp/dolfinx/common/types.h +++ b/cpp/dolfinx/common/types.h @@ -53,7 +53,7 @@ using BS = std::integral_constant; /// @private Retrieves the integral block size of a compile time block size. template -consteval int +constexpr int block_size(V bs, typename std::enable_if_t>* = 0) { From 6900e883a31506242729799ae0009a8a2e64a7e3 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Mon, 21 Jul 2025 20:10:10 +0200 Subject: [PATCH 22/45] Try trait for constexpr type deduction --- cpp/dolfinx/common/constexpr_type.h | 11 ++++++++--- cpp/dolfinx/common/types.h | 3 +++ cpp/dolfinx/fem/pack.h | 2 +- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/cpp/dolfinx/common/constexpr_type.h b/cpp/dolfinx/common/constexpr_type.h index 4051ef54034..f4c5c53d1e2 100644 --- a/cpp/dolfinx/common/constexpr_type.h +++ b/cpp/dolfinx/common/constexpr_type.h @@ -33,11 +33,15 @@ template requires ConstexprType constexpr bool is_runtime_v = std::is_same_v; +template +using value_type + = std::conditional_t, typename V::value_type, V>; + /// @private Retrieve value of a compile time constant form a ConstexprType. template requires ConstexprType -constexpr T value(V /* container */, - typename std::enable_if_t>* = 0) +value_type value(V /* container */, + typename std::enable_if_t>* = 0) { return V::value; } @@ -45,7 +49,8 @@ constexpr T value(V /* container */, /// @private Retrieve value of runtime variable form a ConstexprType. template requires ConstexprType -T value(V container, typename std::enable_if_t>* = 0) +value_type value(V container, + typename std::enable_if_t>* = 0) { return container; } diff --git a/cpp/dolfinx/common/types.h b/cpp/dolfinx/common/types.h index 8bbcdf5a208..fa635419433 100644 --- a/cpp/dolfinx/common/types.h +++ b/cpp/dolfinx/common/types.h @@ -51,6 +51,9 @@ concept BlockSize = common::ConstexprType; template using BS = std::integral_constant; +template +using BS_type = common::value_type; + /// @private Retrieves the integral block size of a compile time block size. template constexpr int diff --git a/cpp/dolfinx/fem/pack.h b/cpp/dolfinx/fem/pack.h index 63e82ba0e07..98e423d97b8 100644 --- a/cpp/dolfinx/fem/pack.h +++ b/cpp/dolfinx/fem/pack.h @@ -60,7 +60,7 @@ void pack_impl(std::span coeffs, std::int32_t cell, BlockSize auto _bs, std::span dofs = dofmap.cell_dofs(cell); for (std::size_t i = 0; i < dofs.size(); ++i) { - int bs = block_size(_bs); + BS_type bs = block_size(_bs); const int pos_c = bs * i; const int pos_v = bs * dofs[i]; for (int k = 0; k < bs; ++k) From 4e3367fc31480f2b3fc06842602a48f4bb4dfc19 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Mon, 21 Jul 2025 21:33:32 +0200 Subject: [PATCH 23/45] Revert "Try trait for constexpr type deduction" This reverts commit 6900e883a31506242729799ae0009a8a2e64a7e3. --- cpp/dolfinx/common/constexpr_type.h | 11 +++-------- cpp/dolfinx/common/types.h | 3 --- cpp/dolfinx/fem/pack.h | 2 +- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/cpp/dolfinx/common/constexpr_type.h b/cpp/dolfinx/common/constexpr_type.h index f4c5c53d1e2..4051ef54034 100644 --- a/cpp/dolfinx/common/constexpr_type.h +++ b/cpp/dolfinx/common/constexpr_type.h @@ -33,15 +33,11 @@ template requires ConstexprType constexpr bool is_runtime_v = std::is_same_v; -template -using value_type - = std::conditional_t, typename V::value_type, V>; - /// @private Retrieve value of a compile time constant form a ConstexprType. template requires ConstexprType -value_type value(V /* container */, - typename std::enable_if_t>* = 0) +constexpr T value(V /* container */, + typename std::enable_if_t>* = 0) { return V::value; } @@ -49,8 +45,7 @@ value_type value(V /* container */, /// @private Retrieve value of runtime variable form a ConstexprType. template requires ConstexprType -value_type value(V container, - typename std::enable_if_t>* = 0) +T value(V container, typename std::enable_if_t>* = 0) { return container; } diff --git a/cpp/dolfinx/common/types.h b/cpp/dolfinx/common/types.h index fa635419433..8bbcdf5a208 100644 --- a/cpp/dolfinx/common/types.h +++ b/cpp/dolfinx/common/types.h @@ -51,9 +51,6 @@ concept BlockSize = common::ConstexprType; template using BS = std::integral_constant; -template -using BS_type = common::value_type; - /// @private Retrieves the integral block size of a compile time block size. template constexpr int diff --git a/cpp/dolfinx/fem/pack.h b/cpp/dolfinx/fem/pack.h index 98e423d97b8..63e82ba0e07 100644 --- a/cpp/dolfinx/fem/pack.h +++ b/cpp/dolfinx/fem/pack.h @@ -60,7 +60,7 @@ void pack_impl(std::span coeffs, std::int32_t cell, BlockSize auto _bs, std::span dofs = dofmap.cell_dofs(cell); for (std::size_t i = 0; i < dofs.size(); ++i) { - BS_type bs = block_size(_bs); + int bs = block_size(_bs); const int pos_c = bs * i; const int pos_v = bs * dofs[i]; for (int k = 0; k < bs; ++k) From cb756fa6b3bc0a51b972ef5f630ec4361f4e19d5 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Mon, 21 Jul 2025 21:38:27 +0200 Subject: [PATCH 24/45] Don't reimplement auto --- cpp/dolfinx/fem/pack.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/dolfinx/fem/pack.h b/cpp/dolfinx/fem/pack.h index 63e82ba0e07..496865c17d3 100644 --- a/cpp/dolfinx/fem/pack.h +++ b/cpp/dolfinx/fem/pack.h @@ -60,7 +60,7 @@ void pack_impl(std::span coeffs, std::int32_t cell, BlockSize auto _bs, std::span dofs = dofmap.cell_dofs(cell); for (std::size_t i = 0; i < dofs.size(); ++i) { - int bs = block_size(_bs); + auto bs = block_size(_bs); const int pos_c = bs * i; const int pos_v = bs * dofs[i]; for (int k = 0; k < bs; ++k) From 60d0032dc0d9e2e37b9a449a690a460088cc3a28 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Mon, 21 Jul 2025 22:09:45 +0200 Subject: [PATCH 25/45] Switch all occurences to auto --- cpp/dolfinx/fem/FiniteElement.h | 2 +- cpp/dolfinx/fem/assemble_vector_impl.h | 10 +++++----- cpp/dolfinx/la/matrix_csr_impl.h | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/dolfinx/fem/FiniteElement.h b/cpp/dolfinx/fem/FiniteElement.h index 34e31b389d9..94ab9840a2f 100644 --- a/cpp/dolfinx/fem/FiniteElement.h +++ b/cpp/dolfinx/fem/FiniteElement.h @@ -588,7 +588,7 @@ class FiniteElement std::span cell_info, std::int32_t cell, int data_block_size) { - const int ebs = block_size(); + auto ebs = block_size(); const std::size_t dof_count = data.size() / data_block_size; for (int block = 0; block < data_block_size; ++block) { diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 1da29448f67..52daf738e5b 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -100,8 +100,8 @@ void _lift_bc_cells( const auto [dmap0, _bs0, cells0] = dofmap0; const auto [dmap1, _bs1, cells1] = dofmap1; - int bs0 = block_size(_bs0); - int bs1 = block_size(_bs1); + auto bs0 = block_size(_bs0); + auto bs1 = block_size(_bs1); // Data structures used in bc application std::vector> cdofs(3 * x_dofmap.extent(1)); @@ -638,7 +638,7 @@ void assemble_cells( return; const auto [dmap, _bs, cells0] = dofmap; - int bs = block_size(_bs); + auto bs = block_size(_bs); // Create data structures used in assembly std::vector> cdofs(3 * x_dofmap.extent(1)); @@ -717,7 +717,7 @@ void assemble_exterior_facets( return; const auto [dmap, _bs, facets0] = dofmap; - int bs = block_size(_bs); + auto bs = block_size(_bs); // Create data structures used in assembly const int num_dofs = dmap.extent(1); @@ -805,7 +805,7 @@ void assemble_interior_facets( return; const auto [dmap, _bs, facets0] = dofmap; - int bs = block_size(_bs); + auto bs = block_size(_bs); // Create data structures used in assembly using X = scalar_value_t; diff --git a/cpp/dolfinx/la/matrix_csr_impl.h b/cpp/dolfinx/la/matrix_csr_impl.h index 3a1d625b611..3d56e7b7e62 100644 --- a/cpp/dolfinx/la/matrix_csr_impl.h +++ b/cpp/dolfinx/la/matrix_csr_impl.h @@ -229,7 +229,7 @@ void spmv(std::span values, std::span row_begin, std::span indices, std::span x, std::span y, int bs0, BS1 _bs1) { - int bs1 = block_size(_bs1); + auto bs1 = block_size(_bs1); assert(row_begin.size() == row_end.size()); for (int k0 = 0; k0 < bs0; ++k0) { From 617042c26120100d8c2ac9c74083401b6f30b58c Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Mon, 21 Jul 2025 22:50:36 +0200 Subject: [PATCH 26/45] Fix merge mess up --- cpp/dolfinx/fem/assemble_vector_impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 52daf738e5b..8edc662c0f8 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -870,7 +870,7 @@ void assemble_interior_facets( b[bs * dmap0[i] + k] += be[bs * i + k]; for (std::size_t i = 0; i < dmap1.size(); ++i) for (int k = 0; k < bs; ++k) - b[bs * dmap1[i] + k] += be[bs * (i + dmap0.size()) + k]; + b[bs * dmap1[i] + k] += be[bs * (i + dmap_size) + k]; } } From 40efcef3608159bafee520a67142b193985bb63c Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Thu, 24 Jul 2025 16:18:46 +0200 Subject: [PATCH 27/45] Do not provide tempalte type twice --- cpp/dolfinx/fem/assemble_vector_impl.h | 24 ++++++++++++------------ cpp/dolfinx/la/MatrixCSR.h | 16 ++++++++-------- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 8edc662c0f8..631f6f87ee6 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -955,24 +955,24 @@ void lift_bc(std::span b, const Form& a, mdspan2_t x_dofmap, auto coeffs = md::mdspan(_coeffs.data(), cells.size(), cstride); if (bs0 == 1 and bs1 == 1) { - _lift_bc_cells, BS<1>>( + _lift_bc_cells( b, x_dofmap, x, kernel, cells, {dofmap0, BS<1>(), cells0}, P0, {dofmap1, BS<1>(), cells1}, P1T, constants, coeffs, cell_info0, cell_info1, bc_values1, bc_markers1, x0, alpha); } else if (bs0 == 3 and bs1 == 3) { - _lift_bc_cells, BS<3>>( + _lift_bc_cells( b, x_dofmap, x, kernel, cells, {dofmap0, BS<3>(), cells0}, P0, {dofmap1, BS<3>(), cells1}, P1T, constants, coeffs, cell_info0, cell_info1, bc_values1, bc_markers1, x0, alpha); } else { - _lift_bc_cells( - b, x_dofmap, x, kernel, cells, {dofmap0, bs0, cells0}, P0, - {dofmap1, bs1, cells1}, P1T, constants, coeffs, cell_info0, - cell_info1, bc_values1, bc_markers1, x0, alpha); + _lift_bc_cells(b, x_dofmap, x, kernel, cells, {dofmap0, bs0, cells0}, + P0, {dofmap1, bs1, cells1}, P1T, constants, coeffs, + cell_info0, cell_info1, bc_values1, bc_markers1, x0, + alpha); } } @@ -1238,21 +1238,21 @@ void assemble_vector( if (bs == 1) { - impl::assemble_exterior_facets>( + impl::assemble_exterior_facets( P0, b, x_dofmap, x, facets, {dofs, BS<1>(), facets1}, fn, constants, md::mdspan(coeffs.data(), facets.extent(0), cstride), cell_info0, perms); } else if (bs == 3) { - impl::assemble_exterior_facets>( + impl::assemble_exterior_facets( P0, b, x_dofmap, x, facets, {dofs, BS<3>(), facets1}, fn, constants, md::mdspan(coeffs.data(), facets.size() / 2, cstride), cell_info0, perms); } else { - impl::assemble_exterior_facets( + impl::assemble_exterior_facets( P0, b, x_dofmap, x, facets, {dofs, bs, facets1}, fn, constants, md::mdspan(coeffs.data(), facets.size() / 2, cstride), cell_info0, perms); @@ -1277,7 +1277,7 @@ void assemble_vector( assert((facets.size() / 4) * 2 * cstride == coeffs.size()); if (bs == 1) { - impl::assemble_interior_facets>( + impl::assemble_interior_facets( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), {*dofmap, BS<1>(), @@ -1288,7 +1288,7 @@ void assemble_vector( } else if (bs == 3) { - impl::assemble_interior_facets>( + impl::assemble_interior_facets( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), {*dofmap, BS<3>(), @@ -1299,7 +1299,7 @@ void assemble_vector( } else { - impl::assemble_interior_facets( + impl::assemble_interior_facets( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), {*dofmap, bs, diff --git a/cpp/dolfinx/la/MatrixCSR.h b/cpp/dolfinx/la/MatrixCSR.h index 9c273e4f36d..fe214ffc807 100644 --- a/cpp/dolfinx/la/MatrixCSR.h +++ b/cpp/dolfinx/la/MatrixCSR.h @@ -792,13 +792,13 @@ void MatrixCSR::mult(la::Vector& x, // yi[0] += Ai[0] * xi[0] if (_bs[1] == 1) { - impl::spmv>(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x, - _y, _bs[0], BS<1>()); + impl::spmv(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x, _y, + _bs[0], BS<1>()); } else { - impl::spmv(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x, - _y, _bs[0], _bs[1]); + impl::spmv(Avalues, Arow_begin, Aoff_diag_offset, Acols, _x, _y, + _bs[0], _bs[1]); } // finalize ghost update @@ -808,13 +808,13 @@ void MatrixCSR::mult(la::Vector& x, // yi[0] += Ai[1] * xi[1] if (_bs[1] == 1) { - impl::spmv>(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, - _y, _bs[0], BS<1>()); + impl::spmv(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, _y, + _bs[0], BS<1>()); } else { - impl::spmv(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, _y, - _bs[0], _bs[1]); + impl::spmv(Avalues, Aoff_diag_offset, Arow_end, Acols, _x, _y, + _bs[0], _bs[1]); } } From b58721790e375132ba411ba7c7e0bb62a4dc1147 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Thu, 24 Jul 2025 16:20:56 +0200 Subject: [PATCH 28/45] Missed one --- cpp/demo/custom_kernel/main.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/demo/custom_kernel/main.cpp b/cpp/demo/custom_kernel/main.cpp index caf35e2ba44..14300740213 100644 --- a/cpp/demo/custom_kernel/main.cpp +++ b/cpp/demo/custom_kernel/main.cpp @@ -168,7 +168,7 @@ double assemble_vector1(const mesh::Geometry& g, const fem::DofMap& dofmap, md::mdspan> x( g.x().data(), g.x().size() / 3, 3); common::Timer timer("Assembler1 lambda (vector)"); - fem::impl::assemble_cells>( + fem::impl::assemble_cells( [](auto, auto, auto, auto) {}, b.mutable_array(), g.dofmap(), x, cells, {dofmap.map(), BS<1>(), cells}, kernel, {}, {}, {}); b.scatter_rev(std::plus()); From c11fb975a6ea606db3fff087ce1c74311e3db09c Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Thu, 24 Jul 2025 16:29:25 +0200 Subject: [PATCH 29/45] Good default --- cpp/dolfinx/fem/assemble_vector_impl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 631f6f87ee6..f17548d34fa 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -78,7 +78,7 @@ using mdspan2_t = md::mdspan>; /// conditions applied. /// @param[in] x0 Vector used in the lifting. /// @param[in] alpha Scaling to apply. -template +template void _lift_bc_cells( std::span b, mdspan2_t x_dofmap, md::mdspan, From 57c2ccb5146617d5239b61a7636869e833b2effc Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Thu, 24 Jul 2025 16:33:37 +0200 Subject: [PATCH 30/45] Modernize: enable_if_t -> concept --- cpp/dolfinx/common/constexpr_type.h | 9 ++++----- cpp/dolfinx/common/types.h | 9 ++++----- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/cpp/dolfinx/common/constexpr_type.h b/cpp/dolfinx/common/constexpr_type.h index 4051ef54034..141355c3ad7 100644 --- a/cpp/dolfinx/common/constexpr_type.h +++ b/cpp/dolfinx/common/constexpr_type.h @@ -35,17 +35,16 @@ constexpr bool is_runtime_v = std::is_same_v; /// @private Retrieve value of a compile time constant form a ConstexprType. template - requires ConstexprType -constexpr T value(V /* container */, - typename std::enable_if_t>* = 0) + requires ConstexprType && is_compile_time_v +constexpr T value(V /* container */) { return V::value; } /// @private Retrieve value of runtime variable form a ConstexprType. template - requires ConstexprType -T value(V container, typename std::enable_if_t>* = 0) + requires ConstexprType && is_runtime_v +T value(V container) { return container; } diff --git a/cpp/dolfinx/common/types.h b/cpp/dolfinx/common/types.h index 8bbcdf5a208..5a79d928980 100644 --- a/cpp/dolfinx/common/types.h +++ b/cpp/dolfinx/common/types.h @@ -53,17 +53,16 @@ using BS = std::integral_constant; /// @private Retrieves the integral block size of a compile time block size. template -constexpr int -block_size(V bs, - typename std::enable_if_t>* = 0) + requires common::is_compile_time_v +constexpr int block_size(V bs) { return common::value(bs); } /// @private Retrieves the integral block size of a runtime block size. template -int block_size(V bs, - typename std::enable_if_t>* = 0) + requires common::is_runtime_v +int block_size(V bs) { return common::value(bs); } From b2d436af70b7c4b27076edd0e3ce6ddf6396c807 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Thu, 24 Jul 2025 16:41:17 +0200 Subject: [PATCH 31/45] More type deduction --- cpp/demo/custom_kernel/main.cpp | 2 +- cpp/dolfinx/fem/assemble_vector_impl.h | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/cpp/demo/custom_kernel/main.cpp b/cpp/demo/custom_kernel/main.cpp index 14300740213..73920de948f 100644 --- a/cpp/demo/custom_kernel/main.cpp +++ b/cpp/demo/custom_kernel/main.cpp @@ -170,7 +170,7 @@ double assemble_vector1(const mesh::Geometry& g, const fem::DofMap& dofmap, common::Timer timer("Assembler1 lambda (vector)"); fem::impl::assemble_cells( [](auto, auto, auto, auto) {}, b.mutable_array(), g.dofmap(), x, cells, - {dofmap.map(), BS<1>(), cells}, kernel, {}, {}, {}); + std::make_tuple(dofmap.map(), BS<1>(), cells), kernel, {}, {}, {}); b.scatter_rev(std::plus()); return la::squared_norm(b); } diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index f17548d34fa..b9fb8c91c0b 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -1239,21 +1239,21 @@ void assemble_vector( { impl::assemble_exterior_facets( - P0, b, x_dofmap, x, facets, {dofs, BS<1>(), facets1}, fn, constants, + P0, b, x_dofmap, x, facets, std::make_tuple(dofs, BS<1>(), facets1), fn, constants, md::mdspan(coeffs.data(), facets.extent(0), cstride), cell_info0, perms); } else if (bs == 3) { impl::assemble_exterior_facets( - P0, b, x_dofmap, x, facets, {dofs, BS<3>(), facets1}, fn, constants, + P0, b, x_dofmap, x, facets, std::make_tuple(dofs, BS<3>(), facets1), fn, constants, md::mdspan(coeffs.data(), facets.size() / 2, cstride), cell_info0, perms); } else { impl::assemble_exterior_facets( - P0, b, x_dofmap, x, facets, {dofs, bs, facets1}, fn, constants, + P0, b, x_dofmap, x, facets, std::make_tuple(dofs, bs, facets1), fn, constants, md::mdspan(coeffs.data(), facets.size() / 2, cstride), cell_info0, perms); } @@ -1280,8 +1280,8 @@ void assemble_vector( impl::assemble_interior_facets( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), - {*dofmap, BS<1>(), - mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)}, + std::make_tuple(*dofmap, BS<1>(), + mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)), fn, constants, mdspanx2x_t(coeffs.data(), facets.size() / 4, 2, cstride), cell_info0, perms); @@ -1291,8 +1291,8 @@ void assemble_vector( impl::assemble_interior_facets( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), - {*dofmap, BS<3>(), - mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)}, + std::make_tuple(*dofmap, BS<3>(), + mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)), fn, constants, mdspanx2x_t(coeffs.data(), facets.size() / 4, 2, cstride), cell_info0, perms); @@ -1302,8 +1302,8 @@ void assemble_vector( impl::assemble_interior_facets( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), - {*dofmap, bs, - mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)}, + std::make_tuple(*dofmap, bs, + mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)), fn, constants, mdspanx2x_t(coeffs.data(), facets.size() / 4, 2, cstride), cell_info0, perms); From 5114f16b80a6077722411c7a2b3288cd750d0144 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Thu, 24 Jul 2025 16:42:08 +0200 Subject: [PATCH 32/45] format --- cpp/dolfinx/fem/assemble_vector_impl.h | 30 +++++++++++++++----------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index b9fb8c91c0b..9156c90a0ee 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -1239,23 +1239,24 @@ void assemble_vector( { impl::assemble_exterior_facets( - P0, b, x_dofmap, x, facets, std::make_tuple(dofs, BS<1>(), facets1), fn, constants, - md::mdspan(coeffs.data(), facets.extent(0), cstride), cell_info0, - perms); + P0, b, x_dofmap, x, facets, std::make_tuple(dofs, BS<1>(), facets1), + fn, constants, md::mdspan(coeffs.data(), facets.extent(0), cstride), + cell_info0, perms); } else if (bs == 3) { impl::assemble_exterior_facets( - P0, b, x_dofmap, x, facets, std::make_tuple(dofs, BS<3>(), facets1), fn, constants, + P0, b, x_dofmap, x, facets, std::make_tuple(dofs, BS<3>(), facets1), + fn, constants, md::mdspan(coeffs.data(), facets.size() / 2, cstride), cell_info0, perms); } else { impl::assemble_exterior_facets( - P0, b, x_dofmap, x, facets, std::make_tuple(dofs, bs, facets1), fn, constants, - md::mdspan(coeffs.data(), facets.size() / 2, cstride), cell_info0, - perms); + P0, b, x_dofmap, x, facets, std::make_tuple(dofs, bs, facets1), fn, + constants, md::mdspan(coeffs.data(), facets.size() / 2, cstride), + cell_info0, perms); } } @@ -1280,8 +1281,9 @@ void assemble_vector( impl::assemble_interior_facets( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), - std::make_tuple(*dofmap, BS<1>(), - mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)), + std::make_tuple( + *dofmap, BS<1>(), + mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)), fn, constants, mdspanx2x_t(coeffs.data(), facets.size() / 4, 2, cstride), cell_info0, perms); @@ -1291,8 +1293,9 @@ void assemble_vector( impl::assemble_interior_facets( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), - std::make_tuple(*dofmap, BS<3>(), - mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)), + std::make_tuple( + *dofmap, BS<3>(), + mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)), fn, constants, mdspanx2x_t(coeffs.data(), facets.size() / 4, 2, cstride), cell_info0, perms); @@ -1302,8 +1305,9 @@ void assemble_vector( impl::assemble_interior_facets( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), - std::make_tuple(*dofmap, bs, - mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)), + std::make_tuple( + *dofmap, bs, + mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)), fn, constants, mdspanx2x_t(coeffs.data(), facets.size() / 4, 2, cstride), cell_info0, perms); From c100b287c2172a84d4c4a0c4e964e8bc97a80c2c Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Thu, 24 Jul 2025 17:21:07 +0200 Subject: [PATCH 33/45] Initializer lsits are a typing mess... --- cpp/demo/custom_kernel/main.cpp | 4 ++-- cpp/dolfinx/fem/assemble_vector_impl.h | 31 ++++++++++++-------------- 2 files changed, 16 insertions(+), 19 deletions(-) diff --git a/cpp/demo/custom_kernel/main.cpp b/cpp/demo/custom_kernel/main.cpp index 73920de948f..caf35e2ba44 100644 --- a/cpp/demo/custom_kernel/main.cpp +++ b/cpp/demo/custom_kernel/main.cpp @@ -168,9 +168,9 @@ double assemble_vector1(const mesh::Geometry& g, const fem::DofMap& dofmap, md::mdspan> x( g.x().data(), g.x().size() / 3, 3); common::Timer timer("Assembler1 lambda (vector)"); - fem::impl::assemble_cells( + fem::impl::assemble_cells>( [](auto, auto, auto, auto) {}, b.mutable_array(), g.dofmap(), x, cells, - std::make_tuple(dofmap.map(), BS<1>(), cells), kernel, {}, {}, {}); + {dofmap.map(), BS<1>(), cells}, kernel, {}, {}, {}); b.scatter_rev(std::plus()); return la::squared_norm(b); } diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 9156c90a0ee..c33a314e2d7 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -1163,11 +1164,11 @@ void assemble_vector( assert(L.function_spaces().at(0)); auto element = L.function_spaces().at(0)->elements(cell_type_idx); assert(element); - std::shared_ptr dofmap - = L.function_spaces().at(0)->dofmaps(cell_type_idx); - assert(dofmap); - auto dofs = dofmap->map(); - const int bs = dofmap->bs(); + assert(L.function_spaces().at(0)->dofmaps(cell_type_idx)); + const fem::DofMap& dofmap + = *L.function_spaces().at(0)->dofmaps(cell_type_idx); + auto dofs = dofmap.map(); + const int bs = dofmap.bs(); fem::DofTransformKernel auto P0 = element->template dof_transformation_fn(doftransform::standard); @@ -1278,36 +1279,32 @@ void assemble_vector( assert((facets.size() / 4) * 2 * cstride == coeffs.size()); if (bs == 1) { - impl::assemble_interior_facets( + impl::assemble_interior_facets>( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), - std::make_tuple( - *dofmap, BS<1>(), - mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)), + {dofmap, BS<1>(), + mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)}, fn, constants, mdspanx2x_t(coeffs.data(), facets.size() / 4, 2, cstride), cell_info0, perms); } else if (bs == 3) { - impl::assemble_interior_facets( + impl::assemble_interior_facets>( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), - std::make_tuple( - *dofmap, BS<3>(), - mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)), + {dofmap, BS<3>(), + mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)}, fn, constants, mdspanx2x_t(coeffs.data(), facets.size() / 4, 2, cstride), cell_info0, perms); } else { - impl::assemble_interior_facets( + impl::assemble_interior_facets( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), - std::make_tuple( - *dofmap, bs, - mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)), + {dofmap, bs, mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)}, fn, constants, mdspanx2x_t(coeffs.data(), facets.size() / 4, 2, cstride), cell_info0, perms); From 400938b30cafbef936c0c1aed7c7e043951838e1 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Mon, 1 Sep 2025 20:52:57 +0200 Subject: [PATCH 34/45] format --- cpp/demo/custom_kernel/main.cpp | 6 +++--- cpp/dolfinx/fem/assemble_vector_impl.h | 16 ++++++++-------- 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/cpp/demo/custom_kernel/main.cpp b/cpp/demo/custom_kernel/main.cpp index fc6d2e77d1a..eca06a25bc4 100644 --- a/cpp/demo/custom_kernel/main.cpp +++ b/cpp/demo/custom_kernel/main.cpp @@ -168,9 +168,9 @@ double assemble_vector1(const mesh::Geometry& g, const fem::DofMap& dofmap, md::mdspan> x( g.x().data(), g.x().size() / 3, 3); common::Timer timer("Assembler1 lambda (vector)"); - fem::impl::assemble_cells>([](auto, auto, auto, auto) {}, b.array(), - g.dofmap(), x, cells, {dofmap.map(), 1, cells}, - kernel, {}, {}, {}); + fem::impl::assemble_cells>( + [](auto, auto, auto, auto) {}, b.array(), g.dofmap(), x, cells, + {dofmap.map(), 1, cells}, kernel, {}, {}, {}); b.scatter_rev(std::plus()); return la::squared_norm(b); } diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 55045cb8b3e..2ef499432ea 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -1044,17 +1044,17 @@ void lift_bc(V&& b, const Form& a, mdspan2_t x_dofmap, auto coeffs = md::mdspan(_coeffs.data(), cells.size(), cstride); if (bs0 == 1 and bs1 == 1) { - _lift_bc_cells(b, x_dofmap, x, kernel, cells, - {dofmap0, BS<1>(), cells0}, P0, {dofmap1, BS<1>(), cells1}, - P1T, constants, coeffs, cell_info0, cell_info1, - bc_values1, bc_markers1, x0, alpha); + _lift_bc_cells(b, x_dofmap, x, kernel, cells, {dofmap0, BS<1>(), cells0}, + P0, {dofmap1, BS<1>(), cells1}, P1T, constants, coeffs, + cell_info0, cell_info1, bc_values1, bc_markers1, x0, + alpha); } else if (bs0 == 3 and bs1 == 3) { - _lift_bc_cells(b, x_dofmap, x, kernel, cells, - {dofmap0, BS<3>(), cells0}, P0, {dofmap1, BS<3>(), cells1}, - P1T, constants, coeffs, cell_info0, cell_info1, - bc_values1, bc_markers1, x0, alpha); + _lift_bc_cells(b, x_dofmap, x, kernel, cells, {dofmap0, BS<3>(), cells0}, + P0, {dofmap1, BS<3>(), cells1}, P1T, constants, coeffs, + cell_info0, cell_info1, bc_values1, bc_markers1, x0, + alpha); } else { From daf57c64c74fd7f69aa048e3f0855f7f7491f6b7 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Mon, 1 Sep 2025 21:05:47 +0200 Subject: [PATCH 35/45] Start fixing --- cpp/dolfinx/fem/assemble_vector_impl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 2ef499432ea..0e287c2a107 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -1295,7 +1295,7 @@ void assemble_vector( } else { - impl::assemble_cells( + impl::assemble_cells( P0, b, x_dofmap, x, cells, {dofs, bs, cells0}, fn, constants, md::mdspan(coeffs.data(), cells.size(), cstride), cell_info0); } @@ -1370,7 +1370,7 @@ void assemble_vector( assert((facets.size() / 4) * 2 * cstride == coeffs.size()); if (bs == 1) { - impl::assemble_interior_facets>( + impl::assemble_interior_facets( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), {dofmap, BS<1>(), @@ -1381,7 +1381,7 @@ void assemble_vector( } else if (bs == 3) { - impl::assemble_interior_facets>( + impl::assemble_interior_facets( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), {dofmap, BS<3>(), @@ -1392,7 +1392,7 @@ void assemble_vector( } else { - impl::assemble_interior_facets( + impl::assemble_interior_facets( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), {dofmap, bs, mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)}, From 783831400f48d10cb1f9aacc5f148c56cce8dcd9 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Mon, 1 Sep 2025 21:29:02 +0200 Subject: [PATCH 36/45] All up to vector --- cpp/dolfinx/fem/assemble_vector_impl.h | 11 ++++----- python/dolfinx/wrappers/assemble.cpp | 34 +++++++++++++------------- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 0e287c2a107..0b3ce307373 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -1058,10 +1058,9 @@ void lift_bc(V&& b, const Form& a, mdspan2_t x_dofmap, } else { - _lift_bc_cells(b, x_dofmap, x, kernel, cells, {dofmap0, bs0, cells0}, - P0, {dofmap1, bs1, cells1}, P1T, constants, coeffs, - cell_info0, cell_info1, bc_values1, bc_markers1, x0, - alpha); + _lift_bc_cells(b, x_dofmap, x, kernel, cells, {dofmap0, bs0, cells0}, P0, + {dofmap1, bs1, cells1}, P1T, constants, coeffs, cell_info0, + cell_info1, bc_values1, bc_markers1, x0, alpha); } } @@ -1331,14 +1330,14 @@ void assemble_vector( assert((facets.size() / 2) * cstride == coeffs.size()); if (bs == 1) { - impl::assemble_exterior_facets( + impl::assemble_exterior_facets>( P0, b, x_dofmap, x, facets, {dofs, BS<1>(), facets1}, fn, constants, md::mdspan(coeffs.data(), facets.extent(0), cstride), cell_info0, perms); } else if (bs == 3) { - impl::assemble_exterior_facets( + impl::assemble_exterior_facets>( P0, b, x_dofmap, x, facets, {dofs, BS<3>(), facets1}, fn, constants, md::mdspan(coeffs.data(), facets.size() / 2, cstride), cell_info0, perms); diff --git a/python/dolfinx/wrappers/assemble.cpp b/python/dolfinx/wrappers/assemble.cpp index d4ae84e000a..b1bc4791691 100644 --- a/python/dolfinx/wrappers/assemble.cpp +++ b/python/dolfinx/wrappers/assemble.cpp @@ -326,23 +326,23 @@ void declare_assembly_functions(nb::module_& m) "Assemble functional over mesh with provided constants and " "coefficients"); // Vector - m.def( - "assemble_vector", - [](nb::ndarray, nb::c_contig> b, - const dolfinx::fem::Form& L, - nb::ndarray, nb::c_contig> constants, - const std::map, - nb::ndarray, nb::c_contig>>& - coefficients) - { - dolfinx::fem::assemble_vector( - std::span(b.data(), b.size()), L, - std::span(constants.data(), constants.size()), - dolfinx_wrappers::py_to_cpp_coeffs(coefficients)); - }, - nb::arg("b"), nb::arg("L"), nb::arg("constants"), nb::arg("coeffs"), - "Assemble linear form into an existing vector with pre-packed constants " - "and coefficients"); + // m.def( + // "assemble_vector", + // [](nb::ndarray, nb::c_contig> b, + // const dolfinx::fem::Form& L, + // nb::ndarray, nb::c_contig> constants, + // const std::map, + // nb::ndarray, nb::c_contig>>& + // coefficients) + // { + // dolfinx::fem::assemble_vector( + // std::span(b.data(), b.size()), L, + // std::span(constants.data(), constants.size()), + // dolfinx_wrappers::py_to_cpp_coeffs(coefficients)); + // }, + // nb::arg("b"), nb::arg("L"), nb::arg("constants"), nb::arg("coeffs"), + // "Assemble linear form into an existing vector with pre-packed constants " + // "and coefficients"); // MatrixCSR m.def( "assemble_matrix", From a8d2302b010661119ff8db20ab80af8db229e946 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Mon, 1 Sep 2025 21:37:58 +0200 Subject: [PATCH 37/45] Fix --- cpp/dolfinx/fem/assemble_vector_impl.h | 8 +++--- python/dolfinx/wrappers/assemble.cpp | 34 +++++++++++++------------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 0b3ce307373..a4798e33da6 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -1344,7 +1344,7 @@ void assemble_vector( } else { - impl::assemble_exterior_facets( + impl::assemble_exterior_facets( P0, b, x_dofmap, x, facets, std::make_tuple(dofs, bs, facets1), fn, constants, md::mdspan(coeffs.data(), facets.size() / 2, cstride), cell_info0, perms); @@ -1369,7 +1369,7 @@ void assemble_vector( assert((facets.size() / 4) * 2 * cstride == coeffs.size()); if (bs == 1) { - impl::assemble_interior_facets( + impl::assemble_interior_facets>( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), {dofmap, BS<1>(), @@ -1380,7 +1380,7 @@ void assemble_vector( } else if (bs == 3) { - impl::assemble_interior_facets( + impl::assemble_interior_facets>( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), {dofmap, BS<3>(), @@ -1391,7 +1391,7 @@ void assemble_vector( } else { - impl::assemble_interior_facets( + impl::assemble_interior_facets( P0, b, x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), {dofmap, bs, mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)}, diff --git a/python/dolfinx/wrappers/assemble.cpp b/python/dolfinx/wrappers/assemble.cpp index b1bc4791691..d4ae84e000a 100644 --- a/python/dolfinx/wrappers/assemble.cpp +++ b/python/dolfinx/wrappers/assemble.cpp @@ -326,23 +326,23 @@ void declare_assembly_functions(nb::module_& m) "Assemble functional over mesh with provided constants and " "coefficients"); // Vector - // m.def( - // "assemble_vector", - // [](nb::ndarray, nb::c_contig> b, - // const dolfinx::fem::Form& L, - // nb::ndarray, nb::c_contig> constants, - // const std::map, - // nb::ndarray, nb::c_contig>>& - // coefficients) - // { - // dolfinx::fem::assemble_vector( - // std::span(b.data(), b.size()), L, - // std::span(constants.data(), constants.size()), - // dolfinx_wrappers::py_to_cpp_coeffs(coefficients)); - // }, - // nb::arg("b"), nb::arg("L"), nb::arg("constants"), nb::arg("coeffs"), - // "Assemble linear form into an existing vector with pre-packed constants " - // "and coefficients"); + m.def( + "assemble_vector", + [](nb::ndarray, nb::c_contig> b, + const dolfinx::fem::Form& L, + nb::ndarray, nb::c_contig> constants, + const std::map, + nb::ndarray, nb::c_contig>>& + coefficients) + { + dolfinx::fem::assemble_vector( + std::span(b.data(), b.size()), L, + std::span(constants.data(), constants.size()), + dolfinx_wrappers::py_to_cpp_coeffs(coefficients)); + }, + nb::arg("b"), nb::arg("L"), nb::arg("constants"), nb::arg("coeffs"), + "Assemble linear form into an existing vector with pre-packed constants " + "and coefficients"); // MatrixCSR m.def( "assemble_matrix", From c333ba29706db19365ce73beffe2cd6592b19dd4 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Mon, 1 Sep 2025 21:47:41 +0200 Subject: [PATCH 38/45] Extend to vertex integrals --- cpp/dolfinx/fem/assemble_vector_impl.h | 79 +++++++++++++++++--------- 1 file changed, 52 insertions(+), 27 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index a4798e33da6..ce1f68bc432 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -898,7 +898,7 @@ void assemble_interior_facets( /// coefficient for cell `i`. /// @param[in] cell_info0 Cell permutation information for the test /// function mesh. -template +template void assemble_vertices( fem::DofTransformKernel auto P0, std::span b, mdspan2_t x_dofmap, md::mdspan, @@ -907,7 +907,7 @@ void assemble_vertices( md::mdspan> vertices, - std::tuple>> dofmap, @@ -918,8 +918,8 @@ void assemble_vertices( if (vertices.empty()) return; - const auto [dmap, bs, vertices0] = dofmap; - assert(_bs < 0 or _bs == bs); + const auto [dmap, _bs, vertices0] = dofmap; + auto bs = block_size(_bs); // Create data structures used in assembly std::vector> cdofs(3 * x_dofmap.extent(1)); @@ -946,18 +946,9 @@ void assemble_vertices( // Scatter vertex vector to 'global' vector array auto dofs = md::submdspan(dmap, c0, md::full_extent); - if constexpr (_bs > 0) - { - for (std::size_t i = 0; i < dofs.size(); ++i) - for (int k = 0; k < _bs; ++k) - b[_bs * dofs[i] + k] += be[_bs * i + k]; - } - else - { - for (std::size_t i = 0; i < dofs.size(); ++i) - for (int k = 0; k < bs; ++k) - b[bs * dofs[i] + k] += be[bs * i + k]; - } + for (std::size_t i = 0; i < dofs.size(); ++i) + for (int k = 0; k < bs; ++k) + b[_bs * dofs[i] + k] += be[_bs * i + k]; } } @@ -1414,17 +1405,51 @@ void assemble_vector( assert(vertices.size() * cstride == coeffs.size()); - impl::assemble_vertices( - P0, b, x_dofmap, x, - md::mdspan>( - vertices.data(), vertices.size() / 2, 2), - {dofs, bs, - md::mdspan>( - vertices0.data(), vertices0.size() / 2, 2)}, - fn, constants, - md::mdspan(coeffs.data(), vertices.size() / 2, cstride), cell_info0); + if (bs == 1) + { + impl::assemble_vertices>( + P0, b, x_dofmap, x, + md::mdspan>( + vertices.data(), vertices.size() / 2, 2), + {dofs, BS<1>(), + md::mdspan>( + vertices0.data(), vertices0.size() / 2, 2)}, + fn, constants, + md::mdspan(coeffs.data(), vertices.size() / 2, cstride), + cell_info0); + } + else if (bs == 3) + { + impl::assemble_vertices>( + P0, b, x_dofmap, x, + md::mdspan>( + vertices.data(), vertices.size() / 2, 2), + {dofs, BS<3>(), + md::mdspan>( + vertices0.data(), vertices0.size() / 2, 2)}, + fn, constants, + md::mdspan(coeffs.data(), vertices.size() / 2, cstride), + cell_info0); + } + else + { + impl::assemble_vertices( + P0, b, x_dofmap, x, + md::mdspan>( + vertices.data(), vertices.size() / 2, 2), + {dofs, bs, + md::mdspan>( + vertices0.data(), vertices0.size() / 2, 2)}, + fn, constants, + md::mdspan(coeffs.data(), vertices.size() / 2, cstride), + cell_info0); + } } } } From af84980c1b23d9b973e6c99ec8952615703f1242 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Wed, 3 Sep 2025 11:04:55 +0200 Subject: [PATCH 39/45] Fix demos --- cpp/demo/custom_kernel/main.cpp | 2 +- cpp/dolfinx/fem/assemble_vector_impl.h | 22 +++++++++++----------- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/cpp/demo/custom_kernel/main.cpp b/cpp/demo/custom_kernel/main.cpp index eca06a25bc4..49d29bb79d6 100644 --- a/cpp/demo/custom_kernel/main.cpp +++ b/cpp/demo/custom_kernel/main.cpp @@ -170,7 +170,7 @@ double assemble_vector1(const mesh::Geometry& g, const fem::DofMap& dofmap, common::Timer timer("Assembler1 lambda (vector)"); fem::impl::assemble_cells>( [](auto, auto, auto, auto) {}, b.array(), g.dofmap(), x, cells, - {dofmap.map(), 1, cells}, kernel, {}, {}, {}); + {dofmap.map(), BS<1>(), cells}, kernel, {}, {}, {}); b.scatter_rev(std::plus()); return la::squared_norm(b); } diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index ce1f68bc432..bc04108c223 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -1274,19 +1274,19 @@ void assemble_vector( if (bs == 1) { impl::assemble_cells( - P0, b, x_dofmap, x, cells, {dofs, BS<1>(), cells0}, fn, constants, + P0, std::span(b), x_dofmap, x, cells, {dofs, BS<1>(), cells0}, fn, constants, md::mdspan(coeffs.data(), cells.size(), cstride), cell_info0); } else if (bs == 3) { impl::assemble_cells( - P0, b, x_dofmap, x, cells, {dofs, BS<3>(), cells0}, fn, constants, + P0, std::span(b), x_dofmap, x, cells, {dofs, BS<3>(), cells0}, fn, constants, md::mdspan(coeffs.data(), cells.size(), cstride), cell_info0); } else { impl::assemble_cells( - P0, b, x_dofmap, x, cells, {dofs, bs, cells0}, fn, constants, + P0, std::span(b), x_dofmap, x, cells, {dofs, bs, cells0}, fn, constants, md::mdspan(coeffs.data(), cells.size(), cstride), cell_info0); } } @@ -1322,14 +1322,14 @@ void assemble_vector( if (bs == 1) { impl::assemble_exterior_facets>( - P0, b, x_dofmap, x, facets, {dofs, BS<1>(), facets1}, fn, constants, + P0, std::span(b), x_dofmap, x, facets, {dofs, BS<1>(), facets1}, fn, constants, md::mdspan(coeffs.data(), facets.extent(0), cstride), cell_info0, perms); } else if (bs == 3) { impl::assemble_exterior_facets>( - P0, b, x_dofmap, x, facets, {dofs, BS<3>(), facets1}, fn, constants, + P0, std::span(b), x_dofmap, x, facets, {dofs, BS<3>(), facets1}, fn, constants, md::mdspan(coeffs.data(), facets.size() / 2, cstride), cell_info0, perms); } @@ -1361,7 +1361,7 @@ void assemble_vector( if (bs == 1) { impl::assemble_interior_facets>( - P0, b, x_dofmap, x, + P0, std::span(b), x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), {dofmap, BS<1>(), mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)}, @@ -1372,7 +1372,7 @@ void assemble_vector( else if (bs == 3) { impl::assemble_interior_facets>( - P0, b, x_dofmap, x, + P0, std::span(b), x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), {dofmap, BS<3>(), mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)}, @@ -1383,7 +1383,7 @@ void assemble_vector( else { impl::assemble_interior_facets( - P0, b, x_dofmap, x, + P0, std::span(b), x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), {dofmap, bs, mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)}, fn, constants, @@ -1408,7 +1408,7 @@ void assemble_vector( if (bs == 1) { impl::assemble_vertices>( - P0, b, x_dofmap, x, + P0, std::span(b), x_dofmap, x, md::mdspan>( vertices.data(), vertices.size() / 2, 2), @@ -1423,7 +1423,7 @@ void assemble_vector( else if (bs == 3) { impl::assemble_vertices>( - P0, b, x_dofmap, x, + P0, std::span(b), x_dofmap, x, md::mdspan>( vertices.data(), vertices.size() / 2, 2), @@ -1438,7 +1438,7 @@ void assemble_vector( else { impl::assemble_vertices( - P0, b, x_dofmap, x, + P0, std::span(b), x_dofmap, x, md::mdspan>( vertices.data(), vertices.size() / 2, 2), From e61f0eb55f1241c24dea4d6bd482fd527eb7af94 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Wed, 3 Sep 2025 11:06:07 +0200 Subject: [PATCH 40/45] Format --- cpp/dolfinx/fem/assemble_vector_impl.h | 33 ++++++++++++++------------ 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index bc04108c223..926165a56f5 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -1273,21 +1273,24 @@ void assemble_vector( assert(cells.size() * cstride == coeffs.size()); if (bs == 1) { - impl::assemble_cells( - P0, std::span(b), x_dofmap, x, cells, {dofs, BS<1>(), cells0}, fn, constants, - md::mdspan(coeffs.data(), cells.size(), cstride), cell_info0); + impl::assemble_cells(P0, std::span(b), x_dofmap, x, cells, + {dofs, BS<1>(), cells0}, fn, constants, + md::mdspan(coeffs.data(), cells.size(), cstride), + cell_info0); } else if (bs == 3) { - impl::assemble_cells( - P0, std::span(b), x_dofmap, x, cells, {dofs, BS<3>(), cells0}, fn, constants, - md::mdspan(coeffs.data(), cells.size(), cstride), cell_info0); + impl::assemble_cells(P0, std::span(b), x_dofmap, x, cells, + {dofs, BS<3>(), cells0}, fn, constants, + md::mdspan(coeffs.data(), cells.size(), cstride), + cell_info0); } else { - impl::assemble_cells( - P0, std::span(b), x_dofmap, x, cells, {dofs, bs, cells0}, fn, constants, - md::mdspan(coeffs.data(), cells.size(), cstride), cell_info0); + impl::assemble_cells(P0, std::span(b), x_dofmap, x, cells, + {dofs, bs, cells0}, fn, constants, + md::mdspan(coeffs.data(), cells.size(), cstride), + cell_info0); } } @@ -1322,16 +1325,16 @@ void assemble_vector( if (bs == 1) { impl::assemble_exterior_facets>( - P0, std::span(b), x_dofmap, x, facets, {dofs, BS<1>(), facets1}, fn, constants, - md::mdspan(coeffs.data(), facets.extent(0), cstride), cell_info0, - perms); + P0, std::span(b), x_dofmap, x, facets, {dofs, BS<1>(), facets1}, fn, + constants, md::mdspan(coeffs.data(), facets.extent(0), cstride), + cell_info0, perms); } else if (bs == 3) { impl::assemble_exterior_facets>( - P0, std::span(b), x_dofmap, x, facets, {dofs, BS<3>(), facets1}, fn, constants, - md::mdspan(coeffs.data(), facets.size() / 2, cstride), cell_info0, - perms); + P0, std::span(b), x_dofmap, x, facets, {dofs, BS<3>(), facets1}, fn, + constants, md::mdspan(coeffs.data(), facets.size() / 2, cstride), + cell_info0, perms); } else { From b2d2aea58934eee25af5b65474c6143e90d69448 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Wed, 3 Sep 2025 11:12:34 +0200 Subject: [PATCH 41/45] Fix auto deduce --- cpp/demo/custom_kernel/main.cpp | 6 +++--- cpp/dolfinx/fem/assemble_vector_impl.h | 20 ++++++++++---------- 2 files changed, 13 insertions(+), 13 deletions(-) diff --git a/cpp/demo/custom_kernel/main.cpp b/cpp/demo/custom_kernel/main.cpp index 49d29bb79d6..80c9664b0d4 100644 --- a/cpp/demo/custom_kernel/main.cpp +++ b/cpp/demo/custom_kernel/main.cpp @@ -168,9 +168,9 @@ double assemble_vector1(const mesh::Geometry& g, const fem::DofMap& dofmap, md::mdspan> x( g.x().data(), g.x().size() / 3, 3); common::Timer timer("Assembler1 lambda (vector)"); - fem::impl::assemble_cells>( - [](auto, auto, auto, auto) {}, b.array(), g.dofmap(), x, cells, - {dofmap.map(), BS<1>(), cells}, kernel, {}, {}, {}); + fem::impl::assemble_cells([](auto, auto, auto, auto) {}, b.array(), + g.dofmap(), x, cells, + {dofmap.map(), BS<1>(), cells}, kernel, {}, {}, {}); b.scatter_rev(std::plus()); return la::squared_norm(b); } diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 926165a56f5..a053398865e 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -695,7 +695,7 @@ void assemble_cells( /// function mesh. /// @param[in] perms Facet permutation integer. Empty if facet /// permutations are not required. -template ::value_type> requires std::is_same_v::value_type, T> void assemble_exterior_facets( @@ -781,7 +781,7 @@ void assemble_exterior_facets( /// function mesh. /// @param[in] perms Facet permutation integer. Empty if facet /// permutations are not required. -template ::value_type> requires std::is_same_v::value_type, T> void assemble_interior_facets( @@ -1324,14 +1324,14 @@ void assemble_vector( assert((facets.size() / 2) * cstride == coeffs.size()); if (bs == 1) { - impl::assemble_exterior_facets>( + impl::assemble_exterior_facets( P0, std::span(b), x_dofmap, x, facets, {dofs, BS<1>(), facets1}, fn, constants, md::mdspan(coeffs.data(), facets.extent(0), cstride), cell_info0, perms); } else if (bs == 3) { - impl::assemble_exterior_facets>( + impl::assemble_exterior_facets( P0, std::span(b), x_dofmap, x, facets, {dofs, BS<3>(), facets1}, fn, constants, md::mdspan(coeffs.data(), facets.size() / 2, cstride), cell_info0, perms); @@ -1363,7 +1363,7 @@ void assemble_vector( assert((facets.size() / 4) * 2 * cstride == coeffs.size()); if (bs == 1) { - impl::assemble_interior_facets>( + impl::assemble_interior_facets( P0, std::span(b), x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), {dofmap, BS<1>(), @@ -1374,7 +1374,7 @@ void assemble_vector( } else if (bs == 3) { - impl::assemble_interior_facets>( + impl::assemble_interior_facets( P0, std::span(b), x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), {dofmap, BS<3>(), @@ -1385,7 +1385,7 @@ void assemble_vector( } else { - impl::assemble_interior_facets( + impl::assemble_interior_facets( P0, std::span(b), x_dofmap, x, mdspanx22_t(facets.data(), facets.size() / 4, 2, 2), {dofmap, bs, mdspanx22_t(facets1.data(), facets1.size() / 4, 2, 2)}, @@ -1410,7 +1410,7 @@ void assemble_vector( if (bs == 1) { - impl::assemble_vertices>( + impl::assemble_vertices( P0, std::span(b), x_dofmap, x, md::mdspan>( @@ -1425,7 +1425,7 @@ void assemble_vector( } else if (bs == 3) { - impl::assemble_vertices>( + impl::assemble_vertices( P0, std::span(b), x_dofmap, x, md::mdspan>( @@ -1440,7 +1440,7 @@ void assemble_vector( } else { - impl::assemble_vertices( + impl::assemble_vertices( P0, std::span(b), x_dofmap, x, md::mdspan>( From df100ede82dda261c2321d08318335888555c723 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Wed, 3 Sep 2025 11:15:42 +0200 Subject: [PATCH 42/45] Make value access of compile time value consteval --- cpp/dolfinx/common/constexpr_type.h | 2 +- cpp/dolfinx/common/types.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/dolfinx/common/constexpr_type.h b/cpp/dolfinx/common/constexpr_type.h index 141355c3ad7..3ad41444da4 100644 --- a/cpp/dolfinx/common/constexpr_type.h +++ b/cpp/dolfinx/common/constexpr_type.h @@ -36,7 +36,7 @@ constexpr bool is_runtime_v = std::is_same_v; /// @private Retrieve value of a compile time constant form a ConstexprType. template requires ConstexprType && is_compile_time_v -constexpr T value(V /* container */) +consteval T value(V /* container */) { return V::value; } diff --git a/cpp/dolfinx/common/types.h b/cpp/dolfinx/common/types.h index 5a79d928980..941395d3c57 100644 --- a/cpp/dolfinx/common/types.h +++ b/cpp/dolfinx/common/types.h @@ -54,7 +54,7 @@ using BS = std::integral_constant; /// @private Retrieves the integral block size of a compile time block size. template requires common::is_compile_time_v -constexpr int block_size(V bs) +consteval int block_size(V bs) { return common::value(bs); } From adc20fc652964306d8621aea98f1ea2e501db62c Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Sat, 13 Sep 2025 21:17:15 +0200 Subject: [PATCH 43/45] Use template test case --- cpp/test/common/constexpr_type.cpp | 33 ++++++++++++------------------ 1 file changed, 13 insertions(+), 20 deletions(-) diff --git a/cpp/test/common/constexpr_type.cpp b/cpp/test/common/constexpr_type.cpp index b9a4fc66bcf..b205b1502d9 100644 --- a/cpp/test/common/constexpr_type.cpp +++ b/cpp/test/common/constexpr_type.cpp @@ -4,6 +4,7 @@ // // SPDX-License-Identifier: LGPL-3.0-or-later +#include #include #include @@ -13,11 +14,19 @@ using namespace dolfinx::common; -namespace -{ -template -void test() +TEMPLATE_TEST_CASE("Test constexpr type", "[constexpr_type]", std::int16_t, + std::int32_t, std::int64_t +// is C++ 20, but some compilers do not fully support, see +// https://en.cppreference.com/w/cpp/compiler_support/20#cpp_nontype_template_args_201911L +#if defined(__cpp_nontype_template_args) \ + && __cpp_nontype_template_args >= 201911L + , + float, double +#endif +) { + using T = TestType; + using V_runtime = T; static_assert(!is_compile_time_v); static_assert(is_runtime_v); @@ -28,19 +37,3 @@ void test() static_assert(!is_runtime_v); static_assert((value(V_compile_time()) == T(1))); } -} // namespace - -TEST_CASE("Test constexpr type", "[constexpr_type]") -{ - test(); - test(); - test(); - -// is C++ 20, but some compilers do not fully support, see -// https://en.cppreference.com/w/cpp/compiler_support/20#cpp_nontype_template_args_201911L -#if defined(__cpp_nontype_template_args) \ - && __cpp_nontype_template_args >= 201911L - test(); - test(); -#endif -} \ No newline at end of file From 6f674a6d28c9da62c3c47894536966934011338e Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Sat, 13 Sep 2025 21:30:01 +0200 Subject: [PATCH 44/45] macross... --- cpp/test/common/constexpr_type.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/test/common/constexpr_type.cpp b/cpp/test/common/constexpr_type.cpp index b205b1502d9..dc5a5402055 100644 --- a/cpp/test/common/constexpr_type.cpp +++ b/cpp/test/common/constexpr_type.cpp @@ -14,16 +14,16 @@ using namespace dolfinx::common; -TEMPLATE_TEST_CASE("Test constexpr type", "[constexpr_type]", std::int16_t, - std::int32_t, std::int64_t -// is C++ 20, but some compilers do not fully support, see +// float and double is C++ 20, but some compilers do not fully support, see // https://en.cppreference.com/w/cpp/compiler_support/20#cpp_nontype_template_args_201911L #if defined(__cpp_nontype_template_args) \ && __cpp_nontype_template_args >= 201911L - , - float, double +TEMPLATE_TEST_CASE("Test constexpr type", "[constexpr_type]", std::int16_t, + std::int32_t, std::int64_t, float, double) +#else +TEMPLATE_TEST_CASE("Test constexpr type", "[constexpr_type]", std::int16_t, + std::int32_t, std::int64_t) #endif -) { using T = TestType; From ed1ba3e08c632bedd24b16afdf800cce23497f92 Mon Sep 17 00:00:00 2001 From: schnellerhase <56360279+schnellerhase@users.noreply.github.com> Date: Wed, 22 Oct 2025 16:21:23 +0200 Subject: [PATCH 45/45] Adapt new assembler --- cpp/dolfinx/fem/assemble_vector_impl.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/dolfinx/fem/assemble_vector_impl.h b/cpp/dolfinx/fem/assemble_vector_impl.h index 17616d64efc..da70f16d632 100644 --- a/cpp/dolfinx/fem/assemble_vector_impl.h +++ b/cpp/dolfinx/fem/assemble_vector_impl.h @@ -1319,15 +1319,15 @@ void assemble_vector( assert((entities.size() / 2) * cstride == coeffs.size()); if (bs == 1) { - impl::assemble_entities<1>( - P0, b, x_dofmap, x, entities, {dofs, bs, entities1}, fn, + impl::assemble_entities( + P0, b, x_dofmap, x, entities, {dofs, BS<1>(), entities1}, fn, constants, md::mdspan(coeffs.data(), entities.extent(0), cstride), cell_info0, perms); } else if (bs == 3) { - impl::assemble_entities<3>( - P0, b, x_dofmap, x, entities, {dofs, bs, entities1}, fn, + impl::assemble_entities( + P0, b, x_dofmap, x, entities, {dofs, BS<3>(), entities1}, fn, constants, md::mdspan(coeffs.data(), entities.size() / 2, cstride), cell_info0, perms);