From 31720f9115b24098fc01b0ba7cf52437db8b1061 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Fri, 10 Feb 2023 14:07:07 +0100 Subject: [PATCH 1/9] cuFINUFFT interface made. Need to create a test and see if it works --- src/FFT/FFT.h | 77 ++++ src/FFT/FFT.hpp | 1036 ++++++++++++++++++++++++++++------------------- 2 files changed, 687 insertions(+), 426 deletions(-) diff --git a/src/FFT/FFT.h b/src/FFT/FFT.h index 8a13e3b45..703fc9373 100644 --- a/src/FFT/FFT.h +++ b/src/FFT/FFT.h @@ -30,6 +30,7 @@ #include #include +#include #include #include #include @@ -64,6 +65,10 @@ namespace ippl { Tag classes for Cosine transforms */ class CosTransform {}; + /** + Tag classes for Non-uniform type of Fourier transforms + */ + class NUFFTransform {}; enum FFTComm { a2av = 0, @@ -110,6 +115,29 @@ namespace ippl { using backendCos = heffte::backend::stock_cos; }; #endif +#endif + +#ifdef KOKKOS_ENABLE_CUDA + template + struct CufinufftType {}; + + template <> + struct Cufinufft { + using makeplan = cufinufftf_makeplan; + using setpts = cufinufftf_setpts; + using transform = cufinufftf_execute; + using destroy = cufinufftf_destroy; + using plan_t = cufinufftf_plan; + }; + + template <> + struct Cufinufft { + using makeplan = cufinufft_makeplan; + using setpts = cufinufft_setpts; + using transform = cufinufft_execute; + using destroy = cufinufft_destroy; + using plan_t = cufinufft_plan; + }; #endif } @@ -296,6 +324,55 @@ namespace ippl { }; + /** + Non-uniform FFT class + */ + template + class FFT { + + public: + + typedef FieldLayout Layout_t; + typedef std::complex Complex_t; + typedef Field ComplexField_t; + + using makeplan = detail::Cufinufft::makeplan; + using setpts = detail::Cufinufft::setpts; + using transform = detail::Cufinufft::transform; + using destroy = detail::Cufinufft::destroy; + using plan_t = detail::Cufinufft::plan_t; + + /** Create a new FFT object with the layout for the input Field, type + * (1 or 2) for the NUFFT and parameters for cuFINUFFT. + */ + FFT(const Layout_t& layout, int type, const ParameterList& params); + + // Destructor + ~FFT(); + + /** Do the NUFFT. + */ + template + void transform(const ParticleAttrib< Vector, Properties... >& R, + ParticleAttrib& Q, ComplexField_t& f); + + + private: + + /** + setup performs the initialization necessary. + */ + void setup(const std::array& nmodes, + const ParameterList& params); + + plan_t plan_m; + int ier_m; + T tol_m; + int type_m; + + }; + + } #include "FFT/FFT.hpp" #endif // IPPL_FFT_FFT_H diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index 853651858..f804413b5 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -57,8 +57,8 @@ namespace ippl { * 1D FFTs we just have to make the length in other * dimensions to be 1. */ - std::array low; - std::array high; + std::array low; + std::array high; const NDIndex& lDom = layout.getLocalNDIndex(); @@ -88,45 +88,45 @@ namespace ippl { const ParameterList& params) { - heffte::box3d inbox = {low, high}; - heffte::box3d outbox = {low, high}; + heffte::box3d inbox = {low, high}; + heffte::box3d outbox = {low, high}; - heffte::plan_options heffteOptions = - heffte::default_options(); + heffte::plan_options heffteOptions = + heffte::default_options(); - if(!params.get("use_heffte_defaults")) { - heffteOptions.use_pencils = params.get("use_pencils"); - heffteOptions.use_reorder = params.get("use_reorder"); + if(!params.get("use_heffte_defaults")) { + heffteOptions.use_pencils = params.get("use_pencils"); + heffteOptions.use_reorder = params.get("use_reorder"); #ifdef Heffte_ENABLE_GPU - heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); + heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); #endif - switch (params.get("comm")) { - - case a2a: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; - break; - case a2av: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; - break; - case p2p: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p; - break; - case p2p_pl: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; - break; - default: - throw IpplException("FFT::setup", - "Unrecognized heffte communication type"); - } - } - - heffte_m = std::make_shared> - (inbox, outbox, Ippl::getComm(), heffteOptions); - - //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); - if(workspace_m.size() < heffte_m->size_workspace()) - workspace_m = workspace_t(heffte_m->size_workspace()); + switch (params.get("comm")) { + + case a2a: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; + break; + case a2av: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; + break; + case p2p: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p; + break; + case p2p_pl: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; + break; + default: + throw IpplException("FFT::setup", + "Unrecognized heffte communication type"); + } + } + + heffte_m = std::make_shared> + (inbox, outbox, Ippl::getComm(), heffteOptions); + + //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); + if(workspace_m.size() < heffte_m->size_workspace()) + workspace_m = workspace_t(heffte_m->size_workspace()); } @@ -138,74 +138,74 @@ namespace ippl { int direction, typename FFT::ComplexField_t& f) { - auto fview = f.getView(); - const int nghost = f.getNghost(); - - /** - *This copy to a temporary Kokkos view is needed because of following - *reasons: - *1) heffte wants the input and output fields without ghost layers - *2) heffte accepts data in layout left (by default) eventhough this - *can be changed during heffte box creation - */ - Kokkos::View - tempField("tempField", fview.extent(0) - 2*nghost, - fview.extent(1) - 2*nghost, - fview.extent(2) - 2*nghost); - - using mdrange_type = Kokkos::MDRangePolicy>; - - Kokkos::parallel_for("copy from Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempField(i-nghost, j-nghost, k-nghost).real( - fview(i, j, k).real()); - tempField(i-nghost, j-nghost, k-nghost).imag( - fview(i, j, k).imag()); - }); - - - - - if ( direction == 1 ) - { - heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::full); - } - else if ( direction == -1 ) - { - heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::none); - } - else - { - throw std::logic_error( - "Only 1:forward and -1:backward are allowed as directions"); - } - - - Kokkos::parallel_for("copy to Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - fview(i, j, k).real() = - tempField(i-nghost, j-nghost, k-nghost).real(); - fview(i, j, k).imag() = - tempField(i-nghost, j-nghost, k-nghost).imag(); - }); + auto fview = f.getView(); + const int nghost = f.getNghost(); + + /** + *This copy to a temporary Kokkos view is needed because of following + *reasons: + *1) heffte wants the input and output fields without ghost layers + *2) heffte accepts data in layout left (by default) eventhough this + *can be changed during heffte box creation + */ + Kokkos::View + tempField("tempField", fview.extent(0) - 2*nghost, + fview.extent(1) - 2*nghost, + fview.extent(2) - 2*nghost); + + using mdrange_type = Kokkos::MDRangePolicy>; + + Kokkos::parallel_for("copy from Kokkos FFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + tempField(i-nghost, j-nghost, k-nghost).real( + fview(i, j, k).real()); + tempField(i-nghost, j-nghost, k-nghost).imag( + fview(i, j, k).imag()); + }); + + + + + if ( direction == 1 ) + { + heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::full); + } + else if ( direction == -1 ) + { + heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::none); + } + else + { + throw std::logic_error( + "Only 1:forward and -1:backward are allowed as directions"); + } + + + Kokkos::parallel_for("copy to Kokkos FFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + fview(i, j, k).real() = + tempField(i-nghost, j-nghost, k-nghost).real(); + fview(i, j, k).imag() = + tempField(i-nghost, j-nghost, k-nghost).imag(); + }); } @@ -275,46 +275,46 @@ namespace ippl { const ParameterList& params) { - heffte::box3d inbox = {lowInput, highInput}; - heffte::box3d outbox = {lowOutput, highOutput}; + heffte::box3d inbox = {lowInput, highInput}; + heffte::box3d outbox = {lowOutput, highOutput}; - heffte::plan_options heffteOptions = - heffte::default_options(); + heffte::plan_options heffteOptions = + heffte::default_options(); - if(!params.get("use_heffte_defaults")) { - heffteOptions.use_pencils = params.get("use_pencils"); - heffteOptions.use_reorder = params.get("use_reorder"); + if(!params.get("use_heffte_defaults")) { + heffteOptions.use_pencils = params.get("use_pencils"); + heffteOptions.use_reorder = params.get("use_reorder"); #ifdef Heffte_ENABLE_GPU - heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); + heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); #endif - switch (params.get("comm")) { - - case a2a: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; - break; - case a2av: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; - break; - case p2p: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p; - break; - case p2p_pl: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; - break; - default: - throw IpplException("FFT::setup", - "Unrecognized heffte communication type"); - } - } - - heffte_m = std::make_shared> - (inbox, outbox, params.get("r2c_direction"), Ippl::getComm(), - heffteOptions); + switch (params.get("comm")) { + + case a2a: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; + break; + case a2av: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; + break; + case p2p: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p; + break; + case p2p_pl: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; + break; + default: + throw IpplException("FFT::setup", + "Unrecognized heffte communication type"); + } + } + + heffte_m = std::make_shared> + (inbox, outbox, params.get("r2c_direction"), Ippl::getComm(), + heffteOptions); - //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); - if(workspace_m.size() < heffte_m->size_workspace()) - workspace_m = workspace_t(heffte_m->size_workspace()); + //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); + if(workspace_m.size() < heffte_m->size_workspace()) + workspace_m = workspace_t(heffte_m->size_workspace()); } @@ -325,104 +325,104 @@ namespace ippl { typename FFT::RealField_t& f, typename FFT::ComplexField_t& g) { - auto fview = f.getView(); - auto gview = g.getView(); - const int nghostf = f.getNghost(); - const int nghostg = g.getNghost(); - - /** - *This copy to a temporary Kokkos view is needed because of following - *reasons: - *1) heffte wants the input and output fields without ghost layers - *2) heffte accepts data in layout left (by default) eventhough this - *can be changed during heffte box creation - */ - Kokkos::View - tempFieldf("tempFieldf", fview.extent(0) - 2*nghostf, - fview.extent(1) - 2*nghostf, - fview.extent(2) - 2*nghostf); - - Kokkos::View - tempFieldg("tempFieldg", gview.extent(0) - 2*nghostg, - gview.extent(1) - 2*nghostg, - gview.extent(2) - 2*nghostg); - - using mdrange_type = Kokkos::MDRangePolicy>; - - Kokkos::parallel_for("copy from Kokkos f field in FFT", - mdrange_type({nghostf, nghostf, nghostf}, - {fview.extent(0) - nghostf, - fview.extent(1) - nghostf, - fview.extent(2) - nghostf - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempFieldf(i-nghostf, j-nghostf, k-nghostf) = fview(i, j, k); - }); - Kokkos::parallel_for("copy from Kokkos g field in FFT", - mdrange_type({nghostg, nghostg, nghostg}, - {gview.extent(0) - nghostg, - gview.extent(1) - nghostg, - gview.extent(2) - nghostg - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempFieldg(i-nghostg, j-nghostg, k-nghostg).real( - gview(i, j, k).real()); - tempFieldg(i-nghostg, j-nghostg, k-nghostg).imag( - gview(i, j, k).imag()); - }); + auto fview = f.getView(); + auto gview = g.getView(); + const int nghostf = f.getNghost(); + const int nghostg = g.getNghost(); + + /** + *This copy to a temporary Kokkos view is needed because of following + *reasons: + *1) heffte wants the input and output fields without ghost layers + *2) heffte accepts data in layout left (by default) eventhough this + *can be changed during heffte box creation + */ + Kokkos::View + tempFieldf("tempFieldf", fview.extent(0) - 2*nghostf, + fview.extent(1) - 2*nghostf, + fview.extent(2) - 2*nghostf); + + Kokkos::View + tempFieldg("tempFieldg", gview.extent(0) - 2*nghostg, + gview.extent(1) - 2*nghostg, + gview.extent(2) - 2*nghostg); + + using mdrange_type = Kokkos::MDRangePolicy>; + + Kokkos::parallel_for("copy from Kokkos f field in FFT", + mdrange_type({nghostf, nghostf, nghostf}, + {fview.extent(0) - nghostf, + fview.extent(1) - nghostf, + fview.extent(2) - nghostf + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + tempFieldf(i-nghostf, j-nghostf, k-nghostf) = fview(i, j, k); + }); + Kokkos::parallel_for("copy from Kokkos g field in FFT", + mdrange_type({nghostg, nghostg, nghostg}, + {gview.extent(0) - nghostg, + gview.extent(1) - nghostg, + gview.extent(2) - nghostg + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + tempFieldg(i-nghostg, j-nghostg, k-nghostg).real( + gview(i, j, k).real()); + tempFieldg(i-nghostg, j-nghostg, k-nghostg).imag( + gview(i, j, k).imag()); + }); - if ( direction == 1 ) - { - heffte_m->forward( tempFieldf.data(), tempFieldg.data(), workspace_m.data(), - heffte::scale::full ); - } - else if ( direction == -1 ) - { - heffte_m->backward( tempFieldg.data(), tempFieldf.data(), workspace_m.data(), - heffte::scale::none ); - } - else - { - throw std::logic_error( - "Only 1:forward and -1:backward are allowed as directions"); - } - - - Kokkos::parallel_for("copy to Kokkos f field FFT", - mdrange_type({nghostf, nghostf, nghostf}, - {fview.extent(0) - nghostf, - fview.extent(1) - nghostf, - fview.extent(2) - nghostf - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - fview(i, j, k) = tempFieldf(i-nghostf, j-nghostf, k-nghostf); - }); - - Kokkos::parallel_for("copy to Kokkos g field FFT", - mdrange_type({nghostg, nghostg, nghostg}, - {gview.extent(0) - nghostg, - gview.extent(1) - nghostg, - gview.extent(2) - nghostg - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - gview(i, j, k).real() = - tempFieldg(i-nghostg, j-nghostg, k-nghostg).real(); - gview(i, j, k).imag() = - tempFieldg(i-nghostg, j-nghostg, k-nghostg).imag(); - }); + if ( direction == 1 ) + { + heffte_m->forward( tempFieldf.data(), tempFieldg.data(), workspace_m.data(), + heffte::scale::full ); + } + else if ( direction == -1 ) + { + heffte_m->backward( tempFieldg.data(), tempFieldf.data(), workspace_m.data(), + heffte::scale::none ); + } + else + { + throw std::logic_error( + "Only 1:forward and -1:backward are allowed as directions"); + } + + + Kokkos::parallel_for("copy to Kokkos f field FFT", + mdrange_type({nghostf, nghostf, nghostf}, + {fview.extent(0) - nghostf, + fview.extent(1) - nghostf, + fview.extent(2) - nghostf + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + fview(i, j, k) = tempFieldf(i-nghostf, j-nghostf, k-nghostf); + }); + + Kokkos::parallel_for("copy to Kokkos g field FFT", + mdrange_type({nghostg, nghostg, nghostg}, + {gview.extent(0) - nghostg, + gview.extent(1) - nghostg, + gview.extent(2) - nghostg + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + gview(i, j, k).real() = + tempFieldg(i-nghostg, j-nghostg, k-nghostg).real(); + gview(i, j, k).imag() = + tempFieldg(i-nghostg, j-nghostg, k-nghostg).imag(); + }); } @@ -446,8 +446,8 @@ namespace ippl { * 1D FFTs we just have to make the length in other * dimensions to be 1. */ - std::array low; - std::array high; + std::array low; + std::array high; const NDIndex& lDom = layout.getLocalNDIndex(); @@ -477,44 +477,44 @@ namespace ippl { const ParameterList& params) { - heffte::box3d inbox = {low, high}; - heffte::box3d outbox = {low, high}; + heffte::box3d inbox = {low, high}; + heffte::box3d outbox = {low, high}; - heffte::plan_options heffteOptions = - heffte::default_options(); + heffte::plan_options heffteOptions = + heffte::default_options(); - if(!params.get("use_heffte_defaults")) { - heffteOptions.use_pencils = params.get("use_pencils"); - heffteOptions.use_reorder = params.get("use_reorder"); + if(!params.get("use_heffte_defaults")) { + heffteOptions.use_pencils = params.get("use_pencils"); + heffteOptions.use_reorder = params.get("use_reorder"); #ifdef Heffte_ENABLE_GPU - heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); + heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); #endif - switch (params.get("comm")) { - - case a2a: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; - break; - case a2av: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; - break; - case p2p: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p; - break; - case p2p_pl: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; - break; - default: - throw IpplException("FFT::setup", - "Unrecognized heffte communication type"); - } - } - - heffte_m = std::make_shared> - (inbox, outbox, Ippl::getComm(), heffteOptions); - - //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); - if(workspace_m.size() < heffte_m->size_workspace()) - workspace_m = workspace_t(heffte_m->size_workspace()); + switch (params.get("comm")) { + + case a2a: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; + break; + case a2av: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; + break; + case p2p: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p; + break; + case p2p_pl: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; + break; + default: + throw IpplException("FFT::setup", + "Unrecognized heffte communication type"); + } + } + + heffte_m = std::make_shared> + (inbox, outbox, Ippl::getComm(), heffteOptions); + + //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); + if(workspace_m.size() < heffte_m->size_workspace()) + workspace_m = workspace_t(heffte_m->size_workspace()); } @@ -524,66 +524,66 @@ namespace ippl { int direction, typename FFT::Field_t& f) { - auto fview = f.getView(); - const int nghost = f.getNghost(); - - /** - *This copy to a temporary Kokkos view is needed because of following - *reasons: - *1) heffte wants the input and output fields without ghost layers - *2) heffte accepts data in layout left (by default) eventhough this - *can be changed during heffte box creation - */ - Kokkos::View - tempField("tempField", fview.extent(0) - 2*nghost, - fview.extent(1) - 2*nghost, - fview.extent(2) - 2*nghost); - - using mdrange_type = Kokkos::MDRangePolicy>; - - Kokkos::parallel_for("copy from Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempField(i-nghost, j-nghost, k-nghost) = - fview(i, j, k); - }); - - if ( direction == 1 ) - { - heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::full); - } - else if ( direction == -1 ) - { - heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::none); - } - else - { - throw std::logic_error( - "Only 1:forward and -1:backward are allowed as directions"); - } - - Kokkos::parallel_for("copy to Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - fview(i, j, k) = - tempField(i-nghost, j-nghost, k-nghost); - }); + auto fview = f.getView(); + const int nghost = f.getNghost(); + + /** + *This copy to a temporary Kokkos view is needed because of following + *reasons: + *1) heffte wants the input and output fields without ghost layers + *2) heffte accepts data in layout left (by default) eventhough this + *can be changed during heffte box creation + */ + Kokkos::View + tempField("tempField", fview.extent(0) - 2*nghost, + fview.extent(1) - 2*nghost, + fview.extent(2) - 2*nghost); + + using mdrange_type = Kokkos::MDRangePolicy>; + + Kokkos::parallel_for("copy from Kokkos FFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + tempField(i-nghost, j-nghost, k-nghost) = + fview(i, j, k); + }); + + if ( direction == 1 ) + { + heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::full); + } + else if ( direction == -1 ) + { + heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::none); + } + else + { + throw std::logic_error( + "Only 1:forward and -1:backward are allowed as directions"); + } + + Kokkos::parallel_for("copy to Kokkos FFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + fview(i, j, k) = + tempField(i-nghost, j-nghost, k-nghost); + }); } @@ -607,8 +607,8 @@ namespace ippl { * 1D FFTs we just have to make the length in other * dimensions to be 1. */ - std::array low; - std::array high; + std::array low; + std::array high; const NDIndex& lDom = layout.getLocalNDIndex(); @@ -638,44 +638,44 @@ namespace ippl { const ParameterList& params) { - heffte::box3d inbox = {low, high}; - heffte::box3d outbox = {low, high}; + heffte::box3d inbox = {low, high}; + heffte::box3d outbox = {low, high}; - heffte::plan_options heffteOptions = - heffte::default_options(); + heffte::plan_options heffteOptions = + heffte::default_options(); - if(!params.get("use_heffte_defaults")) { - heffteOptions.use_pencils = params.get("use_pencils"); - heffteOptions.use_reorder = params.get("use_reorder"); + if(!params.get("use_heffte_defaults")) { + heffteOptions.use_pencils = params.get("use_pencils"); + heffteOptions.use_reorder = params.get("use_reorder"); #ifdef Heffte_ENABLE_GPU - heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); + heffteOptions.use_gpu_aware = params.get("use_gpu_aware"); #endif - switch (params.get("comm")) { - - case a2a: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; - break; - case a2av: - heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; - break; - case p2p: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p; - break; - case p2p_pl: - heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; - break; - default: - throw IpplException("FFT::setup", - "Unrecognized heffte communication type"); - } - } - - heffte_m = std::make_shared> - (inbox, outbox, Ippl::getComm(), heffteOptions); - - //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); - if(workspace_m.size() < heffte_m->size_workspace()) - workspace_m = workspace_t(heffte_m->size_workspace()); + switch (params.get("comm")) { + + case a2a: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoall; + break; + case a2av: + heffteOptions.algorithm = heffte::reshape_algorithm::alltoallv; + break; + case p2p: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p; + break; + case p2p_pl: + heffteOptions.algorithm = heffte::reshape_algorithm::p2p_plined; + break; + default: + throw IpplException("FFT::setup", + "Unrecognized heffte communication type"); + } + } + + heffte_m = std::make_shared> + (inbox, outbox, Ippl::getComm(), heffteOptions); + + //heffte::gpu::device_set(Ippl::Comm->rank() % heffte::gpu::device_count()); + if(workspace_m.size() < heffte_m->size_workspace()) + workspace_m = workspace_t(heffte_m->size_workspace()); } @@ -686,66 +686,250 @@ namespace ippl { int direction, typename FFT::Field_t& f) { - auto fview = f.getView(); - const int nghost = f.getNghost(); - - /** - *This copy to a temporary Kokkos view is needed because of following - *reasons: - *1) heffte wants the input and output fields without ghost layers - *2) heffte accepts data in layout left (by default) eventhough this - *can be changed during heffte box creation - */ - Kokkos::View - tempField("tempField", fview.extent(0) - 2*nghost, - fview.extent(1) - 2*nghost, - fview.extent(2) - 2*nghost); - - using mdrange_type = Kokkos::MDRangePolicy>; - - Kokkos::parallel_for("copy from Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - tempField(i-nghost, j-nghost, k-nghost) = - fview(i, j, k); - }); - - if ( direction == 1 ) - { - heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::full); - } - else if ( direction == -1 ) - { - heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), - heffte::scale::none); - } - else - { - throw std::logic_error( - "Only 1:forward and -1:backward are allowed as directions"); - } - - Kokkos::parallel_for("copy to Kokkos FFT", - mdrange_type({nghost, nghost, nghost}, - {fview.extent(0) - nghost, - fview.extent(1) - nghost, - fview.extent(2) - nghost - }), - KOKKOS_LAMBDA(const size_t i, - const size_t j, - const size_t k) - { - fview(i, j, k) = - tempField(i-nghost, j-nghost, k-nghost); - }); + auto fview = f.getView(); + const int nghost = f.getNghost(); + + /** + *This copy to a temporary Kokkos view is needed because of following + *reasons: + *1) heffte wants the input and output fields without ghost layers + *2) heffte accepts data in layout left (by default) eventhough this + *can be changed during heffte box creation + */ + Kokkos::View + tempField("tempField", fview.extent(0) - 2*nghost, + fview.extent(1) - 2*nghost, + fview.extent(2) - 2*nghost); + + using mdrange_type = Kokkos::MDRangePolicy>; + + Kokkos::parallel_for("copy from Kokkos FFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + tempField(i-nghost, j-nghost, k-nghost) = + fview(i, j, k); + }); + + if ( direction == 1 ) + { + heffte_m->forward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::full); + } + else if ( direction == -1 ) + { + heffte_m->backward(tempField.data(), tempField.data(), workspace_m.data(), + heffte::scale::none); + } + else + { + throw std::logic_error( + "Only 1:forward and -1:backward are allowed as directions"); + } + + Kokkos::parallel_for("copy to Kokkos FFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + fview(i, j, k) = + tempField(i-nghost, j-nghost, k-nghost); + }); + + } + + + //========================================================================= + // FFT NUFFTransform Constructors + //========================================================================= + + /** + Create a new FFT object of type NUFFTransform, with a + given layout and cuFINUFFT parameters. + */ + + template + FFT::FFT(const Layout_t& layout, + int type, + const ParameterList& params) + { + + + /** + * cuFINUFFT requires to pass a 3D array even for 2D and + * 1D FFTs we just have to fill in other + * dimensions to be 1. Note this is different from Heffte + * where we fill 0. + */ + + std::array nmodes; + + const NDIndex& lDom = layout.getLocalNDIndex(); + + nmodes.fill(1); + + for(size_t d = 0; d < Dim; ++d) { + nmodes[d] = lDom[d].length();; + } + + type_m = type; + setup(nmodes, params); + } + + + /** + setup performs the initialization necessary. + */ + template + void + FFT::setup(const std::array& nmodes, + const ParameterList& params) + { + + cufinufft_opts opts; + ier = cufinufft_default_opts(type, Dim, &opts); + + if(!params.get("use_cufinufft_defaults")) { + tol = params.get("tolerance"); + opts.gpu_method = params.get("gpu_method"); + opts.gpu_sort = params.get("gpu_sort"); + opts.gpu_kerevalmeth = params.get("gpu_kerevalmeth"); + } + + int maxbatchsize = 0; //default option. ignored for ntransf = 1 which + // is our case + + int iflag; + + if(type_m == 1) { + iflag = -1; + } + else if(type_m == 2) { + iflag = 1; + } + else { + throw std::logic_error("Only type 1 and type 2 NUFFT are allowed now"); + } + + ier = makeplan(type_m, Dim, nmodes, iflag, 1, tol, + maxbatchsize, &plan, &opts); + + } + + + + template + template + void + FFT::transform(const ParticleAttrib< Vector, Properties... >& R, + ParticleAttrib& Q, + typename FFT::ComplexField_t& f) + { + auto fview = f.getView(); + auto Rview = R.getView(); + auto Qview = Q.getView(); + const int nghost = f.getNghost(); + + auto localNp = R.getParticleCount(); + + /** + * cuFINUFFT's layout is left, hence we allocate the temporary + * Kokkos views with the same layout + */ + Kokkos::View + tempField("tempField", fview.extent(0) - 2*nghost, + fview.extent(1) - 2*nghost, + fview.extent(2) - 2*nghost); + + + std::array, 3> tempR; + + tempR.fill(NULL); + + for(size_t d = 0; d < Dim; ++d) { + Kokkos::realloc(tempR[d], localNp); + } + + Kokkos::View*,Kokkos::LayoutLeft> tempQ("tempQ", localNp); + + using mdrange_type = Kokkos::MDRangePolicy>; + + Kokkos::parallel_for("copy from field data NUFFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + tempField(i-nghost, j-nghost, k-nghost).real( + fview(i, j, k).real()); + tempField(i-nghost, j-nghost, k-nghost).imag( + fview(i, j, k).imag()); + }); + + + Kokkos::parallel_for("copy from particle data NUFFT", + localNp, + KOKKOS_LAMBDA(const size_t i) + { + for(size_t d = 0; d < Dim; ++d) { + temp[R](i) = Rview(i)[d]; + } + tempQ(i).real(Qview(i)); + tempQ(i).imag(0.0); + }); + + ier = setpts(localNp, tempR[0].data(), tempR[1].data(), tempR[2].data(), 0, + NULL, NULL, NULL, plan); + + ier = transform(tempQ.data(), tempField.data(), plan); + + + if(type_m == 1) { + Kokkos::parallel_for("copy to field data NUFFT", + mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost + }), + KOKKOS_LAMBDA(const size_t i, + const size_t j, + const size_t k) + { + fview(i, j, k).real() = + tempField(i-nghost, j-nghost, k-nghost).real(); + fview(i, j, k).imag() = + tempField(i-nghost, j-nghost, k-nghost).imag(); + }); + } + else if(type_m == 2) { + Kokkos::parallel_for("copy to particle data NUFFT", + localNp, + KOKKOS_LAMBDA(const size_t i) + { + Qview(i) = tempQ(i).real(); + }); + } + } + + template + FFT::~FFT() { + + ier = destroy(plan); } } From 64d3df9980d1476d3fc1903260194e84366cf365 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 13 Feb 2023 16:54:07 +0100 Subject: [PATCH 2/9] Interface made and test done but have some compilation issues --- CMakeLists.txt | 7 +++++ src/CMakeLists.txt | 8 +++++- src/FFT/FFT.h | 60 ++++++++++++++++++++++++++--------------- src/FFT/FFT.hpp | 52 ++++++++++++++++++----------------- test/FFT/CMakeLists.txt | 6 +++++ 5 files changed, 87 insertions(+), 46 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 91e072bf8..8f15ec370 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -62,6 +62,13 @@ if (ENABLE_FFT) message (STATUS "Found Heffte_DIR: ${Heffte_DIR}") endif () +option (ENABLE_NUFFT "Enable NUFFT transform" OFF) +if (ENABLE_NUFFT) + add_definitions (-DENABLE_NUFFT) + find_package(CUFINUFFT REQUIRED) + message (STATUS "Found CUFINUFFT_DIR: ${CUFINUFFT_DIR}") +endif () + option (ENABLE_SOLVERS "Enable IPPL solvers" OFF) add_subdirectory (src) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index ad4b1f186..b4c04d6c5 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -94,7 +94,13 @@ include_directories ( add_library ( ippl ${IPPL_SRCS} ${IPPL_SRCS_FORT} ) -target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY}) + +if (ENABLE_NUFFT) + target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY} ${CUFINUFFT_LIBRARY_DIR}/libcufinufft.a) +else() + target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY}) +endif() + install (TARGETS ippl DESTINATION lib) install (FILES ${IPPL_BASEDIR_HDRS} DESTINATION include) diff --git a/src/FFT/FFT.h b/src/FFT/FFT.h index 703fc9373..16fab61f3 100644 --- a/src/FFT/FFT.h +++ b/src/FFT/FFT.h @@ -37,6 +37,7 @@ #include "FieldLayout/FieldLayout.h" #include "Field/Field.h" +#include "Particle/ParticleAttrib.h" #include "Utility/ParameterList.h" #include "Utility/IpplException.h" @@ -122,21 +123,33 @@ namespace ippl { struct CufinufftType {}; template <> - struct Cufinufft { - using makeplan = cufinufftf_makeplan; - using setpts = cufinufftf_setpts; - using transform = cufinufftf_execute; - using destroy = cufinufftf_destroy; - using plan_t = cufinufftf_plan; + struct CufinufftType { + //using makeplan = typename cufinufftf_makeplan; + //using setpts = typename cufinufftf_setpts; + //using execute = typename cufinufftf_execute; + //using destroy = typename cufinufftf_destroy; + //using plan_t = typename cufinufftf_plan; + + + //typedef typename cufinufftf_makeplan makeplan; + //typedef typename cufinufftf_setpts setpts; + //typedef typename cufinufftf_execute execute; + //typedef typename cufinufftf_destroy destroy; + //typedef typename cufinufftf_plan plan_t; }; template <> - struct Cufinufft { - using makeplan = cufinufft_makeplan; - using setpts = cufinufft_setpts; - using transform = cufinufft_execute; - using destroy = cufinufft_destroy; - using plan_t = cufinufft_plan; + struct CufinufftType { + //using makeplan = typename cufinufft_makeplan; + //using setpts = typename cufinufft_setpts; + //using execute = typename cufinufft_execute; + //using destroy = typename cufinufft_destroy; + //using plan_t = typename cufinufft_plan; + //typedef typename cufinufft_makeplan makeplan; + //typedef typename cufinufft_setpts setpts; + //typedef typename cufinufft_execute execute; + //typedef typename cufinufft_destroy destroy; + //typedef typename cufinufft_plan plan_t; }; #endif } @@ -333,14 +346,15 @@ namespace ippl { public: typedef FieldLayout Layout_t; - typedef std::complex Complex_t; - typedef Field ComplexField_t; + typedef std::complex StdComplex_t; + typedef Kokkos::complex KokkosComplex_t; + typedef Field ComplexField_t; - using makeplan = detail::Cufinufft::makeplan; - using setpts = detail::Cufinufft::setpts; - using transform = detail::Cufinufft::transform; - using destroy = detail::Cufinufft::destroy; - using plan_t = detail::Cufinufft::plan_t; + //using makeplan = typename detail::CufinufftType::makeplan; + //using setpts = typename detail::CufinufftType::setpts; + //using execute = typename detail::CufinufftType::execute; + //using destroy = typename detail::CufinufftType::destroy; + //using plan_t = typename detail::CufinufftType::plan_t; /** Create a new FFT object with the layout for the input Field, type * (1 or 2) for the NUFFT and parameters for cuFINUFFT. @@ -355,6 +369,9 @@ namespace ippl { template void transform(const ParticleAttrib< Vector, Properties... >& R, ParticleAttrib& Q, ComplexField_t& f); + //template + //void transform(const ParticleAttrib< Vector>& R, + // ParticleAttrib>& Q, ComplexField_t& f); private: @@ -362,10 +379,11 @@ namespace ippl { /** setup performs the initialization necessary. */ - void setup(const std::array& nmodes, + void setup(std::array& nmodes, const ParameterList& params); - plan_t plan_m; + //plan_t plan_m; + cufinufft_plan plan_m; int ier_m; T tol_m; int type_m; diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index f804413b5..59abf7184 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -793,15 +793,15 @@ namespace ippl { */ template void - FFT::setup(const std::array& nmodes, + FFT::setup(std::array& nmodes, const ParameterList& params) { cufinufft_opts opts; - ier = cufinufft_default_opts(type, Dim, &opts); + ier_m = cufinufft_default_opts(type_m, Dim, &opts); if(!params.get("use_cufinufft_defaults")) { - tol = params.get("tolerance"); + tol_m = params.get("tolerance"); opts.gpu_method = params.get("gpu_method"); opts.gpu_sort = params.get("gpu_sort"); opts.gpu_kerevalmeth = params.get("gpu_kerevalmeth"); @@ -821,9 +821,10 @@ namespace ippl { else { throw std::logic_error("Only type 1 and type 2 NUFFT are allowed now"); } - - ier = makeplan(type_m, Dim, nmodes, iflag, 1, tol, - maxbatchsize, &plan, &opts); + + int dim = static_cast(Dim); + ier_m = cufinufft_makeplan(type_m, dim, nmodes.data(), iflag, 1, tol_m, + maxbatchsize, &plan_m, &opts); } @@ -835,6 +836,9 @@ namespace ippl { FFT::transform(const ParticleAttrib< Vector, Properties... >& R, ParticleAttrib& Q, typename FFT::ComplexField_t& f) + //FFT::transform(const ParticleAttrib< Vector>& R, + // ParticleAttrib>& Q, + // typename FFT::ComplexField_t& f) { auto fview = f.getView(); auto Rview = R.getView(); @@ -847,21 +851,21 @@ namespace ippl { * cuFINUFFT's layout is left, hence we allocate the temporary * Kokkos views with the same layout */ - Kokkos::View + Kokkos::View tempField("tempField", fview.extent(0) - 2*nghost, fview.extent(1) - 2*nghost, fview.extent(2) - 2*nghost); - std::array, 3> tempR; + Vector, 3> tempR; - tempR.fill(NULL); for(size_t d = 0; d < Dim; ++d) { Kokkos::realloc(tempR[d], localNp); } + - Kokkos::View*,Kokkos::LayoutLeft> tempQ("tempQ", localNp); + Kokkos::View tempQ("tempQ", localNp); using mdrange_type = Kokkos::MDRangePolicy>; @@ -875,10 +879,10 @@ namespace ippl { const size_t j, const size_t k) { - tempField(i-nghost, j-nghost, k-nghost).real( - fview(i, j, k).real()); - tempField(i-nghost, j-nghost, k-nghost).imag( - fview(i, j, k).imag()); + tempField(i-nghost, j-nghost, k-nghost).x = + fview(i, j, k).real(); + tempField(i-nghost, j-nghost, k-nghost).y = + fview(i, j, k).imag(); }); @@ -887,16 +891,16 @@ namespace ippl { KOKKOS_LAMBDA(const size_t i) { for(size_t d = 0; d < Dim; ++d) { - temp[R](i) = Rview(i)[d]; + tempR[d](i) = Rview(i)[d]; } - tempQ(i).real(Qview(i)); - tempQ(i).imag(0.0); + tempQ(i).x = Qview(i).real(); + tempQ(i).y = Qview(i).imag(); }); - ier = setpts(localNp, tempR[0].data(), tempR[1].data(), tempR[2].data(), 0, - NULL, NULL, NULL, plan); + ier_m = cufinufft_setpts(localNp, tempR[0].data(), tempR[1].data(), tempR[2].data(), 0, + NULL, NULL, NULL, plan_m); - ier = transform(tempQ.data(), tempField.data(), plan); + ier_m = cufinufft_execute(tempQ.data(), tempField.data(), plan_m); if(type_m == 1) { @@ -911,9 +915,9 @@ namespace ippl { const size_t k) { fview(i, j, k).real() = - tempField(i-nghost, j-nghost, k-nghost).real(); + tempField(i-nghost, j-nghost, k-nghost).x; fview(i, j, k).imag() = - tempField(i-nghost, j-nghost, k-nghost).imag(); + tempField(i-nghost, j-nghost, k-nghost).y; }); } else if(type_m == 2) { @@ -921,7 +925,7 @@ namespace ippl { localNp, KOKKOS_LAMBDA(const size_t i) { - Qview(i) = tempQ(i).real(); + Qview(i) = tempQ(i).x; }); } } @@ -929,7 +933,7 @@ namespace ippl { template FFT::~FFT() { - ier = destroy(plan); + ier_m = cufinufft_destroy(plan_m); } } diff --git a/test/FFT/CMakeLists.txt b/test/FFT/CMakeLists.txt index 5d0332166..7b7ecfdde 100644 --- a/test/FFT/CMakeLists.txt +++ b/test/FFT/CMakeLists.txt @@ -39,6 +39,12 @@ target_link_libraries ( ${IPPL_LIBS} ${MPI_CXX_LIBRARIES} ) +add_executable (TestNUFFT1 TestNUFFT1.cpp) +target_link_libraries ( + TestNUFFT1 + ${IPPL_LIBS} + ${MPI_CXX_LIBRARIES} +) # vi: set et ts=4 sw=4 sts=4: # Local Variables: From e1ab9d118a0b2bdaaabba8b24831e56ac496fa96 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Mon, 13 Feb 2023 16:55:04 +0100 Subject: [PATCH 3/9] test file added --- test/FFT/TestNUFFT1.cpp | 199 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 199 insertions(+) create mode 100644 test/FFT/TestNUFFT1.cpp diff --git a/test/FFT/TestNUFFT1.cpp b/test/FFT/TestNUFFT1.cpp new file mode 100644 index 000000000..822236627 --- /dev/null +++ b/test/FFT/TestNUFFT1.cpp @@ -0,0 +1,199 @@ +#include "Ippl.h" +#include "Utility/ParameterList.h" + +#include +#include +#include +#include +#include + +template +struct Bunch : public ippl::ParticleBase +{ + + Bunch(PLayout& playout) + : ippl::ParticleBase(playout) + { + this->addAttribute(Q); + } + + ~Bunch(){ } + + typedef ippl::ParticleAttrib> charge_container_type; + charge_container_type Q; + +}; + +template +struct generate_random { + + using view_type = typename ippl::detail::ViewType::view_type; + using value_type = typename T::value_type; + using view_type_complex = typename ippl::detail::ViewType, 1>::view_type; + // Output View for the random numbers + view_type x; + + view_type_complex Q; + + // The GeneratorPool + GeneratorPool rand_pool; + + T minU, maxU; + + // Initialize all members + generate_random(view_type x_,view_type_complex Q_, GeneratorPool rand_pool_, + T& minU_, T& maxU_) + : x(x_), Q(Q_), rand_pool(rand_pool_), + minU(minU_), maxU(maxU_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t i) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + + for (unsigned d = 0; d < Dim; ++d) { + x(i)[d] = rand_gen.drand(minU[d], maxU[d]); + } + Q(i).real() = rand_gen.drand(0.0, 1.0); + Q(i).imag() = rand_gen.drand(0.0, 1.0); + + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); + } +}; + + +int main(int argc, char *argv[]) { + + Ippl ippl(argc,argv); + + constexpr unsigned int dim = 3; + const double pi = std::acos(-1.0); + + typedef ippl::ParticleSpatialLayout playout_type; + typedef Bunch bunch_type; + + + std::array pt = {32, 32, 32}; + ippl::Index I(pt[0]); + ippl::Index J(pt[1]); + ippl::Index K(pt[2]); + ippl::NDIndex owned(I, J, K); + + ippl::e_dim_tag decomp[dim]; // Specifies SERIAL, PARALLEL dims + for (unsigned int d=0; d layout(owned, decomp); + + std::array dx = { + 2.0 * pi / double(pt[0]), + 2.0 * pi / double(pt[1]), + 2.0 * pi / double(pt[2]), + }; + + typedef ippl::Vector Vector_t; + + Vector_t hx = {dx[0], dx[1], dx[2]}; + Vector_t origin = {-pi, -pi, -pi}; + ippl::UniformCartesian mesh(owned, hx, origin); + + playout_type pl(layout, mesh); + + bunch_type bunch(pl); + bunch.setParticleBC(ippl::BC::PERIODIC); + + using size_type = ippl::detail::size_type; + + + size_type Np = std::pow(32,3) * 10; + + typedef ippl::Field, dim> field_type; + + field_type field(mesh, layout); + + ippl::ParameterList fftParams; + + fftParams.add("use_cufinufft_defaults", true); + + typedef ippl::FFT FFT_type; + + std::unique_ptr fft; + + int type = 1; + + fft = std::make_unique(layout, type, fftParams); + + Vector_t minU = {-pi, -pi, -pi}; + Vector_t maxU = {pi, pi, pi}; + + + size_type nloc = Np/Ippl::Comm->size(); + + bunch.create(nloc); + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42)); + Kokkos::parallel_for(nloc, + generate_random, dim>( + bunch.R.getView(), bunch.Q.getView(), rand_pool64, minU, maxU)); + + + fft->transform(bunch.R, bunch.Q, field); + + auto field_result = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), field.getView()); + + Kokkos::complex max_error_abs(0.0, 0.0); + Kokkos::complex max_error_rel(0.0, 0.0); + + //Pick some mode to check. We choose it same as cuFINUFFT testcase example2d1many.cpp in + //the first 2 dimensions + ippl::Vector kVec; + kVec[0] = (int)(0.37 * pt[0]); + kVec[1] = (int)(0.26 * pt[1]); + kVec[2] = (int)(0.20 * pt[2]); + + //Linearize based on LayoutLeft and the results from cuFINUFFT are already fftshifted + //int it = (pt[0]/2 + kVec[0]) + (pt[0] * (pt[1]/2 + kVec[1])) + + // (pt[0] * pt[1] * (pt[2]/2 + kVec[2])); + + int iInd = (pt[0]/2 + kVec[0]); + int jInd = (pt[1]/2 + kVec[1]); + int kInd = (pt[2]/2 + kVec[2]); + + + Kokkos::complex reducedValue(0.0, 0.0); + + auto Rview = bunch.R.getView(); + auto Qview = bunch.Q.getView(); + + Kokkos::complex imag = {0.0, 1.0}; + + Kokkos::parallel_reduce("NUDFT type1", nloc, + KOKKOS_LAMBDA(const size_t idx, Kokkos::complex& valL) { + + double arg = 0.0; + for(size_t d = 0; d < dim; ++d) { + arg += kVec[d]*Rview(idx)[d]; + } + + valL += (Kokkos::Experimental::cos(arg) + - imag * Kokkos::Experimental::sin(arg)) * Qview(idx); + }, Kokkos::Sum>(reducedValue)); + + double abs_error_real = std::fabs(reducedValue.real() - field_result(iInd, jInd, kInd).real()); + double rel_error_real = std::fabs(reducedValue.real() - field_result(iInd, jInd, kInd).real()) /std::fabs(reducedValue.real()); + double abs_error_imag = std::fabs(reducedValue.imag() - field_result(iInd, jInd, kInd).imag()); + double rel_error_imag = std::fabs(reducedValue.imag() - field_result(iInd, jInd, kInd).imag()) /std::fabs(reducedValue.imag()); + + std::cout << "Abs Error in real part: " << std::setprecision(16) + << abs_error_real << "Rel. error: " << std::setprecision(16) << rel_error_real << std::endl; + std::cout << "Abs Error in imag part: " << std::setprecision(16) + << abs_error_imag << "Rel. error: " << std::setprecision(16) << rel_error_imag << std::endl; + + + //Kokkos::complex max_error(0.0, 0.0); + //MPI_Reduce(&max_error_local, &max_error, 1, + // MPI_C_DOUBLE_COMPLEX, MPI_MAX, 0, Ippl::getComm()); + + return 0; +} From 9f7534f129b3e31118d898686605043bb2a33bdb Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 14 Feb 2023 08:56:56 +0100 Subject: [PATCH 4/9] Find cmake file added for cufinufft --- CMakeModules/FindCUFINUFFT.cmake | 31 +++++++++++++++++++++++++++++++ src/CMakeLists.txt | 2 +- 2 files changed, 32 insertions(+), 1 deletion(-) create mode 100644 CMakeModules/FindCUFINUFFT.cmake diff --git a/CMakeModules/FindCUFINUFFT.cmake b/CMakeModules/FindCUFINUFFT.cmake new file mode 100644 index 000000000..755062d33 --- /dev/null +++ b/CMakeModules/FindCUFINUFFT.cmake @@ -0,0 +1,31 @@ +# +# Find CUFINUFFT includes and library +# +# CUFINUFFT_INCLUDE_DIR - where to find cufinufft.h +# CUFINUFFT_LIBRARY - libcufinufft.a path +# CUFINUFFT_FOUND - do not attempt to use if "no" or undefined. + +FIND_PATH(CUFINUFFT_INCLUDE_DIR cufinufft.h + HINTS $ENV{CUFINUFFT_INCLUDE_PATH} $ENV{CUFINUFFT_INCLUDE_DIR} $ENV{CUFINUFFT_PREFIX}/include $ENV{CUFINUFFT_DIR}/include ${PROJECT_SOURCE_DIR}/include + PATHS ENV C_INCLUDE_PATH +) + +FIND_LIBRARY(CUFINUFFT_LIBRARY_DIR libcufinufft.a + HINTS $ENV{CUFINUFFT_LIBRARY_PATH} $ENV{CUFINUFFT_LIBRARY_DIR} $ENV{CUFINUFFT_PREFIX}/lib-static $ENV{CUFINUFFT_DIR}/lib-static $ENV{CUFINUFFT}/lib-static ${PROJECT_SOURCE_DIR}/lib-static + PATHS ENV LIBRARY_PATH +) + +IF(CUFINUFFT_INCLUDE_DIR AND CUFINUFFT_LIBRARY_DIR) + SET( CUFINUFFT_FOUND "YES" ) +ENDIF() + +IF (CUFINUFFT_FOUND) + IF (NOT CUFINUFFT_FIND_QUIETLY) + MESSAGE(STATUS "Found cufinufft library dir: ${CUFINUFFT_LIBRARY_DIR}") + MESSAGE(STATUS "Found cufinufft include dir: ${CUFINUFFT_INCLUDE_DIR}") + ENDIF (NOT CUFINUFFT_FIND_QUIETLY) +ELSE (CUFINUFFT_FOUND) + IF (CUFINUFFT_FIND_REQUIRED) + MESSAGE(FATAL_ERROR "Could not find CUFINUFFT!") + ENDIF (CUFINUFFT_FIND_REQUIRED) +ENDIF (CUFINUFFT_FOUND) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index b4c04d6c5..8c96a6bc7 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -96,7 +96,7 @@ add_library ( ippl ${IPPL_SRCS} ${IPPL_SRCS_FORT} ) if (ENABLE_NUFFT) - target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY} ${CUFINUFFT_LIBRARY_DIR}/libcufinufft.a) + target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY} ${CUFINUFFT_LIBRARY_DIR}) else() target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY}) endif() From dccf8cac5b049465c2ec25486b1b4e7a4e94d594 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 14 Feb 2023 13:27:34 +0100 Subject: [PATCH 5/9] Test for type 2 NUFFT also added --- CMakeModules/FindCUFINUFFT.cmake | 6 +- src/FFT/FFT.hpp | 31 +++-- test/FFT/CMakeLists.txt | 6 + test/FFT/TestNUFFT1.cpp | 30 ++-- test/FFT/TestNUFFT2.cpp | 229 +++++++++++++++++++++++++++++++ 5 files changed, 273 insertions(+), 29 deletions(-) create mode 100644 test/FFT/TestNUFFT2.cpp diff --git a/CMakeModules/FindCUFINUFFT.cmake b/CMakeModules/FindCUFINUFFT.cmake index 755062d33..691eb510f 100644 --- a/CMakeModules/FindCUFINUFFT.cmake +++ b/CMakeModules/FindCUFINUFFT.cmake @@ -7,11 +7,11 @@ FIND_PATH(CUFINUFFT_INCLUDE_DIR cufinufft.h HINTS $ENV{CUFINUFFT_INCLUDE_PATH} $ENV{CUFINUFFT_INCLUDE_DIR} $ENV{CUFINUFFT_PREFIX}/include $ENV{CUFINUFFT_DIR}/include ${PROJECT_SOURCE_DIR}/include - PATHS ENV C_INCLUDE_PATH + PATHS ENV CPP_INCLUDE_PATH ) -FIND_LIBRARY(CUFINUFFT_LIBRARY_DIR libcufinufft.a - HINTS $ENV{CUFINUFFT_LIBRARY_PATH} $ENV{CUFINUFFT_LIBRARY_DIR} $ENV{CUFINUFFT_PREFIX}/lib-static $ENV{CUFINUFFT_DIR}/lib-static $ENV{CUFINUFFT}/lib-static ${PROJECT_SOURCE_DIR}/lib-static +FIND_LIBRARY(CUFINUFFT_LIBRARY_DIR libcufinufft.so + HINTS $ENV{CUFINUFFT_LIBRARY_PATH} $ENV{CUFINUFFT_LIBRARY_DIR} $ENV{CUFINUFFT_PREFIX}/lib $ENV{CUFINUFFT_DIR}/lib $ENV{CUFINUFFT}/lib ${PROJECT_SOURCE_DIR}/lib PATHS ENV LIBRARY_PATH ) diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index 59abf7184..4ef372730 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -799,6 +799,7 @@ namespace ippl { cufinufft_opts opts; ier_m = cufinufft_default_opts(type_m, Dim, &opts); + tol_m = 1e-6; if(!params.get("use_cufinufft_defaults")) { tol_m = params.get("tolerance"); @@ -836,9 +837,6 @@ namespace ippl { FFT::transform(const ParticleAttrib< Vector, Properties... >& R, ParticleAttrib& Q, typename FFT::ComplexField_t& f) - //FFT::transform(const ParticleAttrib< Vector>& R, - // ParticleAttrib>& Q, - // typename FFT::ComplexField_t& f) { auto fview = f.getView(); auto Rview = R.getView(); @@ -857,12 +855,15 @@ namespace ippl { fview.extent(2) - 2*nghost); - Vector, 3> tempR; + //Vector, 3> tempR; + Kokkos::View tempRx("tempRx", localNp); + Kokkos::View tempRy("tempRy", localNp); + Kokkos::View tempRz("tempRz", localNp); - for(size_t d = 0; d < Dim; ++d) { - Kokkos::realloc(tempR[d], localNp); - } + //for(size_t d = 0; d < Dim; ++d) { + // Kokkos::realloc(tempR[d], localNp); + //} Kokkos::View tempQ("tempQ", localNp); @@ -890,14 +891,19 @@ namespace ippl { localNp, KOKKOS_LAMBDA(const size_t i) { - for(size_t d = 0; d < Dim; ++d) { - tempR[d](i) = Rview(i)[d]; - } + //for(size_t d = 0; d < Dim; ++d) { + // tempR[d](i) = Rview(i)[d]; + //} + tempRx(i) = Rview(i)[0]; + tempRy(i) = Rview(i)[1]; + tempRz(i) = Rview(i)[2]; tempQ(i).x = Qview(i).real(); tempQ(i).y = Qview(i).imag(); }); - ier_m = cufinufft_setpts(localNp, tempR[0].data(), tempR[1].data(), tempR[2].data(), 0, + //ier_m = cufinufft_setpts(localNp, tempR[0].data(), tempR[1].data(), tempR[2].data(), 0, + // NULL, NULL, NULL, plan_m); + ier_m = cufinufft_setpts(localNp, tempRx.data(), tempRy.data(), tempRz.data(), 0, NULL, NULL, NULL, plan_m); ier_m = cufinufft_execute(tempQ.data(), tempField.data(), plan_m); @@ -925,7 +931,8 @@ namespace ippl { localNp, KOKKOS_LAMBDA(const size_t i) { - Qview(i) = tempQ(i).x; + Qview(i).real() = tempQ(i).x; + Qview(i).imag() = tempQ(i).y; }); } } diff --git a/test/FFT/CMakeLists.txt b/test/FFT/CMakeLists.txt index 7b7ecfdde..4d3e5fe90 100644 --- a/test/FFT/CMakeLists.txt +++ b/test/FFT/CMakeLists.txt @@ -45,6 +45,12 @@ target_link_libraries ( ${IPPL_LIBS} ${MPI_CXX_LIBRARIES} ) +add_executable (TestNUFFT2 TestNUFFT2.cpp) +target_link_libraries ( + TestNUFFT2 + ${IPPL_LIBS} + ${MPI_CXX_LIBRARIES} +) # vi: set et ts=4 sw=4 sts=4: # Local Variables: diff --git a/test/FFT/TestNUFFT1.cpp b/test/FFT/TestNUFFT1.cpp index 822236627..06ac71234 100644 --- a/test/FFT/TestNUFFT1.cpp +++ b/test/FFT/TestNUFFT1.cpp @@ -74,7 +74,7 @@ int main(int argc, char *argv[]) { typedef Bunch bunch_type; - std::array pt = {32, 32, 32}; + std::array pt = {256, 256, 256}; ippl::Index I(pt[0]); ippl::Index J(pt[1]); ippl::Index K(pt[2]); @@ -106,7 +106,7 @@ int main(int argc, char *argv[]) { using size_type = ippl::detail::size_type; - size_type Np = std::pow(32,3) * 10; + size_type Np = std::pow(256,3) * 8; typedef ippl::Field, dim> field_type; @@ -114,7 +114,12 @@ int main(int argc, char *argv[]) { ippl::ParameterList fftParams; - fftParams.add("use_cufinufft_defaults", true); + fftParams.add("gpu_method", 1); + fftParams.add("gpu_sort", 1); + fftParams.add("gpu_kerevalmeth", 1); + fftParams.add("tolerance", 1e-6); + + fftParams.add("use_cufinufft_defaults", false); typedef ippl::FFT FFT_type; @@ -145,20 +150,17 @@ int main(int argc, char *argv[]) { Kokkos::complex max_error_abs(0.0, 0.0); Kokkos::complex max_error_rel(0.0, 0.0); - //Pick some mode to check. We choose it same as cuFINUFFT testcase example2d1many.cpp in - //the first 2 dimensions + //Pick some mode to check. We choose it same as cuFINUFFT testcase cufinufft3d1_test.cu ippl::Vector kVec; kVec[0] = (int)(0.37 * pt[0]); kVec[1] = (int)(0.26 * pt[1]); - kVec[2] = (int)(0.20 * pt[2]); + kVec[2] = (int)(0.13 * pt[2]); - //Linearize based on LayoutLeft and the results from cuFINUFFT are already fftshifted - //int it = (pt[0]/2 + kVec[0]) + (pt[0] * (pt[1]/2 + kVec[1])) + - // (pt[0] * pt[1] * (pt[2]/2 + kVec[2])); + const int nghost = field.getNghost(); - int iInd = (pt[0]/2 + kVec[0]); - int jInd = (pt[1]/2 + kVec[1]); - int kInd = (pt[2]/2 + kVec[2]); + int iInd = (pt[0]/2 + kVec[0] + nghost); + int jInd = (pt[1]/2 + kVec[1] + nghost); + int kInd = (pt[2]/2 + kVec[2] + nghost); Kokkos::complex reducedValue(0.0, 0.0); @@ -186,9 +188,9 @@ int main(int argc, char *argv[]) { double rel_error_imag = std::fabs(reducedValue.imag() - field_result(iInd, jInd, kInd).imag()) /std::fabs(reducedValue.imag()); std::cout << "Abs Error in real part: " << std::setprecision(16) - << abs_error_real << "Rel. error: " << std::setprecision(16) << rel_error_real << std::endl; + << abs_error_real << " Rel. error in real part: " << std::setprecision(16) << rel_error_real << std::endl; std::cout << "Abs Error in imag part: " << std::setprecision(16) - << abs_error_imag << "Rel. error: " << std::setprecision(16) << rel_error_imag << std::endl; + << abs_error_imag << " Rel. error in imag part: " << std::setprecision(16) << rel_error_imag << std::endl; //Kokkos::complex max_error(0.0, 0.0); diff --git a/test/FFT/TestNUFFT2.cpp b/test/FFT/TestNUFFT2.cpp new file mode 100644 index 000000000..147c2ba74 --- /dev/null +++ b/test/FFT/TestNUFFT2.cpp @@ -0,0 +1,229 @@ +#include "Ippl.h" +#include "Utility/ParameterList.h" + +#include +#include +#include +#include +#include + +template +struct Bunch : public ippl::ParticleBase +{ + + Bunch(PLayout& playout) + : ippl::ParticleBase(playout) + { + this->addAttribute(Q); + } + + ~Bunch(){ } + + typedef ippl::ParticleAttrib> charge_container_type; + charge_container_type Q; + +}; + +template +struct generate_random_particles { + + using view_type = typename ippl::detail::ViewType::view_type; + using value_type = typename T::value_type; + // Output View for the random numbers + view_type x; + + // The GeneratorPool + GeneratorPool rand_pool; + + T minU, maxU; + + // Initialize all members + generate_random_particles(view_type x_, GeneratorPool rand_pool_, + T& minU_, T& maxU_) + : x(x_), rand_pool(rand_pool_), + minU(minU_), maxU(maxU_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t i) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + + for (unsigned d = 0; d < Dim; ++d) { + x(i)[d] = rand_gen.drand(minU[d], maxU[d]); + } + + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); + } +}; + +template +struct generate_random_field { + + using view_type = typename ippl::detail::ViewType::view_type; + view_type f; + + // The GeneratorPool + GeneratorPool rand_pool; + + // Initialize all members + generate_random_field(view_type f_, GeneratorPool rand_pool_) + : f(f_), rand_pool(rand_pool_) {} + + KOKKOS_INLINE_FUNCTION + void operator()(const size_t i, const size_t j, const size_t k) const { + // Get a random number state from the pool for the active thread + typename GeneratorPool::generator_type rand_gen = rand_pool.get_state(); + + f(i, j, k).real() = rand_gen.drand(0.0, 1.0); + f(i, j, k).imag() = rand_gen.drand(0.0, 1.0); + + // Give the state back, which will allow another thread to acquire it + rand_pool.free_state(rand_gen); + } +}; + +int main(int argc, char *argv[]) { + + Ippl ippl(argc,argv); + + constexpr unsigned int dim = 3; + const double pi = std::acos(-1.0); + + typedef ippl::ParticleSpatialLayout playout_type; + typedef Bunch bunch_type; + + + ippl::Vector pt = {32, 32, 32}; + ippl::Index I(pt[0]); + ippl::Index J(pt[1]); + ippl::Index K(pt[2]); + ippl::NDIndex owned(I, J, K); + + ippl::e_dim_tag decomp[dim]; // Specifies SERIAL, PARALLEL dims + for (unsigned int d=0; d layout(owned, decomp); + + std::array dx = { + 2.0 * pi / double(pt[0]), + 2.0 * pi / double(pt[1]), + 2.0 * pi / double(pt[2]), + }; + + typedef ippl::Vector Vector_t; + //typedef ippl::Vector, 3> CxVector_t; + + Vector_t hx = {dx[0], dx[1], dx[2]}; + Vector_t origin = {-pi, -pi, -pi}; + ippl::UniformCartesian mesh(owned, hx, origin); + + playout_type pl(layout, mesh); + + bunch_type bunch(pl); + bunch.setParticleBC(ippl::BC::PERIODIC); + + using size_type = ippl::detail::size_type; + + + size_type Np = std::pow(32,3) * 10; + + typedef ippl::Field, dim> field_type; + + field_type field(mesh, layout); + + ippl::ParameterList fftParams; + + fftParams.add("gpu_method", 1); + fftParams.add("gpu_sort", 1); + fftParams.add("gpu_kerevalmeth", 1); + fftParams.add("tolerance", 1e-12); + + fftParams.add("use_cufinufft_defaults", false); + + typedef ippl::FFT FFT_type; + + std::unique_ptr fft; + + int type = 2; + + fft = std::make_unique(layout, type, fftParams); + + Vector_t minU = {-pi, -pi, -pi}; + Vector_t maxU = {pi, pi, pi}; + + + size_type nloc = Np/Ippl::Comm->size(); + + const int nghost = field.getNghost(); + using mdrange_type = Kokkos::MDRangePolicy>; + auto fview = field.getView(); + bunch.create(nloc); + Kokkos::Random_XorShift64_Pool<> rand_pool64((size_type)(42)); + Kokkos::parallel_for(nloc, + generate_random_particles, dim>( + bunch.R.getView(), rand_pool64, minU, maxU)); + + Kokkos::parallel_for(mdrange_type({nghost, nghost, nghost}, + {fview.extent(0) - nghost, + fview.extent(1) - nghost, + fview.extent(2) - nghost}), + generate_random_field, Kokkos::Random_XorShift64_Pool<>, dim>( + field.getView(), rand_pool64)); + + fft->transform(bunch.R, bunch.Q, field); + + auto Q_result = Kokkos::create_mirror_view_and_copy( + Kokkos::HostSpace(), bunch.Q.getView()); + + Kokkos::complex max_error_abs(0.0, 0.0); + Kokkos::complex max_error_rel(0.0, 0.0); + + //Pick some target point to check. We choose it same as cuFINUFFT testcase cufinufft3d2_test.cu + + int idx = nloc/2; + + Kokkos::complex reducedValue(0.0, 0.0); + + auto Rview = bunch.R.getView(); + + Kokkos::complex imag = {0.0, 1.0}; + + Kokkos::parallel_reduce("NUDFT type2", + mdrange_type({0, 0, 0}, + {fview.extent(0) - 2 * nghost, + fview.extent(1) - 2 * nghost, + fview.extent(2) - 2 * nghost}), + KOKKOS_LAMBDA(const int i, + const int j, + const int k, + Kokkos::complex& valL) + { + ippl::Vector iVec = {i, j, k}; + double arg = 0.0; + for(size_t d = 0; d < dim; ++d) { + arg += (iVec[d] - (pt[d]/2)) * Rview(idx)[d]; + } + + valL += (Kokkos::Experimental::cos(arg) + + imag * Kokkos::Experimental::sin(arg)) * fview(i + nghost, j + nghost, k + nghost); + }, Kokkos::Sum>(reducedValue)); + + double abs_error_real = std::fabs(reducedValue.real() - Q_result(idx).real()); + double rel_error_real = std::fabs(reducedValue.real() - Q_result(idx).real()) /std::fabs(reducedValue.real()); + double abs_error_imag = std::fabs(reducedValue.imag() - Q_result(idx).imag()); + double rel_error_imag = std::fabs(reducedValue.imag() - Q_result(idx).imag()) /std::fabs(reducedValue.imag()); + + std::cout << "Abs Error in real part: " << std::setprecision(16) + << abs_error_real << " Rel. error in real part: " << std::setprecision(16) << rel_error_real << std::endl; + std::cout << "Abs Error in imag part: " << std::setprecision(16) + << abs_error_imag << " Rel. error in imag part: " << std::setprecision(16) << rel_error_imag << std::endl; + + + //Kokkos::complex max_error(0.0, 0.0); + //MPI_Reduce(&max_error_local, &max_error, 1, + // MPI_C_DOUBLE_COMPLEX, MPI_MAX, 0, Ippl::getComm()); + + return 0; +} From e526befeb7066ee98c5058029d3b711778149d60 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 14 Feb 2023 14:04:55 +0100 Subject: [PATCH 6/9] few tweaks and cleanups but still lot of things need to be generalized --- CMakeModules/FindCUFINUFFT.cmake | 4 ++-- src/FFT/FFT.h | 8 ++++---- src/FFT/FFT.hpp | 5 +++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/CMakeModules/FindCUFINUFFT.cmake b/CMakeModules/FindCUFINUFFT.cmake index 691eb510f..9098a6e7a 100644 --- a/CMakeModules/FindCUFINUFFT.cmake +++ b/CMakeModules/FindCUFINUFFT.cmake @@ -2,14 +2,14 @@ # Find CUFINUFFT includes and library # # CUFINUFFT_INCLUDE_DIR - where to find cufinufft.h -# CUFINUFFT_LIBRARY - libcufinufft.a path +# CUFINUFFT_LIBRARY - libcufinufft.so path # CUFINUFFT_FOUND - do not attempt to use if "no" or undefined. FIND_PATH(CUFINUFFT_INCLUDE_DIR cufinufft.h HINTS $ENV{CUFINUFFT_INCLUDE_PATH} $ENV{CUFINUFFT_INCLUDE_DIR} $ENV{CUFINUFFT_PREFIX}/include $ENV{CUFINUFFT_DIR}/include ${PROJECT_SOURCE_DIR}/include PATHS ENV CPP_INCLUDE_PATH ) - +#Static library has some issues and gives a cuda error at the end of compilation FIND_LIBRARY(CUFINUFFT_LIBRARY_DIR libcufinufft.so HINTS $ENV{CUFINUFFT_LIBRARY_PATH} $ENV{CUFINUFFT_LIBRARY_DIR} $ENV{CUFINUFFT_PREFIX}/lib $ENV{CUFINUFFT_DIR}/lib $ENV{CUFINUFFT}/lib ${PROJECT_SOURCE_DIR}/lib PATHS ENV LIBRARY_PATH diff --git a/src/FFT/FFT.h b/src/FFT/FFT.h index 16fab61f3..cec240b8f 100644 --- a/src/FFT/FFT.h +++ b/src/FFT/FFT.h @@ -66,10 +66,12 @@ namespace ippl { Tag classes for Cosine transforms */ class CosTransform {}; +#ifdef KOKKOS_ENABLE_CUDA /** Tag classes for Non-uniform type of Fourier transforms */ class NUFFTransform {}; +#endif enum FFTComm { a2av = 0, @@ -337,6 +339,7 @@ namespace ippl { }; +#ifdef KOKKOS_ENABLE_CUDA /** Non-uniform FFT class */ @@ -346,7 +349,6 @@ namespace ippl { public: typedef FieldLayout Layout_t; - typedef std::complex StdComplex_t; typedef Kokkos::complex KokkosComplex_t; typedef Field ComplexField_t; @@ -369,9 +371,6 @@ namespace ippl { template void transform(const ParticleAttrib< Vector, Properties... >& R, ParticleAttrib& Q, ComplexField_t& f); - //template - //void transform(const ParticleAttrib< Vector>& R, - // ParticleAttrib>& Q, ComplexField_t& f); private: @@ -392,6 +391,7 @@ namespace ippl { } +#endif #include "FFT/FFT.hpp" #endif // IPPL_FFT_FFT_H diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index 4ef372730..c79b247a7 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -750,6 +750,7 @@ namespace ippl { } +#ifdef KOKKOS_ENABLE_CUDA //========================================================================= // FFT NUFFTransform Constructors //========================================================================= @@ -764,8 +765,6 @@ namespace ippl { int type, const ParameterList& params) { - - /** * cuFINUFFT requires to pass a 3D array even for 2D and * 1D FFTs we just have to fill in other @@ -823,6 +822,7 @@ namespace ippl { throw std::logic_error("Only type 1 and type 2 NUFFT are allowed now"); } + //dim in cufinufft is int int dim = static_cast(Dim); ier_m = cufinufft_makeplan(type_m, dim, nmodes.data(), iflag, 1, tol_m, maxbatchsize, &plan_m, &opts); @@ -943,6 +943,7 @@ namespace ippl { ier_m = cufinufft_destroy(plan_m); } +#endif } // vi: set et ts=4 sw=4 sts=4: From 613a80a0da18c0da71a734ae877dcf7ce4b29495 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Tue, 14 Feb 2023 16:25:18 +0100 Subject: [PATCH 7/9] include directories added --- CMakeModules/FindCUFINUFFT.cmake | 1 + src/CMakeLists.txt | 3 +++ test/FFT/CMakeLists.txt | 1 + 3 files changed, 5 insertions(+) diff --git a/CMakeModules/FindCUFINUFFT.cmake b/CMakeModules/FindCUFINUFFT.cmake index 9098a6e7a..202a044a3 100644 --- a/CMakeModules/FindCUFINUFFT.cmake +++ b/CMakeModules/FindCUFINUFFT.cmake @@ -17,6 +17,7 @@ FIND_LIBRARY(CUFINUFFT_LIBRARY_DIR libcufinufft.so IF(CUFINUFFT_INCLUDE_DIR AND CUFINUFFT_LIBRARY_DIR) SET( CUFINUFFT_FOUND "YES" ) + SET( CUFINUFFT_DIR $ENV{CUFINUFFT_DIR} ) ENDIF() IF (CUFINUFFT_FOUND) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8c96a6bc7..8b4330823 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -96,6 +96,9 @@ add_library ( ippl ${IPPL_SRCS} ${IPPL_SRCS_FORT} ) if (ENABLE_NUFFT) + include_directories ( + BEFORE ${CUFINUFFT_INCLUDE_DIR} + ) target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY} ${CUFINUFFT_LIBRARY_DIR}) else() target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY}) diff --git a/test/FFT/CMakeLists.txt b/test/FFT/CMakeLists.txt index 4d3e5fe90..834e9c762 100644 --- a/test/FFT/CMakeLists.txt +++ b/test/FFT/CMakeLists.txt @@ -3,6 +3,7 @@ message (STATUS "Adding test FFT found in ${_relPath}") include_directories ( ${CMAKE_SOURCE_DIR}/src + ${CUFINUFFT_INCLUDE_DIR} ) link_directories ( From 7198e2f7c13e536fa55acbbd24fae4e56f3c3005 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Thu, 16 Feb 2023 08:48:40 +0100 Subject: [PATCH 8/9] target_include_directories seems to work --- src/CMakeLists.txt | 4 +--- test/FFT/CMakeLists.txt | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 8b4330823..bd6a2205b 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -96,9 +96,7 @@ add_library ( ippl ${IPPL_SRCS} ${IPPL_SRCS_FORT} ) if (ENABLE_NUFFT) - include_directories ( - BEFORE ${CUFINUFFT_INCLUDE_DIR} - ) + target_include_directories(ippl PUBLIC ${CUFINUFFT_INCLUDE_DIR}) target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY} ${CUFINUFFT_LIBRARY_DIR}) else() target_link_libraries(ippl PUBLIC Kokkos::kokkos ${HEFFTE_LIBRARY}) diff --git a/test/FFT/CMakeLists.txt b/test/FFT/CMakeLists.txt index 834e9c762..4d3e5fe90 100644 --- a/test/FFT/CMakeLists.txt +++ b/test/FFT/CMakeLists.txt @@ -3,7 +3,6 @@ message (STATUS "Adding test FFT found in ${_relPath}") include_directories ( ${CMAKE_SOURCE_DIR}/src - ${CUFINUFFT_INCLUDE_DIR} ) link_directories ( From c4f9d714e168679c6f337510b448a44dad6ce949 Mon Sep 17 00:00:00 2001 From: Sriramkrishnan Muralikrishnan Date: Fri, 17 Feb 2023 09:34:29 +0100 Subject: [PATCH 9/9] Function pointers and C-style arrays introduced to solve the type and dimension issues --- src/FFT/FFT.h | 52 ++++++++++++++++++++++--------------------------- src/FFT/FFT.hpp | 36 ++++++++++++++-------------------- 2 files changed, 38 insertions(+), 50 deletions(-) diff --git a/src/FFT/FFT.h b/src/FFT/FFT.h index cec240b8f..6544b54f2 100644 --- a/src/FFT/FFT.h +++ b/src/FFT/FFT.h @@ -33,6 +33,7 @@ #include #include #include +#include #include #include "FieldLayout/FieldLayout.h" @@ -126,32 +127,28 @@ namespace ippl { template <> struct CufinufftType { - //using makeplan = typename cufinufftf_makeplan; - //using setpts = typename cufinufftf_setpts; - //using execute = typename cufinufftf_execute; - //using destroy = typename cufinufftf_destroy; - //using plan_t = typename cufinufftf_plan; - - - //typedef typename cufinufftf_makeplan makeplan; - //typedef typename cufinufftf_setpts setpts; - //typedef typename cufinufftf_execute execute; - //typedef typename cufinufftf_destroy destroy; - //typedef typename cufinufftf_plan plan_t; + std::function makeplan = cufinufftf_makeplan; + std::function setpts = cufinufftf_setpts; + std::function execute = cufinufftf_execute; + std::function destroy = cufinufftf_destroy; + + using complexType = cuFloatComplex; + using plan_t = cufinufftf_plan; }; template <> struct CufinufftType { - //using makeplan = typename cufinufft_makeplan; - //using setpts = typename cufinufft_setpts; - //using execute = typename cufinufft_execute; - //using destroy = typename cufinufft_destroy; - //using plan_t = typename cufinufft_plan; - //typedef typename cufinufft_makeplan makeplan; - //typedef typename cufinufft_setpts setpts; - //typedef typename cufinufft_execute execute; - //typedef typename cufinufft_destroy destroy; - //typedef typename cufinufft_plan plan_t; + std::function makeplan = cufinufft_makeplan; + std::function setpts = cufinufft_setpts; + std::function execute = cufinufft_execute; + std::function destroy = cufinufft_destroy; + + using complexType = cuDoubleComplex; + using plan_t = cufinufft_plan; }; #endif } @@ -352,11 +349,8 @@ namespace ippl { typedef Kokkos::complex KokkosComplex_t; typedef Field ComplexField_t; - //using makeplan = typename detail::CufinufftType::makeplan; - //using setpts = typename detail::CufinufftType::setpts; - //using execute = typename detail::CufinufftType::execute; - //using destroy = typename detail::CufinufftType::destroy; - //using plan_t = typename detail::CufinufftType::plan_t; + using complexType = typename detail::CufinufftType::complexType; + using plan_t = typename detail::CufinufftType::plan_t; /** Create a new FFT object with the layout for the input Field, type * (1 or 2) for the NUFFT and parameters for cuFINUFFT. @@ -381,8 +375,8 @@ namespace ippl { void setup(std::array& nmodes, const ParameterList& params); - //plan_t plan_m; - cufinufft_plan plan_m; + detail::CufinufftType nufft_m; + plan_t plan_m; int ier_m; T tol_m; int type_m; diff --git a/src/FFT/FFT.hpp b/src/FFT/FFT.hpp index c79b247a7..985e50ab4 100644 --- a/src/FFT/FFT.hpp +++ b/src/FFT/FFT.hpp @@ -824,7 +824,7 @@ namespace ippl { //dim in cufinufft is int int dim = static_cast(Dim); - ier_m = cufinufft_makeplan(type_m, dim, nmodes.data(), iflag, 1, tol_m, + ier_m = nufft_m.makeplan(type_m, dim, nmodes.data(), iflag, 1, tol_m, maxbatchsize, &plan_m, &opts); } @@ -849,24 +849,23 @@ namespace ippl { * cuFINUFFT's layout is left, hence we allocate the temporary * Kokkos views with the same layout */ - Kokkos::View + Kokkos::View tempField("tempField", fview.extent(0) - 2*nghost, fview.extent(1) - 2*nghost, fview.extent(2) - 2*nghost); - //Vector, 3> tempR; - Kokkos::View tempRx("tempRx", localNp); - Kokkos::View tempRy("tempRy", localNp); - Kokkos::View tempRz("tempRz", localNp); + //Initialize the pointers to NULL and fill only relevant dimensions + //CUFINUFFT requires the input like this. + Kokkos::View tempR[3] = {}; - //for(size_t d = 0; d < Dim; ++d) { - // Kokkos::realloc(tempR[d], localNp); - //} + for(size_t d = 0; d < Dim; ++d) { + Kokkos::realloc(tempR[d], localNp); + } - Kokkos::View tempQ("tempQ", localNp); + Kokkos::View tempQ("tempQ", localNp); using mdrange_type = Kokkos::MDRangePolicy>; @@ -891,22 +890,17 @@ namespace ippl { localNp, KOKKOS_LAMBDA(const size_t i) { - //for(size_t d = 0; d < Dim; ++d) { - // tempR[d](i) = Rview(i)[d]; - //} - tempRx(i) = Rview(i)[0]; - tempRy(i) = Rview(i)[1]; - tempRz(i) = Rview(i)[2]; + for(size_t d = 0; d < Dim; ++d) { + tempR[d](i) = Rview(i)[d]; + } tempQ(i).x = Qview(i).real(); tempQ(i).y = Qview(i).imag(); }); - //ier_m = cufinufft_setpts(localNp, tempR[0].data(), tempR[1].data(), tempR[2].data(), 0, - // NULL, NULL, NULL, plan_m); - ier_m = cufinufft_setpts(localNp, tempRx.data(), tempRy.data(), tempRz.data(), 0, + ier_m = nufft_m.setpts(localNp, tempR[0].data(), tempR[1].data(), tempR[2].data(), 0, NULL, NULL, NULL, plan_m); - ier_m = cufinufft_execute(tempQ.data(), tempField.data(), plan_m); + ier_m = nufft_m.execute(tempQ.data(), tempField.data(), plan_m); if(type_m == 1) { @@ -940,7 +934,7 @@ namespace ippl { template FFT::~FFT() { - ier_m = cufinufft_destroy(plan_m); + ier_m = nufft_m.destroy(plan_m); } #endif