Skip to content

Commit dce250b

Browse files
dyzheng, dyzheng, mohanchen, grysgreat, WHUweiqingzhou
authored
Refactor: use less memory and optimize performance to calculate force and stress in pw base (#4047)
* Refactor: use less memory to calculate stress in pw base
* Fix: nondiagonal matrix element in PW-Stress calculation
* Fix: uspp stress calculation
* add gemm_op in gpu and add vkb_op
* delete comment
* delete comment2
* Fix: error in merging
* update ops.
* add correct vq.
* finish stress ops!
* Fix: compiling error
* Refactor: delete GlobalC used in stress_nl code
* Fix: compiling error in CUDA
* Fix: DCU error in synchronize_ptrs
* Fix: GlobalC in Sto_stress_PW
* add new stress and force in high speed.
* delete time prints.
* Fix: compiling error from merge
* Fix: compiling error from merge on GPU
* remove some if(GPU)
* finish force remove gpus.
* Memory: reduce memory allocation for hpsi and spsi in diagH_subspace_init func (GPU)
* fix: compiler error
* Memory: reduce memory allocation for hpsi and spsi in diagH_subspace_init func (GPU)
* Memory: reduce memory allocation for hpsi and spsi in diagH_subspace_init func (GPU)
* Fix: force calculation error
* Fix: error of Lcao_in_PW
* Fix: error in GPU
* Fix: force error
* Try: use one temp psi for cg
* fix: error in cg
* Fix: sdft error
* fix: compiling error
* Refactor: new class tool for pw_stress code
* fix: memory leak in stress
* Refactor: force calculation for pw code
* Fix: force error
* delete useless code of old force
* fix: compiling error on GPU/DCU
* Fix: error on GPU
* Fix: uninitialize error on force
* Fix: Makefile.objects
* update dylmr2
* Fix: compiling error
* [pre-commit.ci lite] apply automatic fixes
* Fix: force performance
* Fix: error in cuda
* Fix: compiling error in cuda
* Fix: compiling error in cuda
* [pre-commit.ci lite] apply automatic fixes
* Fix: compiling error in CUDA
* Fix: compiling error in CUDA/ROCM
* Fix: stress error
* [pre-commit.ci lite] apply automatic fixes
* update from PR comments
* [pre-commit.ci lite] apply automatic fixes
* add timer and optimize stress
* fix: g_plus_k
* [pre-commit.ci lite] apply automatic fixes
* Fix: delete pointers
* [pre-commit.ci lite] apply automatic fixes
* Fix: stress time
* [pre-commit.ci lite] apply automatic fixes
* fix: add annotations
* [pre-commit.ci lite] apply automatic fixes

---------

Co-authored-by: dyzheng <zhengdy@bjaisi.com>
Co-authored-by: Mohan Chen <mohan.chen.chen.mohan@gmail.com>
Co-authored-by: stargrys <771582678@qq.com>
Co-authored-by: wqzhou <33364058+WHUweiqingzhou@users.noreply.github.com>
Co-authored-by: Religious-J <1569978990@qq.com>
Co-authored-by: pre-commit-ci-lite[bot] <117423508+pre-commit-ci-lite[bot]@users.noreply.github.com>
1 parent 2c451de commit dce250b

40 files changed

+10831
-8828
lines changed

source/Makefile.Objects

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,8 @@ OBJS_SRCPW=H_Ewald_pw.o\
576576
fp_energy.o\
577577
forces.o\
578578
forces_us.o\
579+
forces_nl.o\
580+
fs_nonlocal_tools.o\
579581
force_op.o\
580582
stress_op.o\
581583
wf_op.o\

source/module_base/module_device/cuda/memory_op.cu

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,10 @@ template struct delete_memory_op<float, base_device::DEVICE_GPU>;
269269
template struct delete_memory_op<double, base_device::DEVICE_GPU>;
270270
template struct delete_memory_op<std::complex<float>, base_device::DEVICE_GPU>;
271271
template struct delete_memory_op<std::complex<double>, base_device::DEVICE_GPU>;
272+
template struct delete_memory_op<float*, base_device::DEVICE_GPU>;
273+
template struct delete_memory_op<double*, base_device::DEVICE_GPU>;
274+
template struct delete_memory_op<std::complex<float>*, base_device::DEVICE_GPU>;
275+
template struct delete_memory_op<std::complex<double>*, base_device::DEVICE_GPU>;
272276

273277
} // namespace memory
274278
} // end of namespace base_device

source/module_base/module_device/memory_op.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -141,10 +141,13 @@ template struct delete_memory_op<float, base_device::DEVICE_CPU>;
141141
template struct delete_memory_op<double, base_device::DEVICE_CPU>;
142142
template struct delete_memory_op<std::complex<float>, base_device::DEVICE_CPU>;
143143
template struct delete_memory_op<std::complex<double>, base_device::DEVICE_CPU>;
144+
template struct delete_memory_op<float*, base_device::DEVICE_CPU>;
145+
template struct delete_memory_op<double*, base_device::DEVICE_CPU>;
146+
template struct delete_memory_op<std::complex<float>*, base_device::DEVICE_CPU>;
147+
template struct delete_memory_op<std::complex<double>*, base_device::DEVICE_CPU>;
144148

145149
#if !(defined(__CUDA) || defined(__ROCM))
146150

147-
148151
template <typename FPTYPE>
149152
struct resize_memory_op<FPTYPE, base_device::DEVICE_GPU>
150153
{

source/module_base/module_device/rocm/memory_op.hip.cu

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,10 @@ template struct delete_memory_op<float, base_device::DEVICE_GPU>;
265265
template struct delete_memory_op<double, base_device::DEVICE_GPU>;
266266
template struct delete_memory_op<std::complex<float>, base_device::DEVICE_GPU>;
267267
template struct delete_memory_op<std::complex<double>, base_device::DEVICE_GPU>;
268+
template struct delete_memory_op<float*, base_device::DEVICE_GPU>;
269+
template struct delete_memory_op<double*, base_device::DEVICE_GPU>;
270+
template struct delete_memory_op<std::complex<float>*, base_device::DEVICE_GPU>;
271+
template struct delete_memory_op<std::complex<double>*, base_device::DEVICE_GPU>;
268272

269273
} // namespace memory
270274
} // end of namespace base_device

source/module_esolver/esolver_ks_pw.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -910,6 +910,7 @@ void ESolver_KS_PW<T, Device>::cal_stress(ModuleBase::matrix& stress)
910910
: reinterpret_cast<psi::Psi<std::complex<double>, Device>*>(this->kspw_psi);
911911
ss.cal_stress(stress,
912912
GlobalC::ucell,
913+
&GlobalC::ppcell,
913914
this->pw_rhod,
914915
&GlobalC::ucell.symm,
915916
&this->sf,

source/module_esolver/esolver_sdft_pw.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -253,7 +253,9 @@ void ESolver_SDFT_PW::cal_stress(ModuleBase::matrix& stress)
253253
pw_wfc,
254254
this->psi,
255255
this->stowf,
256-
pelec->charge);
256+
pelec->charge,
257+
&GlobalC::ppcell,
258+
GlobalC::ucell);
257259
}
258260

259261
void ESolver_SDFT_PW::after_all_runners()

source/module_hamilt_pw/hamilt_pwdft/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ list(APPEND objects
88
operator_pw/meta_pw.cpp
99
operator_pw/velocity_pw.cpp
1010
operator_pw/operator_pw.cpp
11+
forces_nl.cpp
1112
forces.cpp
1213
forces_us.cpp
1314
stress_func_cc.cpp
@@ -32,6 +33,7 @@ list(APPEND objects
3233
global.cpp
3334
parallel_grid.cpp
3435
elecond.cpp
36+
fs_nonlocal_tools.cpp
3537
)
3638

3739
add_library(

source/module_hamilt_pw/hamilt_pwdft/VNL_in_pw.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -495,7 +495,6 @@ void pseudopot_cell_vnl::getvnl(Device* ctx, const int& ik, std::complex<FPTYPE>
495495
npw,
496496
this->wfcpw->npwk_max,
497497
this->nhm,
498-
GlobalV::NQX,
499498
this->tab.getBound2(),
500499
this->tab.getBound3(),
501500
atom_na,

0 commit comments

Comments
 (0)