diff --git a/source/source_estate/module_dm/cal_edm_tddft.cpp b/source/source_estate/module_dm/cal_edm_tddft.cpp index 4c3587f252..bd33477f97 100644 --- a/source/source_estate/module_dm/cal_edm_tddft.cpp +++ b/source/source_estate/module_dm/cal_edm_tddft.cpp @@ -15,10 +15,10 @@ namespace elecstate { void print_local_matrix(std::ostream& os, const std::complex* matrix_data, - int local_rows, // pv.nrow - int local_cols, // pv.ncol - const std::string& matrix_name = "", - int rank = -1) + int local_rows, + int local_cols, + const std::string& matrix_name, + int rank) { if (!matrix_name.empty() || rank >= 0) { @@ -59,6 +59,7 @@ void cal_edm_tddft(Parallel_Orbitals& pv, K_Vectors& kv, hamilt::Hamilt>* p_hamilt) { + ModuleBase::TITLE("elecstate", "cal_edm_tddft"); ModuleBase::timer::tick("elecstate", "cal_edm_tddft"); const int nlocal = PARAM.globalv.nlocal; @@ -311,6 +312,7 @@ void cal_edm_tddft_tensor(Parallel_Orbitals& pv, K_Vectors& kv, hamilt::Hamilt>* p_hamilt) { + ModuleBase::TITLE("elecstate", "cal_edm_tddft_tensor"); ModuleBase::timer::tick("elecstate", "cal_edm_tddft_tensor"); const int nlocal = PARAM.globalv.nlocal; @@ -541,6 +543,7 @@ void cal_edm_tddft_tensor_lapack(Parallel_Orbitals& pv, K_Vectors& kv, hamilt::Hamilt>* p_hamilt) { + ModuleBase::TITLE("elecstate", "cal_edm_tddft_tensor_lapack"); ModuleBase::timer::tick("elecstate", "cal_edm_tddft_tensor_lapack"); const int nlocal = PARAM.globalv.nlocal; diff --git a/source/source_estate/module_dm/cal_edm_tddft.h b/source/source_estate/module_dm/cal_edm_tddft.h index 5350de4c92..b442bd90cd 100644 --- a/source/source_estate/module_dm/cal_edm_tddft.h +++ b/source/source_estate/module_dm/cal_edm_tddft.h @@ -8,6 +8,13 @@ namespace elecstate { +void print_local_matrix(std::ostream& os, + const std::complex* matrix_data, + int local_rows, // pv.nrow + int local_cols, // pv.ncol + const std::string& matrix_name = "", + int rank = -1); + void cal_edm_tddft(Parallel_Orbitals& pv, LCAO_domain::Setup_DM>& dmat, K_Vectors& kv, diff --git a/source/source_lcao/module_rt/evolve_elec.cpp b/source/source_lcao/module_rt/evolve_elec.cpp index 22498819c3..e43c5da4f3 100644 --- a/source/source_lcao/module_rt/evolve_elec.cpp +++ b/source/source_lcao/module_rt/evolve_elec.cpp @@ -46,7 +46,7 @@ void Evolve_elec::solve_psi(const int& istep, { phm->updateHk(ik); - ModuleBase::timer::tick("Efficiency", "evolve_k"); + ModuleBase::timer::tick("TD_Efficiency", "evolve_k"); psi->fix_k(ik); psi_laststep->fix_k(ik); @@ -70,6 +70,8 @@ void Evolve_elec::solve_psi(const int& istep, } else { + ModuleBase::timer::tick("TD_Efficiency", "host_device_comm"); + const int len_psi_k_1 = use_lapack ? nband : psi->get_nbands(); const int len_psi_k_2 = use_lapack ? nlocal : psi->get_nbasis(); const int len_HS_laststep = use_lapack ? nlocal * nlocal : para_orb.nloc; @@ -135,6 +137,8 @@ void Evolve_elec::solve_psi(const int& istep, len_HS_laststep); syncmem_double_h2d_op()(ekb_tensor.data(), &(ekb(ik, 0)), nband); + ModuleBase::timer::tick("TD_Efficiency", "host_device_comm"); + evolve_psi_tensor(nband, nlocal, &(para_orb), @@ -149,6 +153,7 @@ void Evolve_elec::solve_psi(const int& istep, print_matrix, use_lapack); + ModuleBase::timer::tick("TD_Efficiency", "host_device_comm"); // Need to distribute global psi back to all processes if (use_lapack) { @@ -192,11 +197,14 @@ void Evolve_elec::solve_psi(const int& istep, MPI_Bcast(&(ekb(ik, 0)), nband, MPI_DOUBLE, root_proc, MPI_COMM_WORLD); } #endif + + ModuleBase::timer::tick("TD_Efficiency", "host_device_comm"); + // GlobalV::ofs_running << "Print ekb: " << std::endl; // ekb.print(GlobalV::ofs_running); } - ModuleBase::timer::tick("Efficiency", "evolve_k"); + ModuleBase::timer::tick("TD_Efficiency", "evolve_k"); } // end k ModuleBase::timer::tick("Evolve_elec", "solve_psi"); diff --git a/source/source_lcao/module_rt/evolve_psi.cpp b/source/source_lcao/module_rt/evolve_psi.cpp index b730e574ee..94d65597e8 100644 --- a/source/source_lcao/module_rt/evolve_psi.cpp +++ b/source/source_lcao/module_rt/evolve_psi.cpp @@ -30,11 +30,8 @@ void evolve_psi(const int nband, std::ofstream& ofs_running, const int print_matrix) { - ModuleBase::TITLE("Evolve_psi", "evolve_psi"); - // ofs_running << " Evolving electronic wave functions begins" << std::endl; - + ModuleBase::TITLE("module_rt", "evolve_psi"); time_t time_start = time(nullptr); - // ofs_running << " Start Time : " << ctime(&time_start); #ifdef __MPI @@ -112,12 +109,10 @@ void evolve_psi(const int nband, delete[] Hold; delete[] U_operator; -#endif +#endif // __MPI time_t time_end = time(nullptr); - ModuleBase::GlobalFunc::OUT_TIME("evolve(std::complex)", time_start, time_end); - - // ofs_running << " Evolving electronic wave functions ends" << std::endl; + ModuleBase::GlobalFunc::OUT_TIME("evolve_psi", time_start, time_end); return; } @@ -137,6 +132,9 @@ void evolve_psi_tensor(const int nband, const int print_matrix, const bool use_lapack) { + ModuleBase::TITLE("module_rt", "evolve_psi_tensor"); + time_t time_start = time(nullptr); + // ct_device_type = ct::DeviceType::CpuDevice or ct::DeviceType::GpuDevice ct::DeviceType ct_device_type = ct::DeviceTypeToEnum::value; // ct_Device = ct::DEVICE_CPU or ct::DEVICE_GPU @@ -154,17 +152,12 @@ void evolve_psi_tensor(const int nband, } #endif // __CUDA - // ofs_running << " evolve_psi_tensor::start " << std::endl; - - ModuleBase::TITLE("Evolve_psi", "evolve_psi"); - time_t time_start = time(nullptr); - // ofs_running << " Start Time : " << ctime(&time_start); - #ifdef __MPI - hamilt::MatrixBlock> h_mat, s_mat; p_hamilt->matrix(h_mat, s_mat); + ModuleBase::timer::tick("TD_Efficiency", "host_device_comm"); + // Create Tensor objects for temporary data and sync from host to device const int len_HS = use_lapack ? nlocal * nlocal : pv->nloc; ct::Tensor Stmp(ct::DataType::DT_COMPLEX_DOUBLE, ct_device_type, ct::TensorShape({len_HS})); @@ -198,6 +191,8 @@ void evolve_psi_tensor(const int nband, syncmem_complex_h2d_op()(Hold.data>(), h_mat.p, len_HS); } + ModuleBase::timer::tick("TD_Efficiency", "host_device_comm"); + ct::Tensor U_operator(ct::DataType::DT_COMPLEX_DOUBLE, ct_device_type, ct::TensorShape({len_HS})); U_operator.zero(); @@ -238,7 +233,7 @@ void evolve_psi_tensor(const int nband, /// @brief compute U_operator /// @input Stmp, Htmp, print_matrix /// @output U_operator - Propagator prop(propagator, pv, PARAM.mdp.md_dt); + Propagator prop(propagator, pv, PARAM.inp.td_dt); prop.compute_propagator_tensor(nlocal, Stmp, Htmp, @@ -298,14 +293,8 @@ void evolve_psi_tensor(const int nband, compute_ekb_tensor_lapack(pv, nband, nlocal, Hold, psi_k, ekb, ofs_running); } } - #endif // __MPI - time_t time_end = time(nullptr); - ModuleBase::GlobalFunc::OUT_TIME("evolve(std::complex)", time_start, time_end); - - // ofs_running << " evolve_psi_tensor::end " << std::endl; - #if ((defined __CUDA) /* || (defined __ROCM) */) if (ct_device_type == ct::DeviceType::GpuDevice) { @@ -315,6 +304,9 @@ void evolve_psi_tensor(const int nband, } #endif // __CUDA + time_t time_end = time(nullptr); + ModuleBase::GlobalFunc::OUT_TIME("evolve_psi", time_start, time_end); + return; }