Skip to content

Commit 31bfafc

Browse files
Merge remote-tracking branch 'upstream/develop' into develop
2 parents 32dc099 + 5411878 commit 31bfafc

File tree

104 files changed

+2195
-1330
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

104 files changed

+2195
-1330
lines changed

docs/advanced/input_files/input-main.md

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -732,7 +732,7 @@ If only one value is set (such as `kspacing 0.5`), then kspacing values of a/b/c
732732

733733
[back to top](#full-list-of-input-keywords)
734734

735-
## Variables related to input files
735+
## Input files
736736

737737
These variables are used to control parameters related to input files.
738738

@@ -1656,16 +1656,16 @@ These variables are used to control the geometry relaxation.
16561656

16571657
[back to top](#full-list-of-input-keywords)
16581658

1659-
## Variables related to output information
1659+
## Output information
16601660

16611661
These variables are used to control the output of properties.
16621662

16631663
### out_freq_ion
16641664

16651665
- **Type**: Integer
1666-
- **Description**: After self-consistent-field calculations, control the interval of ionic movements for printing properties. These properties cover charge density, local potential, electrostatic potential, Hamiltonian matrix, overlap matrix, density matrix, Mulliken population analysis and so on.
1666+
- **Description**: Controls the interval, in ionic steps, at which properties are printed. These properties include charge density, local potential, electrostatic potential, Hamiltonian matrix, overlap matrix, density matrix, Mulliken population analysis and so on.
16671667
- **Default**: 0
1668-
- **Note**: If you want to use out_freq_elec, please set out_freq_ion to 1, otherwise out_freq_elec is useless
1668+
- **Note**: Information is printed once every `out_freq_ion` ionic steps.
16691669

16701670
### out_freq_elec
16711671

@@ -1679,11 +1679,11 @@ These variables are used to control the output of properties.
16791679
- **Description**:
16801680
The first integer controls whether to output the charge density on real space grids:
16811681
- 1: Output the charge density (in Bohr^-3) on real space grids into the density files in the folder `OUT.${suffix}`. The files are named as:
1682-
- nspin = 1: `chgs1.cube`;
1682+
- nspin = 1: `chg.cube`;
16831683
- nspin = 2: `chgs1.cube`, and `chgs2.cube`;
16841684
- nspin = 4: `chgs1.cube`, `chgs2.cube`, `chgs3.cube`, and `chgs4.cube`;
16851685
Note that by using the Meta-GGA functional, additional files containing the kinetic energy density will be output with the following names:
1686-
- nspin = 1: `taus1.cube`;
1686+
- nspin = 1: `tau.cube`;
16871687
- nspin = 2: `taus1.cube`, and `taus2.cube`;
16881688
- nspin = 4: `taus1.cube`, `taus2.cube`, `taus3.cube`, and `taus4.cube`;
16891689
- 2: On top of 1, also output the initial charge density files, whose names carry the suffix `_ini`, such as `taus1_ini.cube`, etc.

python/pyabacus/CONTRIBUTING.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -189,7 +189,7 @@ list(APPEND _diago
189189
${HSOLVER_PATH}/diago_david.cpp
190190
${HSOLVER_PATH}/diag_const_nums.cpp
191191
${HSOLVER_PATH}/diago_iter_assist.cpp
192-
${HSOLVER_PATH}/kernels/dngvd_op.cpp
192+
${HSOLVER_PATH}/kernels/hegvd_op.cpp
193193
${HSOLVER_PATH}/kernels/bpcg_kernel_op.cpp
194194
${BASE_PATH}/kernels/math_kernel_op.cpp
195195
${BASE_PATH}/kernels/math_kernel_op_vec.cpp

python/pyabacus/src/hsolver/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ list(APPEND _diago
99
${HSOLVER_PATH}/diago_pxxxgvx.cpp
1010

1111

12-
${HSOLVER_PATH}/kernels/dngvd_op.cpp
12+
${HSOLVER_PATH}/kernels/hegvd_op.cpp
1313
${HSOLVER_PATH}/kernels/bpcg_kernel_op.cpp
1414
# dependency
1515
${BASE_PATH}/kernels/math_kernel_op.cpp

python/pyabacus/src/hsolver/py_diago_cg.hpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ class PyDiagoCG
145145
std::copy(hpsi_ptr, hpsi_ptr + nvec * ld_psi, hpsi_out.data<std::complex<double>>());
146146
};
147147

148-
auto subspace_func = [] (const ct::Tensor& psi_in, ct::Tensor& psi_out) { /*do nothing*/ };
148+
auto subspace_func = [](const ct::Tensor& psi_in, ct::Tensor& psi_out, const bool S_orth) { /*do nothing*/ };
149149

150150
auto spsi_func = [this] (const ct::Tensor& psi_in, ct::Tensor& spsi_out) {
151151
const auto ndim = psi_in.shape().ndim();

python/pyabacus/src/hsolver/py_diago_dav_subspace.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -144,7 +144,6 @@ class PyDiagoDavSubspace
144144
dav_ndim,
145145
tol,
146146
max_iter,
147-
need_subspace,
148147
comm_info,
149148
diag_subspace,
150149
nb2d

source/CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ list(APPEND device_srcs
3535
source_pw/module_pwdft/kernels/meta_op.cpp
3636
source_pw/module_stodft/kernels/hpsi_norm_op.cpp
3737
source_basis/module_pw/kernels/pw_op.cpp
38-
source_hsolver/kernels/dngvd_op.cpp
38+
source_hsolver/kernels/hegvd_op.cpp
3939
source_hsolver/kernels/bpcg_kernel_op.cpp
4040
source_estate/kernels/elecstate_op.cpp
4141

@@ -70,7 +70,7 @@ if(USE_CUDA)
7070
source_pw/module_stodft/kernels/cuda/hpsi_norm_op.cu
7171
source_pw/module_pwdft/kernels/cuda/onsite_op.cu
7272
source_basis/module_pw/kernels/cuda/pw_op.cu
73-
source_hsolver/kernels/cuda/dngvd_op.cu
73+
source_hsolver/kernels/cuda/hegvd_op.cu
7474
source_hsolver/kernels/cuda/bpcg_kernel_op.cu
7575
source_estate/kernels/cuda/elecstate_op.cu
7676

@@ -101,7 +101,7 @@ if(USE_ROCM)
101101
source_pw/module_pwdft/kernels/rocm/onsite_op.hip.cu
102102
source_pw/module_stodft/kernels/rocm/hpsi_norm_op.hip.cu
103103
source_basis/module_pw/kernels/rocm/pw_op.hip.cu
104-
source_hsolver/kernels/rocm/dngvd_op.hip.cu
104+
source_hsolver/kernels/rocm/hegvd_op.hip.cu
105105
source_hsolver/kernels/rocm/bpcg_kernel_op.hip.cu
106106
source_estate/kernels/rocm/elecstate_op.hip.cu
107107

source/Makefile.Objects

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@ OBJS_ESOLVER=esolver.o\
266266
esolver_lj.o\
267267
esolver_dp.o\
268268
esolver_of.o\
269+
esolver_of_tddft.o\
269270
esolver_of_tool.o\
270271
esolver_of_interface.o\
271272
pw_others.o\
@@ -360,6 +361,7 @@ OBJS_HAMILT_OF=kedf_tf.o\
360361
kedf_xwm.o\
361362
kedf_lkt.o\
362363
kedf_manager.o\
364+
evolve_ofdft.o\
363365

364366
OBJS_HAMILT_LCAO=hamilt_lcao.o\
365367
operator_lcao.o\
@@ -395,7 +397,7 @@ OBJS_HSOLVER=diago_cg.o\
395397
hsolver_lcaopw.o\
396398
hsolver_pw_sdft.o\
397399
diago_iter_assist.o\
398-
dngvd_op.o\
400+
hegvd_op.o\
399401
bpcg_kernel_op.o\
400402
diag_const_nums.o\
401403
diag_hs_para.o\
@@ -579,6 +581,8 @@ OBJS_IO=input_conv.o\
579581
output_log.o\
580582
output_mat_sparse.o\
581583
ctrl_output_lcao.o\
584+
ctrl_output_fp.o\
585+
ctrl_output_pw.o\
582586
para_json.o\
583587
abacusjson.o\
584588
general_info.o\

source/source_base/kernels/dsp/dsp_connector.cpp

Lines changed: 22 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,9 @@
66
extern "C"
77
{
88
#define complex_double ignore_complex_double
9-
#include <mt_hthread_blas.h> // MTBLAS_TRANSPOSE etc
9+
#include <mt_hthread_blas.h> // include faster mtblas kernels
1010
#undef complex_double
11-
#include <mtblas_interface.h> // gemm
11+
#include <mtblas_interface.h> // regular mtblas kernels: they manage memory automatically, but are slower
1212
}
1313
namespace mtfunc
1414
{
@@ -22,45 +22,42 @@ void dspDestoryHandle(int id)
2222
{
2323
hthread_dev_close(id);
2424
std::cout << " ** DSP closed on cluster " << id << " **" << std::endl;
25-
} // Close dsp cluster at the end
25+
} // Close dsp cluster at the end of the program
2626

27-
MTBLAS_TRANSPOSE convertBLASTranspose(const char* blasTrans)
27+
// MTBlas dropped its own MTBLAS_TRANSPOSE type in favor of the standard CBLAS_TRANSPOSE, so this function now returns CBLAS_TRANSPOSE.
28+
29+
CBLAS_TRANSPOSE convertBLASTranspose(const char* blasTrans)
2830
{
2931
switch (blasTrans[0])
3032
{
3133
case 'N':
3234
case 'n':
33-
return MtblasNoTrans;
35+
return CblasNoTrans;
3436
case 'T':
3537
case 't':
36-
return MtblasTrans;
38+
return CblasTrans;
3739
case 'C':
3840
case 'c':
39-
return MtblasConjTrans;
41+
return CblasConjTrans;
4042
default:
4143
std::cout << "Invalid BLAS transpose parameter!! Use default instead." << std::endl;
42-
return MtblasNoTrans;
44+
return CblasNoTrans;
4345
}
44-
} // Used to convert normal transpost char to mtblas transpose flag
46+
} // Converts a BLAS transpose character ('N'/'T'/'C', either case) to the corresponding CBLAS transpose flag
4547

4648
void* malloc_ht(size_t bytes, int cluster_id)
4749
{
48-
// std::cout << "MALLOC " << cluster_id;
4950
void* ptr = hthread_malloc((int)cluster_id, bytes, HT_MEM_RW);
50-
// std::cout << ptr << " SUCCEED" << std::endl;;
5151
return ptr;
52-
}
52+
} // Malloc on dsp. Used to replace original malloc
53+
5354

54-
// Used to replace original malloc
5555

5656
void free_ht(void* ptr)
5757
{
58-
// std::cout << "FREE " << ptr;
5958
hthread_free(ptr);
60-
// std::cout << " FREE SUCCEED" << std::endl;
61-
}
59+
} // Free on dsp. Used to replace original free
6260

63-
// Used to replace original free
6461

6562
void sgemm_mt_(const char* transa,
6663
const char* transb,
@@ -77,7 +74,7 @@ void sgemm_mt_(const char* transa,
7774
const int* ldc,
7875
int cluster_id)
7976
{
80-
mtblas_sgemm(MTBLAS_ORDER::MtblasColMajor,
77+
mtblas_sgemm(CBLAS_ORDER::CblasColMajor,
8178
convertBLASTranspose(transa),
8279
convertBLASTranspose(transb),
8380
*m,
@@ -109,7 +106,7 @@ void dgemm_mt_(const char* transa,
109106
const int* ldc,
110107
int cluster_id)
111108
{
112-
mtblas_dgemm(MTBLAS_ORDER::MtblasColMajor,
109+
mtblas_dgemm(CBLAS_ORDER::CblasColMajor,
113110
convertBLASTranspose(transa),
114111
convertBLASTranspose(transb),
115112
*m,
@@ -141,7 +138,7 @@ void zgemm_mt_(const char* transa,
141138
const int* ldc,
142139
int cluster_id)
143140
{
144-
mtblas_zgemm(MTBLAS_ORDER::MtblasColMajor,
141+
mtblas_zgemm(CBLAS_ORDER::CblasColMajor,
145142
convertBLASTranspose(transa),
146143
convertBLASTranspose(transb),
147144
*m,
@@ -173,7 +170,7 @@ void cgemm_mt_(const char* transa,
173170
const int* ldc,
174171
int cluster_id)
175172
{
176-
mtblas_cgemm(MTBLAS_ORDER::MtblasColMajor,
173+
mtblas_cgemm(CBLAS_ORDER::CblasColMajor,
177174
convertBLASTranspose(transa),
178175
convertBLASTranspose(transb),
179176
*m,
@@ -207,7 +204,7 @@ void sgemm_mth_(const char* transa,
207204
const int* ldc,
208205
int cluster_id)
209206
{
210-
mt_hthread_sgemm(MTBLAS_ORDER::MtblasColMajor,
207+
mt_hthread_sgemm(CBLAS_ORDER::CblasColMajor,
211208
convertBLASTranspose(transa),
212209
convertBLASTranspose(transb),
213210
*m,
@@ -239,7 +236,7 @@ void dgemm_mth_(const char* transa,
239236
const int* ldc,
240237
int cluster_id)
241238
{
242-
mt_hthread_dgemm(MTBLAS_ORDER::MtblasColMajor,
239+
mt_hthread_dgemm(CBLAS_ORDER::CblasColMajor,
243240
convertBLASTranspose(transa),
244241
convertBLASTranspose(transb),
245242
*m,
@@ -275,7 +272,7 @@ void zgemm_mth_(const char* transa,
275272
*alp = *alpha;
276273
std::complex<double>* bet = (std::complex<double>*)malloc_ht(sizeof(std::complex<double>), cluster_id);
277274
*bet = *beta;
278-
mt_hthread_zgemm(MTBLAS_ORDER::MtblasColMajor,
275+
mt_hthread_zgemm(CBLAS_ORDER::CblasColMajor,
279276
convertBLASTranspose(transa),
280277
convertBLASTranspose(transb),
281278
*m,
@@ -314,7 +311,7 @@ void cgemm_mth_(const char* transa,
314311
std::complex<float>* bet = (std::complex<float>*)malloc_ht(sizeof(std::complex<float>), cluster_id);
315312
*bet = *beta;
316313

317-
mt_hthread_cgemm(MTBLAS_ORDER::MtblasColMajor,
314+
mt_hthread_cgemm(CBLAS_ORDER::CblasColMajor,
318315
convertBLASTranspose(transa),
319316
convertBLASTranspose(transb),
320317
*m,

source/source_base/module_container/ATen/kernels/cuda/lapack.cu

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ struct lapack_potrf<T, DEVICE_GPU> {
8888
};
8989

9090
template <typename T>
91-
struct lapack_dnevd<T, DEVICE_GPU> {
91+
struct lapack_heevd<T, DEVICE_GPU> {
9292
using Real = typename GetTypeReal<T>::type;
9393
void operator()(
9494
const char& jobz,
@@ -97,12 +97,12 @@ struct lapack_dnevd<T, DEVICE_GPU> {
9797
const int& dim,
9898
Real* eigen_val)
9999
{
100-
cuSolverConnector::dnevd(cusolver_handle, jobz, uplo, dim, Mat, dim, eigen_val);
100+
cuSolverConnector::heevd(cusolver_handle, jobz, uplo, dim, Mat, dim, eigen_val);
101101
}
102102
};
103103

104104
template <typename T>
105-
struct lapack_dngvd<T, DEVICE_GPU> {
105+
struct lapack_hegvd<T, DEVICE_GPU> {
106106
using Real = typename GetTypeReal<T>::type;
107107
void operator()(
108108
const int& itype,
@@ -113,7 +113,7 @@ struct lapack_dngvd<T, DEVICE_GPU> {
113113
const int& dim,
114114
Real* eigen_val)
115115
{
116-
cuSolverConnector::dngvd(cusolver_handle, itype, jobz, uplo, dim, Mat_A, dim, Mat_B, dim, eigen_val);
116+
cuSolverConnector::hegvd(cusolver_handle, itype, jobz, uplo, dim, Mat_A, dim, Mat_B, dim, eigen_val);
117117
}
118118
};
119119

@@ -175,15 +175,15 @@ template struct lapack_potrf<double, DEVICE_GPU>;
175175
template struct lapack_potrf<std::complex<float>, DEVICE_GPU>;
176176
template struct lapack_potrf<std::complex<double>, DEVICE_GPU>;
177177

178-
template struct lapack_dnevd<float, DEVICE_GPU>;
179-
template struct lapack_dnevd<double, DEVICE_GPU>;
180-
template struct lapack_dnevd<std::complex<float>, DEVICE_GPU>;
181-
template struct lapack_dnevd<std::complex<double>, DEVICE_GPU>;
178+
template struct lapack_heevd<float, DEVICE_GPU>;
179+
template struct lapack_heevd<double, DEVICE_GPU>;
180+
template struct lapack_heevd<std::complex<float>, DEVICE_GPU>;
181+
template struct lapack_heevd<std::complex<double>, DEVICE_GPU>;
182182

183-
template struct lapack_dngvd<float, DEVICE_GPU>;
184-
template struct lapack_dngvd<double, DEVICE_GPU>;
185-
template struct lapack_dngvd<std::complex<float>, DEVICE_GPU>;
186-
template struct lapack_dngvd<std::complex<double>, DEVICE_GPU>;
183+
template struct lapack_hegvd<float, DEVICE_GPU>;
184+
template struct lapack_hegvd<double, DEVICE_GPU>;
185+
template struct lapack_hegvd<std::complex<float>, DEVICE_GPU>;
186+
template struct lapack_hegvd<std::complex<double>, DEVICE_GPU>;
187187

188188
template struct lapack_getrf<float, DEVICE_GPU>;
189189
template struct lapack_getrf<double, DEVICE_GPU>;

0 commit comments

Comments
 (0)