33import re
44
55import numpy as np
6- from dpdata .abacus .scf import get_cell , get_coords , get_nele_from_stru
6+ from dpdata .abacus .stru import get_frame_from_stru , make_unlabeled_stru
77
88from dpgen .auto_test .lib import vasp
99
@@ -208,90 +208,37 @@ def make_abacus_scf_stru(
208208 fp_pp_files ,
209209 fp_orb_files = None ,
210210 fp_dpks_descriptor = None ,
211- fp_params = None ,
212211 type_map = None ,
213212 pporb = "" , # pull all pp orb dpks files to pporb folder
214213):
215- atom_names = sys_data ["atom_names" ]
216- atom_numbs = sys_data ["atom_numbs" ]
217- if type_map is None :
218- type_map = atom_names
219-
220- assert len (atom_names ) == len (atom_numbs ), "Please check the name of atoms. "
221- cell = sys_data ["cells" ].reshape ([3 , 3 ])
222- coord = sys_data ["coords" ].reshape ([sum (atom_numbs ), 3 ])
223- # volume = np.linalg.det(cell)
224- # lattice_const = np.power(volume, 1/3)
225-
226- ret = "ATOMIC_SPECIES\n "
227- for iatom in range (len (atom_names )):
228- assert atom_names [iatom ] in type_map , (
229- f"element { atom_names [iatom ]} is not defined in type_map"
230- )
231- idx = type_map .index (atom_names [iatom ])
232- if "atom_masses" not in sys_data :
233- ret += (
234- atom_names [iatom ]
235- + " 1.00 "
236- + os .path .join (pporb , fp_pp_files [idx ])
237- + "\n "
238- )
239- else :
240- ret += (
241- atom_names [iatom ]
242- + " {:.3f} " .format (sys_data ["atom_masses" ][iatom ])
243- + os .path .join (pporb , fp_pp_files [idx ])
244- + "\n "
245- )
246-
247- if fp_params is not None and "lattice_constant" in fp_params :
248- ret += "\n LATTICE_CONSTANT\n "
249- ret += (
250- str (fp_params ["lattice_constant" ]) + "\n \n "
251- ) # in Bohr, in this way coord and cell are in Angstrom
252- else :
253- ret += "\n LATTICE_CONSTANT\n "
254- ret += str (1 / bohr2ang ) + "\n \n "
255-
256- ret += "LATTICE_VECTORS\n "
257- for ix in range (3 ):
258- for iy in range (3 ):
259- ret += str (cell [ix ][iy ]) + " "
260- ret += "\n "
261- ret += "\n "
262-
263- ret += "ATOMIC_POSITIONS\n "
264- ret += "Cartesian # Cartesian(Unit is LATTICE_CONSTANT)\n "
265- # ret += "\n"
266- natom_tot = 0
267- for iele in range (len (atom_names )):
268- ret += atom_names [iele ] + "\n "
269- ret += "0.0\n "
270- ret += str (atom_numbs [iele ]) + "\n "
271- for iatom in range (atom_numbs [iele ]):
272- ret += "%.12f %.12f %.12f %d %d %d\n " % ( # noqa: UP031
273- coord [natom_tot , 0 ],
274- coord [natom_tot , 1 ],
275- coord [natom_tot , 2 ],
276- 1 ,
277- 1 ,
278- 1 ,
279- )
280- natom_tot += 1
281- assert natom_tot == sum (atom_numbs )
282-
214+ sys_data_copy = copy .deepcopy (sys_data )
215+ # re-construct the path of files by pporb + file name
216+ fp_pp_files = [os .path .join (pporb , i ) for i in fp_pp_files ]
283217 if fp_orb_files is not None :
284- ret += "\n NUMERICAL_ORBITAL\n "
285- assert len (fp_orb_files ) == len (type_map )
286- for iatom in range (len (atom_names )):
287- idx = type_map .index (atom_names [iatom ])
288- ret += os .path .join (pporb , fp_orb_files [idx ]) + "\n "
289-
218+ fp_orb_files = [os .path .join (pporb , i ) for i in fp_orb_files ]
290219 if fp_dpks_descriptor is not None :
291- ret += "\n NUMERICAL_DESCRIPTOR\n "
292- ret += os .path .join (pporb , fp_dpks_descriptor ) + "\n "
220+ fp_dpks_descriptor = os .path .join (pporb , fp_dpks_descriptor )
221+
222+ # we need to make sure that the shape of cells and coords are the same
223+ # and if they are 2D, we need to convert them to 3D
224+ cells = np .array (sys_data ["cells" ])
225+ coords = np .array (sys_data ["coords" ])
226+ assert len (cells .shape ) == len (coords .shape ), (
227+ "cells and coords should have the same shape."
228+ )
293229
294- return ret
230+ if len (cells .shape ) == 2 :
231+ sys_data_copy ["cells" ] = np .array ([cells ])
232+ sys_data_copy ["coords" ] = np .array ([coords ])
233+ c = make_unlabeled_stru (
234+ sys_data_copy ,
235+ 0 ,
236+ pp_file = fp_pp_files ,
237+ numerical_orbital = fp_orb_files ,
238+ numerical_descriptor = fp_dpks_descriptor ,
239+ )
240+
241+ return c
295242
296243
297244def get_abacus_input_parameters (INPUT ):
@@ -306,106 +253,37 @@ def get_abacus_input_parameters(INPUT):
306253 return input_parameters
307254
308255
309- def get_mass_from_STRU (geometry_inlines , atom_names ):
310- nele = get_nele_from_stru (geometry_inlines )
311- assert nele > 0
312- mass_list = [0 for i in atom_names ]
313- pp_file_list = [i for i in atom_names ]
314- for iline , line in enumerate (geometry_inlines ):
315- if line .split () == []:
316- continue
317- if "ATOMIC_SPECIES" == line .split ()[0 ]:
318- for iele1 in range (1 , 1 + nele ):
319- for iele2 in range (nele ):
320- if geometry_inlines [iline + iele1 ].split ()[0 ] == atom_names [iele2 ]:
321- mass_list [iele2 ] = float (
322- geometry_inlines [iline + iele1 ].split ()[1 ]
323- )
324- pp_file_list [iele2 ] = geometry_inlines [iline + iele1 ].split ()[2 ]
325- for iele in range (len (mass_list )):
326- assert mass_list [iele ] > 0
327- return mass_list , pp_file_list
328-
329-
330- def get_natoms_from_stru (geometry_inlines ):
331- key_words_list = [
332- "ATOMIC_SPECIES" ,
333- "NUMERICAL_ORBITAL" ,
334- "LATTICE_CONSTANT" ,
335- "LATTICE_VECTORS" ,
336- "ATOMIC_POSITIONS" ,
337- "NUMERICAL_DESCRIPTOR" ,
338- ]
339- keyword_sequence = []
340- keyword_line_index = []
341- atom_names = []
342- atom_numbs = []
343- tmp_line = []
344- for i in geometry_inlines :
345- if i .strip () != "" :
346- tmp_line .append (i )
347- for iline , line in enumerate (tmp_line ):
348- if line .split () == []:
349- continue
350- have_key_word = False
351- for keyword in key_words_list :
352- if keyword in line and keyword == line .split ()[0 ]:
353- keyword_sequence .append (keyword )
354- keyword_line_index .append (iline )
355- assert len (keyword_line_index ) == len (keyword_sequence )
356- assert len (keyword_sequence ) > 0
357- keyword_line_index .append (len (tmp_line ))
358- for idx , keyword in enumerate (keyword_sequence ):
359- if keyword == "ATOMIC_POSITIONS" :
360- iline = keyword_line_index [idx ] + 2
361- while iline < keyword_line_index [idx + 1 ] - 1 :
362- atom_names .append (tmp_line [iline ].split ()[0 ])
363- atom_numbs .append (int (tmp_line [iline + 2 ].split ()[0 ]))
364- iline += 3 + atom_numbs [- 1 ]
365- return atom_names , atom_numbs
366-
367-
368- def get_additional_from_STRU (geometry_inlines , nele ):
369- dpks_descriptor_kw = "NUMERICAL_DESCRIPTOR"
370- orb_file_kw = "NUMERICAL_ORBITAL"
371- dpks_descriptor = None
372- orb_file = None
373- for iline in range (len (geometry_inlines )):
374- if len (geometry_inlines [iline ]) > 0 :
375- if orb_file_kw == geometry_inlines [iline ].split ()[0 ]:
376- orb_file = []
377- for iele in range (nele ):
378- orb_file .append (geometry_inlines [iline + iele + 1 ].strip ())
379- if dpks_descriptor_kw == geometry_inlines [iline ].split ()[0 ]:
380- dpks_descriptor = geometry_inlines [iline + 1 ].strip ()
381- return orb_file , dpks_descriptor
382-
383-
384- def get_abacus_STRU (STRU , INPUT = None , n_ele = None ):
385- # read in geometry from STRU file. n_ele is the number of elements.
386- # Either n_ele or INPUT should be provided.
387- with open (STRU ) as fp :
388- geometry_inlines = fp .read ().split ("\n " )
389- for iline , line in enumerate (geometry_inlines ):
390- if line .split () == [] or len (line ) == 0 :
391- del geometry_inlines [iline ]
392- geometry_inlines .append ("" )
393- celldm , cell = get_cell (geometry_inlines )
394- atom_names , natoms , types , coords = get_coords (celldm , cell , geometry_inlines )
395- masses , pp_files = get_mass_from_STRU (geometry_inlines , atom_names )
396- orb_files , dpks_descriptor = get_additional_from_STRU (geometry_inlines , len (masses ))
397- data = {}
398- data ["atom_names" ] = atom_names
399- data ["atom_numbs" ] = natoms
400- data ["atom_types" ] = types
401- data ["cells" ] = cell
402- data ["coords" ] = coords
403- data ["atom_masses" ] = (
404- masses # Notice that this key is not defined in dpdata system.
405- )
406- data ["pp_files" ] = pp_files
407- data ["orb_files" ] = orb_files
408- data ["dpks_descriptor" ] = dpks_descriptor
256+ def get_abacus_STRU (STRU ):
257+ """Read STRU file and return a dictionary containing the structure information.
258+
259+ Args:
260+ STRU (str): The path of STRU file.
261+
262+ Returns
263+ -------
264+ dict: A dictionary containing the structure information.
265+ {
266+ "atom_names": list of str,
267+ "atom_numbs": list of int,
268+ "atom_masses": list of float,
269+ "coords": np.ndarray,
270+ "cells": np.ndarray,
271+ "pp_files": list of str,
272+ "orb_files": list of str,
273+ "dpks_descriptor": str,
274+ }
275+ """
276+ data = get_frame_from_stru (STRU )
277+ data ["atom_masses" ] = data .pop ("masses" )
278+ data ["cells" ] = data .pop ("cells" )[0 ]
279+ data ["coords" ] = data .pop ("coords" )[0 ]
280+ assert "pp_files" in data , "pp_files should be provided in STRU file."
281+ if None in data ["pp_files" ]:
282+ data ["pp_files" ] = None
283+ if "orb_files" not in data :
284+ data ["orb_files" ] = None
285+ if "dpks_descriptor" not in data :
286+ data ["dpks_descriptor" ] = None
409287 return data
410288
411289
@@ -419,7 +297,21 @@ def make_supercell_abacus(from_struct, super_cell):
419297 # )
420298 for idx_atm in from_struct ["atom_types" ]:
421299 new_types += [idx_atm ] * super_cell [0 ] * super_cell [1 ] * super_cell [2 ]
422- to_struct ["atom_types" ] = new_types
300+ to_struct ["atom_types" ] = np .array (new_types )
301+
302+ # expand move, spins
303+ for ikey in ["move" , "spins" ]:
304+ if ikey in from_struct :
305+ new_list = []
306+ for ia in range (sum (from_struct ["atom_numbs" ])):
307+ new_list += (
308+ [from_struct [ikey ][0 ][ia ]]
309+ * super_cell [0 ]
310+ * super_cell [1 ]
311+ * super_cell [2 ]
312+ )
313+ to_struct [ikey ] = np .array ([new_list ])
314+
423315 to_atom_num = (
424316 sum (from_struct ["atom_numbs" ]) * super_cell [0 ] * super_cell [1 ] * super_cell [2 ]
425317 )
0 commit comments