|
| 1 | +""" |
| 2 | +Code adapted from the following paper and code: |
| 3 | +
|
| 4 | +@misc{loew2024universalmachinelearninginteratomic, |
| 5 | + title={Universal Machine Learning Interatomic Potentials are Ready for Phonons}, |
| 6 | + author={Antoine Loew and Dewen Sun and Hai-Chen Wang and Silvana Botti and Miguel A. L. Marques}, |
| 7 | + year={2024}, |
| 8 | + eprint={2412.16551}, |
| 9 | + archivePrefix={arXiv}, |
| 10 | + primaryClass={cond-mat.mtrl-sci}, |
| 11 | + url={https://arxiv.org/abs/2412.16551}, |
| 12 | +} |
| 13 | +""" |
| 14 | + |
| 15 | +import logging |
| 16 | +from pathlib import Path |
| 17 | +from typing import Optional |
| 18 | + |
| 19 | +import numpy as np |
| 20 | +import pandas as pd |
| 21 | +import phonopy |
| 22 | +import yaml |
| 23 | +from ase import Atoms |
| 24 | +from phonopy.harmonic.dynmat_to_fc import get_commensurate_points |
| 25 | +from sklearn.metrics import mean_absolute_error |
| 26 | +from tqdm import tqdm |
| 27 | + |
| 28 | +from lambench.models.ase_models import ASEModel |
| 29 | +from lambench.tasks.calculator.phonon.phonon_utils import ( |
| 30 | + THz_TO_K, |
| 31 | + ase_to_phonopy_atoms, |
| 32 | + phonopy_to_ase_atoms, |
| 33 | +) |
| 34 | + |
| 35 | + |
| 36 | +def run_phonon_simulation_single( |
| 37 | + model: ASEModel, |
| 38 | + phonon_file: Path, |
| 39 | + distance: float, |
| 40 | + workdir: Path, |
| 41 | +) -> Optional[dict[str, float]]: |
| 42 | + """ |
| 43 | + Run phonon related calculations for a single given phonon file. |
| 44 | +
|
| 45 | + Parameters: |
| 46 | + model: ASEModel object. |
| 47 | + phonon_file: Path to the phonon file. |
| 48 | + distance: Distance for displacements. |
| 49 | + workdir: Path to the working directory. |
| 50 | + """ |
| 51 | + try: |
| 52 | + # Step 1: Run relaxation |
| 53 | + atoms: Atoms = phonopy_to_ase_atoms(phonon_file) |
| 54 | + atoms = model.run_ase_relaxation(atoms, model.calc, fmax=1e-3) |
| 55 | + |
| 56 | + # Step 2: Convert ASE Atoms object to PhonopyAtoms object |
| 57 | + phonon_atoms = ase_to_phonopy_atoms(atoms) |
| 58 | + phonon = phonopy.Phonopy( |
| 59 | + phonon_atoms, supercell_matrix=atoms.info["supercell_matrix"] |
| 60 | + ) |
| 61 | + |
| 62 | + # Step 3: Generate displacements |
| 63 | + phonon.generate_displacements(distance=distance, is_diagonal=False) |
| 64 | + |
| 65 | + # Step 4: Calculate force constants |
| 66 | + forcesets = [] |
| 67 | + |
| 68 | + for frame in phonon.supercells_with_displacements: |
| 69 | + frame_atom = Atoms( |
| 70 | + cell=frame.cell, |
| 71 | + symbols=frame.symbols, |
| 72 | + scaled_positions=frame.scaled_positions, |
| 73 | + pbc=True, |
| 74 | + ) |
| 75 | + frame_atom.calc = model.calc |
| 76 | + forces = frame_atom.get_forces() |
| 77 | + forcesets.append(forces) |
| 78 | + |
| 79 | + phonon.forces = forcesets |
| 80 | + phonon.produce_force_constants() |
| 81 | + phonon.symmetrize_force_constants() |
| 82 | + |
| 83 | + # Step 5: save output files |
| 84 | + |
| 85 | + phonon.save(workdir / phonon_file.name, settings={"force_constants": True}) |
| 86 | + |
| 87 | + # Step 6: Calculate thermal properties |
| 88 | + phonon.init_mesh() |
| 89 | + phonon.run_mesh() |
| 90 | + phonon.run_thermal_properties(temperatures=(300,)) |
| 91 | + thermal_dict = phonon.get_thermal_properties_dict() |
| 92 | + |
| 93 | + commensurate_q = get_commensurate_points(phonon.supercell_matrix) |
| 94 | + phonon_freqs = np.array([phonon.get_frequencies(q) for q in commensurate_q]) |
| 95 | + |
| 96 | + # Step 7: Updata output files |
| 97 | + with open(workdir / phonon_file.name, "r") as f: |
| 98 | + output = yaml.load(f, yaml.FullLoader) |
| 99 | + |
| 100 | + output["free_e"] = thermal_dict["free_energy"].tolist() |
| 101 | + output["entropy"] = thermal_dict["entropy"].tolist() |
| 102 | + output["heat_capacity"] = thermal_dict["heat_capacity"].tolist() |
| 103 | + output["phonon_freq"] = phonon_freqs.tolist() |
| 104 | + |
| 105 | + # TODO: optional: update and save output files |
| 106 | + return { |
| 107 | + "mp_id": phonon_file.name.split(".")[0], |
| 108 | + "entropy": output["entropy"][0], |
| 109 | + "heat_capacity": output["heat_capacity"][0], |
| 110 | + "free_energy": output["free_e"][0], |
| 111 | + "max_freq": np.max(np.array(phonon_freqs)) * THz_TO_K, |
| 112 | + } |
| 113 | + |
| 114 | + except Exception as e: |
| 115 | + logging.error(f"Error occured for {str(phonon_file.name)}: {e}") |
| 116 | + return None |
| 117 | + |
| 118 | + |
| 119 | +def run_phonon_simulation( |
| 120 | + model: ASEModel, |
| 121 | + test_data: Path, |
| 122 | + distance: float, |
| 123 | + workdir: Path, |
| 124 | +) -> dict[str, float]: |
| 125 | + """ |
| 126 | + This function runs phonon simulations for a list of test systems using the given model. |
| 127 | + """ |
| 128 | + test_files = list(test_data.glob("*.yaml.bz2")) |
| 129 | + if len(test_files) == 0: |
| 130 | + logging.error("No test files found.") |
| 131 | + return {} |
| 132 | + logging.info(f"Running phonon simulations for {len(test_files)} files...") |
| 133 | + |
| 134 | + dataframe_rows = [] |
| 135 | + for test_file in tqdm(test_files): |
| 136 | + result = run_phonon_simulation_single( |
| 137 | + model, |
| 138 | + test_file, |
| 139 | + distance, |
| 140 | + workdir, |
| 141 | + ) |
| 142 | + logging.info(f"Simulation completed for system {str(test_file.name)}.\n") |
| 143 | + |
| 144 | + if result is not None: |
| 145 | + dataframe_rows.append(result) |
| 146 | + preds = pd.DataFrame(dataframe_rows) |
| 147 | + |
| 148 | + # Post-processing |
| 149 | + results = {} |
| 150 | + try: |
| 151 | + labels = pd.read_csv(test_data / "pbe.csv") |
| 152 | + TOTAL_RECORDS = len(labels) |
| 153 | + preds.sort_values("mp_id", inplace=True) |
| 154 | + labels.sort_values("mp_id", inplace=True) |
| 155 | + |
| 156 | + # Filter predictions and labels based on valid mp_ids |
| 157 | + valid_preds = preds[ |
| 158 | + np.isfinite(preds[["free_energy", "heat_capacity"]]).all(axis=1) |
| 159 | + ] |
| 160 | + valid_mp_ids = set(valid_preds["mp_id"]) |
| 161 | + labels = labels[labels["mp_id"].isin(valid_mp_ids)] |
| 162 | + preds = valid_preds |
| 163 | + |
| 164 | + success_rate = len(preds) / TOTAL_RECORDS |
| 165 | + mae_wmax = mean_absolute_error(labels["max_freq"], preds["max_freq"]) |
| 166 | + mae_s = mean_absolute_error(labels["entropy"], preds["entropy"]) |
| 167 | + mae_f = mean_absolute_error(labels["free_energy"], preds["free_energy"]) |
| 168 | + mae_c = mean_absolute_error(labels["heat_capacity"], preds["heat_capacity"]) |
| 169 | + results = { |
| 170 | + "success_rate": success_rate, |
| 171 | + "mae_max_freq": mae_wmax, |
| 172 | + "mae_entropy": mae_s, |
| 173 | + "mae_free_energy": mae_f, |
| 174 | + "mae_heat_capacity": mae_c, |
| 175 | + } |
| 176 | + except Exception as e: |
| 177 | + logging.error(f"Error occured during post-processing: {e}") |
| 178 | + return results |
0 commit comments