Merge pull request #89 from deepmodeling/feat/support-phonon-tasks

anyangml · web-flow · commit ef1799850c5c · 2025-02-26T18:03:02.000+08:00
Feat: support phonon tasks
diff --git a/lambench/models/ase_models.py b/lambench/models/ase_models.py
@@ -3,14 +3,17 @@
 from pathlib import Path
 from typing import Optional
 
-import ase
 import dpdata
 import numpy as np
 from ase.calculators.calculator import Calculator
+from ase import Atoms
 from ase.io import write
 from tqdm import tqdm
 
 from lambench.models.basemodel import BaseLargeAtomModel
+from ase.optimize import FIRE
+from ase.constraints import FixSymmetry
+from ase.filters import FrechetCellFilter
 
 
 class ASEModel(BaseLargeAtomModel):
@@ -81,7 +84,9 @@ def evaluate(self, task) -> Optional[dict[str, float]]:
             return self.run_ase_dptest(self.calc, task.test_data)
         elif isinstance(task, CalculatorTask):
             if task.task_name == "nve_md":
-                from lambench.tasks.calculator.nve_md import run_md_nve_simulation
+                from lambench.tasks.calculator.nve_md.nve_md import (
+                    run_md_nve_simulation,
+                )
 
                 num_steps = task.calculator_params.get("num_steps", 1000)
                 timestep = task.calculator_params.get("timestep", 1.0)
@@ -91,6 +96,18 @@ def evaluate(self, task) -> Optional[dict[str, float]]:
                         self, num_steps, timestep, temperature_K
                     )
                 }
+            elif task.task_name == "phonon_mdr":
+                from lambench.tasks.calculator.phonon.phonon import (
+                    run_phonon_simulation,
+                )
+
+                task.workdir.mkdir(exist_ok=True)
+                distance = task.calculator_params.get("distance", 0.01)
+                return {
+                    "metrics": run_phonon_simulation(
+                        self, task.test_data, distance, task.workdir
+                    )
+                }
             else:
                 raise NotImplementedError(f"Task {task.task_name} is not implemented.")
 
@@ -124,7 +141,7 @@ def run_ase_dptest(calc: Calculator, test_data: Path) -> dict:
                 sys = dpdata.LabeledSystem(filepth, fmt="deepmd/npy")
             for ls in tqdm(sys, desc="Set", leave=False):  # type: ignore
                 for frame in tqdm(ls, desc="Frames", leave=False):
-                    atoms: ase.Atoms = frame.to_ase_structure()[0]  # type: ignore
+                    atoms: Atoms = frame.to_ase_structure()[0]  # type: ignore
                     atoms.calc = calc
 
                     # Energy
@@ -133,7 +150,9 @@ def run_ase_dptest(calc: Calculator, test_data: Path) -> dict:
                         if not np.isfinite(energy_predict):
                             raise ValueError("Energy prediction is non-finite.")
                     except (ValueError, RuntimeError):
-                        file = Path(f"failed_structures/{calc.name}/{atoms.symbols}.cif")
+                        file = Path(
+                            f"failed_structures/{calc.name}/{atoms.symbols}.cif"
+                        )
                         file.parent.mkdir(parents=True, exist_ok=True)
                         write(file, atoms)
                         logging.error(
@@ -229,3 +248,27 @@ def run_ase_dptest(calc: Calculator, test_data: Path) -> dict:
                 }
             )
         return res
+
+    @staticmethod
+    def run_ase_relaxation(
+        atoms: Atoms,
+        calc: Calculator,
+        fmax: float = 5e-3,
+        steps: int = 500,
+        fix_symmetry: bool = True,
+        relax_cell: bool = True,
+    ) -> Optional[Atoms]:
+        atoms.calc = calc
+        if fix_symmetry:
+            atoms.set_constraint(FixSymmetry(atoms))
+        if relax_cell:
+            atoms = FrechetCellFilter(atoms)
+        opt = FIRE(atoms, trajectory=None, logfile=None)
+        try:
+            opt.run(fmax=fmax, steps=steps)
+        except Exception as e:
+            logging.error(f"Relaxation failed: {e}")
+            return None
+        if relax_cell:
+            atoms = atoms.atoms
+        return atoms
diff --git a/lambench/tasks/calculator/calculator_tasks.yml b/lambench/tasks/calculator/calculator_tasks.yml
@@ -4,3 +4,7 @@ nve_md:
     num_steps: 10000
     timestep: 1.0
     temperature: 300
+phonon_mdr:
+  test_data: /bohr/lambench-phonon-y7vk/v1/MDR_PBE_phonon
+  calculator_params:
+    distance: 0.01
diff --git a/lambench/tasks/calculator/nve_md/nve_md.py b/lambench/tasks/calculator/nve_md/nve_md.py
@@ -11,7 +11,7 @@
 import numpy as np
 import time
 from typing import Optional
-from lambench.tasks.calculator.nve_md_data import TEST_DATA
+from lambench.tasks.calculator.nve_md.nve_md_data import TEST_DATA
 import logging
 
 
diff --git a/lambench/tasks/calculator/nve_md/nve_md_data.py b/lambench/tasks/calculator/nve_md/nve_md_data.py
diff --git a/lambench/tasks/calculator/phonon/phonon.py b/lambench/tasks/calculator/phonon/phonon.py
@@ -0,0 +1,178 @@
+"""
+Code adapted from the following paper and code:
+
+@misc{loew2024universalmachinelearninginteratomic,
+      title={Universal Machine Learning Interatomic Potentials are Ready for Phonons},
+      author={Antoine Loew and Dewen Sun and Hai-Chen Wang and Silvana Botti and Miguel A. L. Marques},
+      year={2024},
+      eprint={2412.16551},
+      archivePrefix={arXiv},
+      primaryClass={cond-mat.mtrl-sci},
+      url={https://arxiv.org/abs/2412.16551},
+}
+"""
+
+import logging
+from pathlib import Path
+from typing import Optional
+
+import numpy as np
+import pandas as pd
+import phonopy
+import yaml
+from ase import Atoms
+from phonopy.harmonic.dynmat_to_fc import get_commensurate_points
+from sklearn.metrics import mean_absolute_error
+from tqdm import tqdm
+
+from lambench.models.ase_models import ASEModel
+from lambench.tasks.calculator.phonon.phonon_utils import (
+    THz_TO_K,
+    ase_to_phonopy_atoms,
+    phonopy_to_ase_atoms,
+)
+
+
+def run_phonon_simulation_single(
+    model: ASEModel,
+    phonon_file: Path,
+    distance: float,
+    workdir: Path,
+) -> Optional[dict[str, float]]:
+    """
+    Run phonon related calculations for a single given phonon file.
+
+    Parameters:
+        model: ASEModel object.
+        phonon_file: Path to the phonon file.
+        distance: Distance for displacements.
+        workdir: Path to the working directory.
+    """
+    try:
+        # Step 1: Run relaxation
+        atoms: Atoms = phonopy_to_ase_atoms(phonon_file)
+        atoms = model.run_ase_relaxation(atoms, model.calc, fmax=1e-3)
+
+        # Step 2: Convert ASE Atoms object to PhonopyAtoms object
+        phonon_atoms = ase_to_phonopy_atoms(atoms)
+        phonon = phonopy.Phonopy(
+            phonon_atoms, supercell_matrix=atoms.info["supercell_matrix"]
+        )
+
+        # Step 3: Generate displacements
+        phonon.generate_displacements(distance=distance, is_diagonal=False)
+
+        # Step 4: Calculate force constants
+        forcesets = []
+
+        for frame in phonon.supercells_with_displacements:
+            frame_atom = Atoms(
+                cell=frame.cell,
+                symbols=frame.symbols,
+                scaled_positions=frame.scaled_positions,
+                pbc=True,
+            )
+            frame_atom.calc = model.calc
+            forces = frame_atom.get_forces()
+            forcesets.append(forces)
+
+        phonon.forces = forcesets
+        phonon.produce_force_constants()
+        phonon.symmetrize_force_constants()
+
+        # Step 5: save output files
+
+        phonon.save(workdir / phonon_file.name, settings={"force_constants": True})
+
+        # Step 6: Calculate thermal properties
+        phonon.init_mesh()
+        phonon.run_mesh()
+        phonon.run_thermal_properties(temperatures=(300,))
+        thermal_dict = phonon.get_thermal_properties_dict()
+
+        commensurate_q = get_commensurate_points(phonon.supercell_matrix)
+        phonon_freqs = np.array([phonon.get_frequencies(q) for q in commensurate_q])
+
+        # Step 7: Updata output files
+        with open(workdir / phonon_file.name, "r") as f:
+            output = yaml.load(f, yaml.FullLoader)
+
+        output["free_e"] = thermal_dict["free_energy"].tolist()
+        output["entropy"] = thermal_dict["entropy"].tolist()
+        output["heat_capacity"] = thermal_dict["heat_capacity"].tolist()
+        output["phonon_freq"] = phonon_freqs.tolist()
+
+        # TODO: optional: update and save output files
+        return {
+            "mp_id": phonon_file.name.split(".")[0],
+            "entropy": output["entropy"][0],
+            "heat_capacity": output["heat_capacity"][0],
+            "free_energy": output["free_e"][0],
+            "max_freq": np.max(np.array(phonon_freqs)) * THz_TO_K,
+        }
+
+    except Exception as e:
+        logging.error(f"Error occured for {str(phonon_file.name)}: {e}")
+        return None
+
+
+def run_phonon_simulation(
+    model: ASEModel,
+    test_data: Path,
+    distance: float,
+    workdir: Path,
+) -> dict[str, float]:
+    """
+    This function runs phonon simulations for a list of test systems using the given model.
+    """
+    test_files = list(test_data.glob("*.yaml.bz2"))
+    if len(test_files) == 0:
+        logging.error("No test files found.")
+        return {}
+    logging.info(f"Running phonon simulations for {len(test_files)} files...")
+
+    dataframe_rows = []
+    for test_file in tqdm(test_files):
+        result = run_phonon_simulation_single(
+            model,
+            test_file,
+            distance,
+            workdir,
+        )
+        logging.info(f"Simulation completed for system {str(test_file.name)}.\n")
+
+        if result is not None:
+            dataframe_rows.append(result)
+    preds = pd.DataFrame(dataframe_rows)
+
+    # Post-processing
+    results = {}
+    try:
+        labels = pd.read_csv(test_data / "pbe.csv")
+        TOTAL_RECORDS = len(labels)
+        preds.sort_values("mp_id", inplace=True)
+        labels.sort_values("mp_id", inplace=True)
+
+        # Filter predictions and labels based on valid mp_ids
+        valid_preds = preds[
+            np.isfinite(preds[["free_energy", "heat_capacity"]]).all(axis=1)
+        ]
+        valid_mp_ids = set(valid_preds["mp_id"])
+        labels = labels[labels["mp_id"].isin(valid_mp_ids)]
+        preds = valid_preds
+
+        success_rate = len(preds) / TOTAL_RECORDS
+        mae_wmax = mean_absolute_error(labels["max_freq"], preds["max_freq"])
+        mae_s = mean_absolute_error(labels["entropy"], preds["entropy"])
+        mae_f = mean_absolute_error(labels["free_energy"], preds["free_energy"])
+        mae_c = mean_absolute_error(labels["heat_capacity"], preds["heat_capacity"])
+        results = {
+            "success_rate": success_rate,
+            "mae_max_freq": mae_wmax,
+            "mae_entropy": mae_s,
+            "mae_free_energy": mae_f,
+            "mae_heat_capacity": mae_c,
+        }
+    except Exception as e:
+        logging.error(f"Error occured during post-processing: {e}")
+    return results
diff --git a/lambench/tasks/calculator/phonon/phonon_utils.py b/lambench/tasks/calculator/phonon/phonon_utils.py
@@ -0,0 +1,35 @@
+from ase import Atoms
+from phonopy.structure.atoms import PhonopyAtoms
+from pathlib import Path
+import phonopy
+
+
+# Constants unit conversion
+THz_TO_K = 47.9924
+
+
+def ase_to_phonopy_atoms(atoms: Atoms) -> PhonopyAtoms:
+    """
+    Convert ASE Atoms object to PhonopyAtoms object.
+    """
+    # Extract atomic symbols and positions
+    symbols = atoms.get_chemical_symbols()
+    positions = atoms.get_positions()
+    cell = atoms.get_cell()
+    masses = atoms.get_masses()
+
+    return PhonopyAtoms(symbols=symbols, positions=positions, cell=cell, masses=masses)
+
+
+def phonopy_to_ase_atoms(phonon_file: Path) -> Atoms:
+    """
+    Convert PhonopyAtoms object to ASE Atoms object.
+    """
+    phonon = phonopy.load(phonon_file)
+    return Atoms(
+        cell=phonon.unitcell.cell,
+        symbols=phonon.unitcell.symbols,
+        scaled_positions=phonon.unitcell.scaled_positions,
+        pbc=True,
+        info={"supercell_matrix": phonon.supercell_matrix},
+    )
diff --git a/pyproject.toml b/pyproject.toml
@@ -38,6 +38,8 @@ orb = ["orb-models","pynanoflann@git+https://github.com/dwastberg/pynanoflann#eg
 sevenn = ["sevenn"]
 test = ["pytest"]
 dflow = ["pydflow", "lbg"]
+phonopy = ["phonopy@git+https://github.com/phonopy/phonopy.git", "scikit-learn"]
+
 
 [project.urls]
 Homepage = "https://github.com/deepmodeling/LAMBench"
diff --git a/test/tasks/calculator/test_nve_md.py b/test/tasks/calculator/test_nve_md.py
@@ -1,4 +1,4 @@
-from lambench.tasks.calculator.nve_md import (
+from lambench.tasks.calculator.nve_md.nve_md import (
     nve_simulation_single,
     run_md_nve_simulation,
 )

Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,4 @@`
`1`		`-from lambench.tasks.calculator.nve_md import (`
	`1`	`+from lambench.tasks.calculator.nve_md.nve_md import (`
`2`	`2`	`nve_simulation_single,`
`3`	`3`	`run_md_nve_simulation,`
`4`	`4`	`)`