Skip to content

Commit 4f92efb

Browse files
authored
Merge pull request #98 from deepmodeling/dev-gmy
feat: batch infer
2 parents cad1d4f + b2413d0 commit 4f92efb

File tree

4 files changed

+122
-1
lines changed

4 files changed

+122
-1
lines changed

CITATION.cff

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,8 @@ authors:
1717
orcid: https://orcid.org/0000-0001-6242-0439
1818
- family-names: Guo
1919
given-names: Mingyu
20-
affiliation: AI for Science Institute, Beijing
20+
affiliation: DP Technology; School of Chemistry, Sun Yat-sen University, Guangzhou
21+
orcid: https://orcid.org/0009-0008-3744-1543
2122
- family-names: Zhang
2223
given-names: Duo
2324
affiliation: AI for Science Institute, Beijing

lambench/models/ase_models.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,6 +156,16 @@ def evaluate(self, task) -> Optional[dict[str, float]]:
156156
self, task.test_data, distance, task.workdir
157157
)
158158
}
159+
elif task.task_name == "batch_inference_efficiency":
160+
from lambench.tasks.calculator.infer_efficiency.infer_efficiency import (
161+
run_batch_infer,
162+
)
163+
warmup_ratio = task.calculator_params.get("warmup_ratio", 0.2)
164+
return {
165+
"metrics": run_batch_infer(
166+
self, task.test_data, warmup_ratio
167+
)
168+
}
159169
else:
160170
raise NotImplementedError(f"Task {task.task_name} is not implemented.")
161171

lambench/tasks/calculator/calculator_tasks.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,8 @@ phonon_mdr:
88
test_data: /bohr/lambench-phonon-y7vk/v1/MDR_PBE_phonon
99
calculator_params:
1010
distance: 0.01
11+
batch_inference_efficiency:
12+
test_data: /bohr/batch-infer-7ipn/v1/batch_infer_confs
13+
calculator_params:
14+
warmup_ratio: 0.2
15+
Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
from lambench.models.ase_models import ASEModel
2+
from ase import Atoms
3+
from ase.io import read
4+
import logging
5+
import time
6+
import numpy as np
7+
from typing import List, Dict, Tuple
8+
from pathlib import Path
9+
10+
# NOTE(review): calling logging.basicConfig at import time of a library module
# configures the process-wide root logger and, with filemode='w', truncates
# 'infer.log' in the current working directory on every import — presumably
# intentional for standalone benchmark runs, but confirm; the conventional
# pattern is a module-level `logger = logging.getLogger(__name__)` with
# handler configuration left to the application entry point.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s', filemode='w', filename='infer.log')
12+
13+
def run_batch_infer(
    model: ASEModel,
    test_data: Path,
    warmup_ratio: float
) -> Dict[str, Dict[str, None | float]]:
    """
    Infer for all batches, return average time and success rate for each system.

    Each immediate subdirectory of ``test_data`` is treated as one system and
    timed independently via :func:`run_one_batch_infer`.

    Parameters
    ----------
    model : ASEModel
        Model wrapper whose ``calc`` attribute is an ASE calculator.
    test_data : Path
        Directory containing one subdirectory of ``*.vasp`` files per system.
    warmup_ratio : float
        Fraction of each system's structures excluded from timing as warmup.

    Returns
    -------
    Dict[str, Dict[str, float]]
        Mapping ``system_name -> {"average_time_per_step": float | None,
        "success_rate": float}``; a system that raises gets
        ``(None, 0.0)`` instead of aborting the whole benchmark.
    """
    results: Dict[str, Dict[str, None | float]] = {}
    # Sort subfolders: Path.iterdir() order is filesystem-dependent, and a
    # benchmark should iterate (and report) systems in a reproducible order.
    subfolders = sorted(sub for sub in test_data.iterdir() if sub.is_dir())
    for subfolder in subfolders:
        system_name = subfolder.name
        try:
            batch_result = run_one_batch_infer(model, subfolder, warmup_ratio)
            average_time = batch_result["average_time_per_step"]
            success_rate = batch_result["success_rate"]
            results[system_name] = {
                "average_time_per_step": average_time,
                "success_rate": success_rate,
            }
            # Lazy %-style args: formatting is skipped if INFO is disabled.
            logging.info(
                "Batch inference completed for system %s with average time %s s and success rate %.2f%%",
                system_name,
                average_time,
                success_rate,
            )
        except Exception as e:
            # One failing system must not abort the remaining systems; record
            # it as a total failure and keep going.
            logging.error("Error in batch inference for system %s: %s", system_name, e)
            results[system_name] = {
                "average_time_per_step": None,
                "success_rate": 0.0,
            }
    return results
42+
43+
def run_one_batch_infer(
    model: ASEModel,
    test_data: Path,
    warmup_ratio: float
) -> Dict[str, float]:
    """
    Infer for one batch, return averaged time and success rate, starting timing at warmup_ratio.

    Parameters
    ----------
    model : ASEModel
        Model wrapper whose ``calc`` attribute is an ASE calculator.
    test_data : Path
        Directory containing the system's ``*.vasp`` structure files.
    warmup_ratio : float
        Fraction of structures (by position in the sorted file list) treated
        as warmup and excluded from the timing average.

    Returns
    -------
    Dict[str, float]
        ``{"average_time_per_step": float, "success_rate": float}``; the
        average is ``nan`` when no timed step succeeded.
    """
    # Sort the files: glob() order is filesystem-dependent, and the warmup
    # split below (`i >= start_index`) depends on file order — without
    # sorting, the timed subset (and thus the benchmark result) would differ
    # between machines and runs.
    test_files = sorted(test_data.glob("*.vasp"))
    test_atoms = [read(file) for file in test_files]
    start_index = int(len(test_atoms) * warmup_ratio)
    total_time = 0.0
    valid_steps = 0
    successful_inferences = 0
    total_inferences = len(test_atoms)

    # Voigt order (xx, yy, zz, yz, xz, xy) -> symmetric 3x3 index pairs.
    voigt_pairs = ((0, 0), (1, 1), (2, 2), (1, 2), (0, 2), (0, 1))

    for i, atoms in enumerate(test_atoms):
        atoms.calc = model.calc
        start = time.time()
        try:
            # Return values are intentionally unused: the calls force the
            # calculator to evaluate energy/forces/stress so the timing
            # reflects the full cost of one inference step.
            atoms.get_potential_energy()
            atoms.get_forces()
            stress = atoms.get_stress()
            volume = atoms.get_volume()
            stress_tensor = np.zeros((3, 3))
            for voigt_index, (row, col) in enumerate(voigt_pairs):
                stress_tensor[row, col] = stress[voigt_index]
                stress_tensor[col, row] = stress[voigt_index]
            virial = -stress_tensor * volume  # noqa: F841 -- kept to mirror production post-processing cost
            successful_inferences += 1
        except Exception as e:
            # A failed structure contributes neither time nor a valid step;
            # it only lowers the success rate.
            logging.error("Error in inference for %s: %s", str(atoms.symbols), e)
            continue

        end = time.time()
        elapsed_time = end - start

        # Only steps past the warmup window count toward the average.
        if i >= start_index:
            total_time += elapsed_time
            valid_steps += 1

        logging.info("Inference completed for system %s in %s s", str(atoms.symbols), elapsed_time)

    if valid_steps > 0:
        average_time_per_step = total_time / valid_steps
    else:
        # No successful post-warmup step: average is undefined.
        average_time_per_step = np.nan

    if total_inferences > 0:
        success_rate = (successful_inferences / total_inferences) * 100
    else:
        success_rate = 0.0

    return {
        "average_time_per_step": average_time_per_step,
        "success_rate": success_rate,
    }

0 commit comments

Comments
 (0)