diff --git a/deepmd/pd/entrypoints/main.py b/deepmd/pd/entrypoints/main.py
index 4e47dbfe77..fe092111b1 100644
--- a/deepmd/pd/entrypoints/main.py
+++ b/deepmd/pd/entrypoints/main.py
@@ -7,6 +7,7 @@
     Path,
 )
 from typing import (
+    Any,
     Optional,
     Union,
 )
@@ -80,15 +81,15 @@
 
 
 def get_trainer(
-    config,
-    init_model=None,
-    restart_model=None,
-    finetune_model=None,
-    force_load=False,
-    init_frz_model=None,
-    shared_links=None,
-    finetune_links=None,
-):
+    config: dict[str, Any],
+    init_model: Optional[str] = None,
+    restart_model: Optional[str] = None,
+    finetune_model: Optional[str] = None,
+    force_load: bool = False,
+    init_frz_model: Optional[str] = None,
+    shared_links: Optional[dict[str, Any]] = None,
+    finetune_links: Optional[dict[str, Any]] = None,
+) -> training.Trainer:
     multi_task = "model_dict" in config.get("model", {})
 
     # Initialize DDP
@@ -98,17 +99,22 @@ def get_trainer(
         fleet.init(is_collective=True)
 
     def prepare_trainer_input_single(
-        model_params_single, data_dict_single, rank=0, seed=None
-    ):
+        model_params_single: dict[str, Any],
+        data_dict_single: dict[str, Any],
+        rank: int = 0,
+        seed: Optional[int] = None,
+    ) -> tuple[DpLoaderSet, Optional[DpLoaderSet], Optional[DPPath]]:
         training_dataset_params = data_dict_single["training_data"]
         validation_dataset_params = data_dict_single.get("validation_data", None)
         validation_systems = (
             validation_dataset_params["systems"] if validation_dataset_params else None
         )
         training_systems = training_dataset_params["systems"]
-        training_systems = process_systems(training_systems)
+        trn_patterns = training_dataset_params.get("rglob_patterns", None)
+        training_systems = process_systems(training_systems, patterns=trn_patterns)
         if validation_systems is not None:
-            validation_systems = process_systems(validation_systems)
+            val_patterns = validation_dataset_params.get("rglob_patterns", None)
+            validation_systems = process_systems(validation_systems, val_patterns)
 
         # stat files
         stat_file_path_single = data_dict_single.get("stat_file", None)
@@ -342,6 +348,7 @@ def freeze(
     model: str,
     output: str = "frozen_model.json",
     head: Optional[str] = None,
+    do_atomic_virial: bool = False,
 ) -> None:
     paddle.set_flags(
         {
@@ -374,7 +381,7 @@ def freeze(
                 None,  # fparam
                 None,  # aparam
                 # InputSpec([], dtype="bool", name="do_atomic_virial"),  # do_atomic_virial
-                False,  # do_atomic_virial
+                do_atomic_virial,  # do_atomic_virial
             ],
             full_graph=True,
         )
@@ -396,7 +403,7 @@ def freeze(
                 None,  # fparam
                 None,  # aparam
                 # InputSpec([], dtype="bool", name="do_atomic_virial"),  # do_atomic_virial
-                False,  # do_atomic_virial
+                do_atomic_virial,  # do_atomic_virial
                 (
                     InputSpec([-1], "int64", name="send_list"),
                     InputSpec([-1], "int32", name="send_proc"),
@@ -409,6 +416,26 @@ def freeze(
             ],
             full_graph=True,
         )
+    for method_name in [
+        "get_buffer_rcut",
+        "get_buffer_type_map",
+        "get_buffer_dim_fparam",
+        "get_buffer_dim_aparam",
+        "get_buffer_intensive",
+        "get_buffer_sel_type",
+        "get_buffer_numb_dos",
+        "get_buffer_task_dim",
+    ]:
+        if hasattr(model, method_name):
+            setattr(
+                model,
+                method_name,
+                paddle.jit.to_static(
+                    getattr(model, method_name),
+                    input_spec=[],
+                    full_graph=True,
+                ),
+            )
     if output.endswith(".json"):
         output = output[:-5]
     paddle.jit.save(
diff --git a/deepmd/pd/infer/deep_eval.py b/deepmd/pd/infer/deep_eval.py
index 61c3f9e9a3..696531ed7f 100644
--- a/deepmd/pd/infer/deep_eval.py
+++ b/deepmd/pd/infer/deep_eval.py
@@ -1,4 +1,5 @@
 # SPDX-License-Identifier: LGPL-3.0-or-later
+import logging
 from typing import (
     TYPE_CHECKING,
     Any,
@@ -9,6 +10,7 @@
 
 import numpy as np
 import paddle
+from paddle import inference as paddle_inference
 
 from deepmd.dpmodel.common import PRECISION_DICT as NP_PRECISION_DICT
 from deepmd.dpmodel.output_def import (
@@ -16,16 +18,35 @@
     OutputVariableCategory,
     OutputVariableDef,
 )
+from deepmd.infer.deep_dipole import (
+    DeepDipole,
+)
+from deepmd.infer.deep_dos import (
+    DeepDOS,
+)
 from deepmd.infer.deep_eval import DeepEval as DeepEvalWrapper
 from deepmd.infer.deep_eval import (
     DeepEvalBackend,
 )
+from deepmd.infer.deep_polar import (
+    DeepGlobalPolar,
+    DeepPolar,
+)
 from deepmd.infer.deep_pot import (
     DeepPot,
 )
+from deepmd.infer.deep_property import (
+    DeepProperty,
+)
+from deepmd.infer.deep_wfc import (
+    DeepWFC,
+)
 from deepmd.pd.model.model import (
     get_model,
 )
+from deepmd.pd.model.network.network import (
+    TypeEmbedNetConsistent,
+)
 from deepmd.pd.train.wrapper import (
     ModelWrapper,
 )
@@ -42,6 +63,12 @@
     to_numpy_array,
     to_paddle_tensor,
 )
+from deepmd.utils.econf_embd import (
+    sort_element_type,
+)
+from deepmd.utils.model_branch_dict import (
+    get_model_dict,
+)
 
 if TYPE_CHECKING:
     import ase.neighborlist
@@ -50,6 +77,8 @@
         BaseModel,
     )
 
+log = logging.getLogger(__name__)
+
 
 class DeepEval(DeepEvalBackend):
     """Paddle backend implementation of DeepEval.
@@ -80,8 +109,9 @@ def __init__(
         auto_batch_size: Union[bool, int, AutoBatchSize] = True,
         neighbor_list: Optional["ase.neighborlist.NewPrimitiveNeighborList"] = None,
         head: Optional[Union[str, int]] = None,
+        no_jit: bool = False,
         **kwargs: Any,
-    ):
+    ) -> None:
         enable_prim(True)
         self.output_def = output_def
         self.model_path = model_file
@@ -114,12 +144,42 @@ def __init__(
             # model = paddle.jit.to_static(model)
             self.dp = ModelWrapper(model)
             self.dp.set_state_dict(state_dict)
+            self.rcut = self.dp.model["Default"].get_rcut()
+            self.type_map: list[str] = self.dp.model["Default"].get_type_map()
+            self.dp.eval()
+            self.static_model = False
+        elif str(self.model_path).endswith(".json"):
+            self.dp = paddle.jit.load(self.model_path[:-5])
+            self.rcut = self.dp.get_buffer_rcut().item()
+            self.type_map: list[str] = "".join(
+                [chr(x) for x in self.dp.get_buffer_type_map().numpy()]
+            ).split(" ")
+            config = paddle_inference.Config(
+                self.model_path, self.model_path[:-5] + ".pdiparams"  # suffix only
+            )
+            config.enable_custom_passes(
+                ["add_shadow_output_after_dead_parameter_pass"], True
+            )
+            config.enable_use_gpu(4096, 0)
+            config.disable_glog_info()
+
+            self.predictor = paddle_inference.create_predictor(config)
+            self.coord_handle = self.predictor.get_input_handle("coord")
+            self.atype_handle = self.predictor.get_input_handle("atype")
+            self.box_handle = self.predictor.get_input_handle("box")
+
+            self.atom_energy_handle = self.predictor.get_output_handle("fetch_name_0")
+            self.atom_virial_handle = self.predictor.get_output_handle("fetch_name_1")
+            self.energy_handle = self.predictor.get_output_handle("fetch_name_2")
+            self.force_handle = self.predictor.get_output_handle("fetch_name_3")
+            self.mask_handle = self.predictor.get_output_handle("fetch_name_4")
+            self.virial_handle = self.predictor.get_output_handle("fetch_name_5")
+            self.static_model = True
         else:
-            # self.dp = paddle.jit.load(self.model_path.split(".json")[0])
-            raise ValueError(f"Unknown model file format: {self.model_path}!")
-        self.dp.eval()
-        self.rcut = self.dp.model["Default"].get_rcut()
-        self.type_map = self.dp.model["Default"].get_type_map()
+            raise NotImplementedError(
+                f"Only support .pd or .json format, but got {model_file}"
+            )
+
         if isinstance(auto_batch_size, bool):
             if auto_batch_size:
                 self.auto_batch_size = AutoBatchSize()
@@ -131,9 +191,14 @@ def __init__(
             self.auto_batch_size = auto_batch_size
         else:
             raise TypeError("auto_batch_size should be bool, int, or AutoBatchSize")
-        self._has_spin = getattr(self.dp.model["Default"], "has_spin", False)
+        self._has_spin = (
+            getattr(self.dp.model["Default"], "has_spin", False)
+            if isinstance(self.dp, ModelWrapper)
+            else False
+        )
         if callable(self._has_spin):
             self._has_spin = self._has_spin()
+        self._has_hessian = False
 
     def get_rcut(self) -> float:
         """Get the cutoff radius of this model."""
@@ -149,21 +214,56 @@ def get_type_map(self) -> list[str]:
 
     def get_dim_fparam(self) -> int:
         """Get the number (dimension) of frame parameters of this DP."""
+        if self.static_model:
+            return self.dp.get_buffer_dim_fparam().item()  # tensor -> Python scalar
         return self.dp.model["Default"].get_dim_fparam()
 
     def get_dim_aparam(self) -> int:
         """Get the number (dimension) of atomic parameters of this DP."""
+        if self.static_model:
+            return self.dp.get_buffer_dim_aparam().item()  # tensor -> Python scalar
         return self.dp.model["Default"].get_dim_aparam()
 
+    def has_default_fparam(self) -> bool:
+        """Check if the model has default frame parameters."""
+        try:
+            return self.dp.model["Default"].has_default_fparam()
+        except AttributeError:
+            # for compatibility with old models
+            return False
+
     def get_intensive(self) -> bool:
         return self.dp.model["Default"].get_intensive()
 
+    def get_var_name(self) -> str:
+        """Get the name of the property."""
+        # Not every model defines ``get_var_name``; look it up once and only
+        # call it when it is actually a callable attribute.
+        getter = getattr(self.dp.model["Default"], "get_var_name", None)
+        if callable(getter):
+            return getter()
+        raise NotImplementedError
+
     @property
     def model_type(self) -> type["DeepEvalWrapper"]:
         """The the evaluator of the model type."""
+        if self.static_model:
+            return DeepPot
         model_output_type = self.dp.model["Default"].model_output_type()
         if "energy" in model_output_type:
             return DeepPot
+        elif "dos" in model_output_type:
+            return DeepDOS
+        elif "dipole" in model_output_type:
+            return DeepDipole
+        elif "polar" in model_output_type:
+            return DeepPolar
+        elif "global_polar" in model_output_type:
+            return DeepGlobalPolar
+        elif "wfc" in model_output_type:
+            return DeepWFC
+        elif self.get_var_name() in model_output_type:
+            return DeepProperty
         else:
             raise RuntimeError("Unknown model type")
 
@@ -184,18 +284,33 @@ def get_task_dim(self) -> int:
         """Get the output dimension."""
         return self.dp.model["Default"].get_task_dim()
 
-    def get_has_efield(self):
+    def get_has_efield(self) -> bool:
         """Check if the model has efield."""
         return False
 
-    def get_ntypes_spin(self):
+    def get_ntypes_spin(self) -> int:
         """Get the number of spin atom types of this model. Only used in old implement."""
         return 0
 
-    def get_has_spin(self):
+    def get_has_spin(self) -> bool:
         """Check if the model has spin atom types."""
         return self._has_spin
 
+    def get_has_hessian(self) -> bool:
+        """Check if the model has hessian."""
+        return self._has_hessian
+
+    def get_model_branch(self) -> tuple[dict[str, str], dict[str, dict[str, Any]]]:
+        """Get the model branch information as (alias dict, branch dict)."""
+        if "model_dict" not in self.model_def_script:
+            # single-task model: the only branch is "Default", with no aliases
+            return {"Default": "Default"}, {"Default": {"alias": [], "info": {}}}
+        # multi-task model: both dicts are built from the "model_dict" section
+        alias_dict, branch_dict = get_model_dict(
+            self.model_def_script["model_dict"]
+        )
+        return alias_dict, branch_dict
+
     def eval(
         self,
         coords: np.ndarray,
@@ -301,6 +416,7 @@ def _get_request_defs(self, atomic: bool) -> list[OutputVariableDef]:
                     OutputVariableCategory.REDU,
                     OutputVariableCategory.DERV_R,
                     OutputVariableCategory.DERV_C_REDU,
+                    OutputVariableCategory.DERV_R_DERV_R,
                 )
             ]
 
@@ -323,7 +439,7 @@ def _eval_func(self, inner_func: Callable, numb_test: int, natoms: int) -> Calla
         """
         if self.auto_batch_size is not None:
 
-            def eval_func(*args, **kwargs):
+            def eval_func(*args: Any, **kwargs: Any) -> Any:
                 return self.auto_batch_size.execute_all(
                     inner_func, numb_test, natoms, *args, **kwargs
                 )
@@ -358,9 +474,127 @@ def _eval_model(
         aparam: Optional[np.ndarray],
         request_defs: list[OutputVariableDef],
     ):
-        model = self.dp.to(DEVICE)
+        if not self.static_model:
+            model = self.dp.to(DEVICE)
         prec = NP_PRECISION_DICT[RESERVED_PRECISION_DICT[GLOBAL_PD_FLOAT_PRECISION]]
 
+        nframes = coords.shape[0]
+        if len(atom_types.shape) == 1:
+            natoms = len(atom_types)
+            atom_types = np.tile(atom_types, nframes).reshape([nframes, -1])
+        else:
+            natoms = len(atom_types[0])
+
+        if self.static_model:
+            self.coord_handle.copy_from_cpu(
+                coords.reshape([nframes, natoms, 3]).astype(prec)
+            )
+            self.atype_handle.copy_from_cpu(
+                atom_types.astype(
+                    NP_PRECISION_DICT[RESERVED_PRECISION_DICT[paddle.int64]]
+                )
+            )
+            if cells is not None:
+                self.box_handle.copy_from_cpu(cells.reshape([nframes, 3, 3]))
+
+            if fparam is not None:
+                raise NotImplementedError(
+                    "fparam_input is not supported for .json files. Please use a .pd file instead."
+                )
+
+            if aparam is not None:
+                raise NotImplementedError(
+                    "aparam_input is not supported for .json files. Please use a .pd file instead."
+                )
+
+        else:
+            coord_input = paddle.to_tensor(
+                coords.reshape([nframes, natoms, 3]).astype(prec),
+                dtype=GLOBAL_PD_FLOAT_PRECISION,
+                place=DEVICE,
+            )
+            type_input = paddle.to_tensor(
+                atom_types.astype(
+                    NP_PRECISION_DICT[RESERVED_PRECISION_DICT[paddle.int64]]
+                ),
+                dtype=paddle.int64,
+                place=DEVICE,
+            )
+            if cells is not None:
+                box_input = paddle.to_tensor(
+                    cells.reshape([nframes, 3, 3]),
+                    dtype=GLOBAL_PD_FLOAT_PRECISION,
+                    place=DEVICE,
+                )
+            else:
+                box_input = None
+            if fparam is not None:
+                fparam_input = to_paddle_tensor(
+                    fparam.reshape([nframes, self.get_dim_fparam()])
+                )
+            else:
+                fparam_input = None
+            if aparam is not None:
+                aparam_input = to_paddle_tensor(
+                    aparam.reshape([nframes, natoms, self.get_dim_aparam()])
+                )
+            else:
+                aparam_input = None
+
+        do_atomic_virial = any(
+            x.category == OutputVariableCategory.DERV_C for x in request_defs
+        )
+        if self.static_model:
+            self.predictor.run()
+            batch_output = {
+                "atom_energy": self.atom_energy_handle.copy_to_cpu(),
+                "energy": self.energy_handle.copy_to_cpu(),
+                "force": self.force_handle.copy_to_cpu(),
+                "mask": self.mask_handle.copy_to_cpu(),
+                "virial": self.virial_handle.copy_to_cpu(),
+            }
+            if do_atomic_virial:
+                batch_output["atom_virial"] = self.atom_virial_handle.copy_to_cpu()
+        else:
+            batch_output = model(
+                coord_input,
+                type_input,
+                box=box_input,
+                do_atomic_virial=do_atomic_virial,
+                fparam=fparam_input,
+                aparam=aparam_input,
+            )
+            if isinstance(batch_output, tuple):
+                batch_output = batch_output[0]
+
+        results = []
+        for odef in request_defs:
+            pd_name = self._OUTDEF_DP2BACKEND[odef.name]
+            if pd_name in batch_output:
+                shape = self._get_output_shape(odef, nframes, natoms)
+                out = batch_output[pd_name].reshape(shape)
+                if not self.static_model:
+                    out = out.numpy()
+                results.append(out)
+            else:
+                shape = self._get_output_shape(odef, nframes, natoms)
+                results.append(
+                    np.full(np.abs(shape), np.nan, dtype=prec)
+                )  # this is kinda hacky
+        return tuple(results)
+
+    def _eval_model_spin(
+        self,
+        coords: np.ndarray,
+        cells: Optional[np.ndarray],
+        atom_types: np.ndarray,
+        spins: np.ndarray,
+        fparam: Optional[np.ndarray],
+        aparam: Optional[np.ndarray],
+        request_defs: list[OutputVariableDef],
+    ) -> tuple[np.ndarray, ...]:
+        model = self.dp.to(DEVICE)
+
         nframes = coords.shape[0]
         if len(atom_types.shape) == 1:
             natoms = len(atom_types)
@@ -369,13 +603,14 @@ def _eval_model(
             natoms = len(atom_types[0])
 
         coord_input = paddle.to_tensor(
-            coords.reshape([nframes, natoms, 3]).astype(prec),
+            coords.reshape([nframes, natoms, 3]),
             dtype=GLOBAL_PD_FLOAT_PRECISION,
             place=DEVICE,
         )
-        type_input = paddle.to_tensor(
-            atom_types.astype(NP_PRECISION_DICT[RESERVED_PRECISION_DICT[paddle.int64]]),
-            dtype=paddle.int64,
+        type_input = paddle.to_tensor(atom_types, dtype=paddle.int64, place=DEVICE)
+        spin_input = paddle.to_tensor(
+            spins.reshape([nframes, natoms, 3]),
+            dtype=GLOBAL_PD_FLOAT_PRECISION,
             place=DEVICE,
         )
         if cells is not None:
@@ -398,12 +633,14 @@ def _eval_model(
             )
         else:
             aparam_input = None
+
         do_atomic_virial = any(
-            x.category == OutputVariableCategory.DERV_C for x in request_defs
+            x.category == OutputVariableCategory.DERV_C_REDU for x in request_defs
         )
         batch_output = model(
             coord_input,
             type_input,
+            spin=spin_input,
             box=box_input,
             do_atomic_virial=do_atomic_virial,
             fparam=fparam_input,
@@ -417,28 +654,24 @@ def _eval_model(
             pd_name = self._OUTDEF_DP2BACKEND[odef.name]
             if pd_name in batch_output:
                 shape = self._get_output_shape(odef, nframes, natoms)
-                out = batch_output[pd_name].reshape(shape).numpy()
+                out = batch_output[pd_name].reshape(shape).detach().cpu().numpy()
                 results.append(out)
             else:
                 shape = self._get_output_shape(odef, nframes, natoms)
                 results.append(
-                    np.full(np.abs(shape), np.nan, dtype=prec)
+                    np.full(
+                        np.abs(shape),
+                        np.nan,
+                        dtype=NP_PRECISION_DICT[
+                            RESERVED_PRECISION_DICT[GLOBAL_PD_FLOAT_PRECISION]
+                        ],
+                    )
                 )  # this is kinda hacky
         return tuple(results)
 
-    def _eval_model_spin(
-        self,
-        coords: np.ndarray,
-        cells: Optional[np.ndarray],
-        atom_types: np.ndarray,
-        spins: np.ndarray,
-        fparam: Optional[np.ndarray],
-        aparam: Optional[np.ndarray],
-        request_defs: list[OutputVariableDef],
-    ):
-        raise NotImplementedError("_eval_model_spin is not supported yet.")
-
-    def _get_output_shape(self, odef, nframes, natoms):
+    def _get_output_shape(
+        self, odef: OutputVariableDef, nframes: int, natoms: int
+    ) -> list[int]:
         if odef.category == OutputVariableCategory.DERV_C_REDU:
             # virial
             return [nframes, *odef.shape[:-1], 9]
@@ -456,6 +689,9 @@ def _get_output_shape(self, odef, nframes, natoms):
             # Something wrong here?
             # return [nframes, *shape, natoms, 1]
             return [nframes, natoms, *odef.shape, 1]
+        elif odef.category == OutputVariableCategory.DERV_R_DERV_R:
+            return [nframes, 3 * natoms, 3 * natoms]
+            # return [nframes, *odef.shape, 3 * natoms, 3 * natoms]
         else:
             raise RuntimeError("unknown category")
 
@@ -480,7 +716,14 @@ def eval_typeebd(self) -> np.ndarray:
         deepmd.pd.model.network.network.TypeEmbedNetConsistent :
             The type embedding network.
         """
-        raise NotImplementedError("eval_typeebd is not supported yet.")
+        out = []
+        for mm in self.dp.model["Default"].sublayers():
+            if mm.__class__.__name__ == TypeEmbedNetConsistent.__name__:
+                out.append(mm(DEVICE))
+        if not out:
+            raise KeyError("The model has no type embedding networks.")
+        typeebd = paddle.concat(out, axis=1)
+        return to_numpy_array(typeebd)
 
     def get_model_def_script(self) -> str:
         """Get model definition script."""
@@ -510,6 +753,22 @@ def get_model_size(self) -> dict:
             "total": sum_param_des + sum_param_fit,
         }
 
+    def get_observed_types(self) -> dict:
+        """Get observed types (elements) of the model during data statistics.
+
+        Returns
+        -------
+        dict
+            A dictionary containing the information of observed type in the model:
+            - 'type_num': the total number of observed types in this model.
+            - 'observed_type': a list of the observed types in this model.
+        """
+        observed_type_list = self.dp.model["Default"].get_observed_type_list()
+        return {
+            "type_num": len(observed_type_list),
+            "observed_type": sort_element_type(observed_type_list),
+        }
+
     def get_model(self) -> "BaseModel":
         """Get the Paddle model.
 
@@ -560,7 +819,9 @@ def eval_descriptor(
         descriptor
             Descriptors.
         """
-        model = self.dp.model["Default"]
+        model = (
+            self.dp.model["Default"] if isinstance(self.dp, ModelWrapper) else self.dp
+        )
         model.set_eval_descriptor_hook(True)
         self.eval(
             coords,
@@ -574,3 +835,62 @@ def eval_descriptor(
         descriptor = model.eval_descriptor()
         model.set_eval_descriptor_hook(False)
         return to_numpy_array(descriptor)
+
+    def eval_fitting_last_layer(
+        self,
+        coords: np.ndarray,
+        cells: Optional[np.ndarray],
+        atom_types: np.ndarray,
+        fparam: Optional[np.ndarray] = None,
+        aparam: Optional[np.ndarray] = None,
+        **kwargs: Any,
+    ) -> np.ndarray:
+        """Evaluate fitting before last layer by using this DP.
+
+        Parameters
+        ----------
+        coords
+            The coordinates of atoms.
+            The array should be of size nframes x natoms x 3
+        cells
+            The cell of the region.
+            If None then non-PBC is assumed, otherwise using PBC.
+            The array should be of size nframes x 9
+        atom_types
+            The atom types
+            The list should contain natoms ints
+        fparam
+            The frame parameter.
+            The array can be of size :
+            - nframes x dim_fparam.
+            - dim_fparam. Then all frames are assumed to be provided with the same fparam.
+        aparam
+            The atomic parameter
+            The array can be of size :
+            - nframes x natoms x dim_aparam.
+            - natoms x dim_aparam. Then all frames are assumed to be provided with the same aparam.
+            - dim_aparam. Then all frames and atoms are provided with the same aparam.
+
+        Returns
+        -------
+        fitting
+            Fitting output before last layer.
+        """
+        model = self.dp.model["Default"]
+        model.set_eval_fitting_last_layer_hook(True)
+        try:
+            self.eval(
+                coords,
+                cells,
+                atom_types,
+                atomic=False,
+                fparam=fparam,
+                aparam=aparam,
+                **kwargs,
+            )
+            fitting_net = model.eval_fitting_last_layer()
+        finally:
+            # Switch the hook off even when evaluation fails, so a failed call
+            # does not leave the hook enabled on the model.
+            model.set_eval_fitting_last_layer_hook(False)
+        return to_numpy_array(fitting_net)
diff --git a/deepmd/pd/model/atomic_model/base_atomic_model.py b/deepmd/pd/model/atomic_model/base_atomic_model.py
index 803964218a..4f40117fb7 100644
--- a/deepmd/pd/model/atomic_model/base_atomic_model.py
+++ b/deepmd/pd/model/atomic_model/base_atomic_model.py
@@ -64,9 +64,9 @@ class BaseAtomicModel(paddle.nn.Layer, BaseAtomicModel_):
         of the atomic model. Implemented by removing the pairs from the nlist.
     rcond : float, optional
         The condition number for the regression of atomic energy.
-    preset_out_bias : Dict[str, list[Optional[paddle.Tensor]]], optional
+    preset_out_bias : dict[str, list[Optional[np.ndarray]]], optional
         Specifying atomic energy contribution in vacuum. Given by key:value pairs.
-        The value is a list specifying the bias. the elements can be None or np.array of output shape.
+        The value is a list specifying the bias. the elements can be None or np.ndarray of output shape.
         For example: [None, [2.]] means type 0 is not set, type 1 is set to [2.]
         The `set_davg_zero` key in the descriptor should be set.
 
@@ -84,6 +84,15 @@ def __init__(
         paddle.nn.Layer.__init__(self)
         BaseAtomicModel_.__init__(self)
         self.type_map = type_map
+        if type_map is not None:
+            self.register_buffer(
+                "buffer_type_map",
+                paddle.to_tensor([ord(c) for c in " ".join(type_map)]),
+            )
+        self.ntypes = len(self.type_map)
+        self.register_buffer(
+            "buffer_ntypes", paddle.to_tensor(self.ntypes, dtype="int64")
+        )
         self.reinit_atom_exclude(atom_exclude_types)
         self.reinit_pair_exclude(pair_exclude_types)
         self.rcond = rcond
@@ -92,7 +101,6 @@ def __init__(
 
     def init_out_stat(self) -> None:
         """Initialize the output bias."""
-        ntypes = self.get_ntypes()
         self.bias_keys: list[str] = list(self.fitting_output_def().keys())
         self.max_out_size = max(
             [self.atomic_output_def()[kk].size for kk in self.bias_keys]
@@ -106,7 +114,7 @@ def init_out_stat(self) -> None:
     def set_out_bias(self, out_bias: paddle.Tensor) -> None:
         self.out_bias = out_bias
 
-    def __setitem__(self, key, value) -> None:
+    def __setitem__(self, key: str, value: paddle.Tensor) -> None:
         if key in ["out_bias"]:
             self.out_bias = value
         elif key in ["out_std"]:
@@ -114,7 +122,7 @@ def __setitem__(self, key, value) -> None:
         else:
             raise KeyError(key)
 
-    def __getitem__(self, key):
+    def __getitem__(self, key: str) -> paddle.Tensor:
         if key in ["out_bias"]:
             return self.out_bias
         elif key in ["out_std"]:
@@ -126,6 +134,18 @@ def get_type_map(self) -> list[str]:
         """Get the type map."""
         return self.type_map
 
+    def get_buffer_type_map(self) -> paddle.Tensor:
+        """
+        Return the type map as a buffer-style Tensor for JIT saving.
+
+        The original type map (e.g., ['Ni', 'O']) is first joined into a single space-separated string
+        (e.g., "Ni O"). Each character in this string is then converted to its ASCII code using `ord()`,
+        and the resulting integer sequence is stored as a 1D paddle.Tensor of dtype int.
+
+        This format allows the type map to be serialized as a raw byte buffer during JIT model saving.
+        """
+        return self.buffer_type_map
+
     def get_compute_stats_distinguish_types(self) -> bool:
         """Get whether the fitting net computes stats which are not distinguished between different types of atoms."""
         return True
@@ -134,6 +154,10 @@ def get_intensive(self) -> bool:
         """Whether the fitting property is intensive."""
         return False
 
+    def has_default_fparam(self) -> bool:
+        """Check if the model has default frame parameters."""
+        return False
+
     def reinit_atom_exclude(
         self,
         exclude_types: Optional[list[int]] = None,
@@ -259,7 +283,6 @@ def forward_common_atomic(
             comm_dict=comm_dict,
         )
         ret_dict = self.apply_out_stat(ret_dict, atype)
-
         # nf x nloc
         atom_mask = ext_atom_mask[:, :nloc].astype(paddle.int32)
         if self.atom_excl is not None:
@@ -272,10 +295,10 @@ def forward_common_atomic(
                 out_shape2 *= ss
             ret_dict[kk] = (
                 ret_dict[kk].reshape([out_shape[0], out_shape[1], out_shape2])
-                * atom_mask.unsqueeze(2).astype(ret_dict[kk].dtype)
+                * atom_mask[:, :, None].astype(ret_dict[kk].dtype)
             ).reshape(out_shape)
         ret_dict["mask"] = atom_mask
-
+
         return ret_dict
 
     def forward(
@@ -299,7 +322,9 @@ def forward(
         )
 
     def change_type_map(
-        self, type_map: list[str], model_with_new_type_stat=None
+        self,
+        type_map: list[str],
+        model_with_new_type_stat: Optional["BaseAtomicModel"] = None,
     ) -> None:
         """Change the type related params to new ones, according to `type_map` and the original one in the model.
         If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types.
@@ -366,21 +391,25 @@ def compute_or_load_stat(
         self,
         merged: Union[Callable[[], list[dict]], list[dict]],
         stat_file_path: Optional[DPPath] = None,
+        compute_or_load_out_stat: bool = True,
     ) -> NoReturn:
         """
-        Compute the output statistics (e.g. energy bias) for the fitting net from packed data.
+        Compute or load the statistics parameters of the model,
+        such as mean and standard deviation of descriptors or the energy bias of the fitting net.
+        When `merged` is provided, all the statistics parameters will be calculated (or re-calculated for update),
+        and saved in the `stat_file_path`(s).
+        When `merged` is not provided, it will check the existence of `stat_file_path`(s)
+        and load the calculated statistics parameters.
 
         Parameters
         ----------
-        merged : Union[Callable[[], list[dict]], list[dict]]
-            - list[dict]: A list of data samples from various data systems.
-                Each element, `merged[i]`, is a data dictionary containing `keys`: `paddle.Tensor`
-                originating from the `i`-th data system.
-            - Callable[[], list[dict]]: A lazy function that returns data samples in the above format
-                only when needed. Since the sampling process can be slow and memory-intensive,
-                the lazy function helps by only sampling once.
-        stat_file_path : Optional[DPPath]
-            The path to the stat file.
+        merged
+            The data samples from different data systems, or a lazy function that returns them.
+        stat_file_path
+            The dictionary of paths to the statistics files.
+        compute_or_load_out_stat : bool
+            Whether to compute the output statistics.
+            If False, it will only compute the input statistics (e.g. mean and standard deviation of descriptors).
 
         """
         raise NotImplementedError
@@ -416,7 +445,7 @@ def apply_out_stat(
         self,
         ret: dict[str, paddle.Tensor],
         atype: paddle.Tensor,
-    ):
+    ) -> dict[str, paddle.Tensor]:
         """Apply the stat to each atomic output.
         The developer may override the method to define how the bias is applied
         to the atomic output of the model.
@@ -437,9 +466,9 @@ def apply_out_stat(
 
     def change_out_bias(
         self,
-        sample_merged,
+        sample_merged: Union[Callable[[], list[dict]], list[dict]],
         stat_file_path: Optional[DPPath] = None,
-        bias_adjust_mode="change-by-statistic",
+        bias_adjust_mode: str = "change-by-statistic",
     ) -> None:
         """Change the output bias according to the input data and the pretrained model.
 
@@ -489,7 +518,13 @@ def change_out_bias(
     def _get_forward_wrapper_func(self) -> Callable[..., paddle.Tensor]:
         """Get a forward wrapper of the atomic model for output bias calculation."""
 
-        def model_forward(coord, atype, box, fparam=None, aparam=None):
+        def model_forward(
+            coord: paddle.Tensor,
+            atype: paddle.Tensor,
+            box: Optional[paddle.Tensor],
+            fparam: Optional[paddle.Tensor] = None,
+            aparam: Optional[paddle.Tensor] = None,
+        ) -> dict[str, paddle.Tensor]:
             with (
                 paddle.no_grad()
             ):  # it's essential for pure paddle forward function to use auto_batchsize
@@ -518,7 +553,7 @@ def model_forward(coord, atype, box, fparam=None, aparam=None):
 
         return model_forward
 
-    def _default_bias(self):
+    def _default_bias(self) -> paddle.Tensor:
         ntypes = self.get_ntypes()
         return paddle.zeros([self.n_out, ntypes, self.max_out_size], dtype=dtype).to(
             device=device
@@ -572,6 +607,14 @@ def _store_out_stat(
         paddle.assign(out_bias_data, self.out_bias)
         paddle.assign(out_std_data, self.out_std)
 
+    def get_ntypes(self) -> int:
+        """Get the number of atom types, i.e. the length of `type_map`."""
+        return len(self.type_map)
+
+    def get_buffer_ntypes(self) -> paddle.Tensor:
+        """Get the `buffer_ntypes` attribute (presumably the number of atom types as a buffer-style Tensor)."""
+        return self.buffer_ntypes
+
     def _fetch_out_stat(
         self,
         keys: list[str],
diff --git a/deepmd/pd/model/atomic_model/dp_atomic_model.py b/deepmd/pd/model/atomic_model/dp_atomic_model.py
index 00fefa4e2b..816245c28a 100644
--- a/deepmd/pd/model/atomic_model/dp_atomic_model.py
+++ b/deepmd/pd/model/atomic_model/dp_atomic_model.py
@@ -2,7 +2,10 @@
 import functools
 import logging
 from typing import (
+    Any,
+    Callable,
     Optional,
+    Union,
 )
 
 import paddle
@@ -47,10 +50,10 @@ class DPAtomicModel(BaseAtomicModel):
 
     def __init__(
         self,
-        descriptor,
-        fitting,
+        descriptor: BaseDescriptor,
+        fitting: BaseFitting,
         type_map: list[str],
-        **kwargs,
+        **kwargs: Any,
     ) -> None:
         super().__init__(type_map, **kwargs)
         ntypes = len(type_map)
@@ -67,14 +70,17 @@ def __init__(
         self.eval_fitting_last_layer_list = []
 
         # register 'type_map' as buffer
-        def _string_to_array(s: str) -> list[int]:
+        def _string_to_array(s: Union[str, list[str]]) -> list[int]:
             return [ord(c) for c in s]
 
-        self.register_buffer(
-            "buffer_type_map",
-            paddle.to_tensor(_string_to_array(" ".join(self.type_map)), dtype="int32"),
-        )
-        self.buffer_type_map.name = "buffer_type_map"
+        if type_map is not None:
+            self.register_buffer(
+                "buffer_type_map",
+                paddle.to_tensor(
+                    _string_to_array(" ".join(self.type_map)), dtype="int32"
+                ),
+            )
+            self.buffer_type_map.name = "buffer_type_map"
         if hasattr(self.descriptor, "has_message_passing"):
             # register 'has_message_passing' as buffer(cast to int32 as problems may meets with vector<bool>)
             self.register_buffer(
@@ -153,7 +159,27 @@ def get_sel(self) -> list[int]:
         """Get the neighbor selection."""
         return self.sel
 
-    def set_case_embd(self, case_idx: int):
+    def get_buffer_type_map(self) -> paddle.Tensor:
+        """
+        Return the type map as a buffer-style Tensor for JIT saving.
+
+        The original type map (e.g., ['Ni', 'O']) is first joined into a single space-separated string
+        (e.g., "Ni O"). Each character in this string is then converted to its ASCII code using `ord()`,
+        and the resulting integer sequence is stored as a 1D paddle.Tensor of dtype int.
+
+        This format allows the type map to be serialized as a raw byte buffer during JIT model saving.
+        """
+        return self.buffer_type_map
+
+    def get_buffer_rcut(self) -> paddle.Tensor:
+        """Get the cut-off radius as a buffer-style Tensor."""
+        return self.descriptor.get_buffer_rcut()
+
+    def get_buffer_sel(self) -> paddle.Tensor:
+        """Get the neighbor selection as a buffer-style Tensor."""
+        return self.descriptor.get_buffer_sel()
+
+    def set_case_embd(self, case_idx: int) -> None:
         """
         Set the case embedding of this atomic model by the given case_idx,
         typically concatenated with the output of the descriptor and fed into the fitting net.
@@ -173,7 +199,9 @@ def mixed_types(self) -> bool:
         return self.descriptor.mixed_types()
 
     def change_type_map(
-        self, type_map: list[str], model_with_new_type_stat=None
+        self,
+        type_map: list[str],
+        model_with_new_type_stat: Optional["DPAtomicModel"] = None,
     ) -> None:
         """Change the type related params to new ones, according to `type_map` and the original one in the model.
         If there are new types in `type_map`, statistics will be updated accordingly to `model_with_new_type_stat` for these new types.
@@ -214,7 +242,7 @@ def serialize(self) -> dict:
         return dd
 
     @classmethod
-    def deserialize(cls, data) -> "DPAtomicModel":
+    def deserialize(cls, data: dict) -> "DPAtomicModel":
         data = data.copy()
         check_version_compatibility(data.pop("@version", 1), 2, 1)
         data.pop("@class", None)
@@ -259,13 +287,13 @@ def enable_compression(
 
     def forward_atomic(
         self,
-        extended_coord,
-        extended_atype,
-        nlist,
+        extended_coord: paddle.Tensor,
+        extended_atype: paddle.Tensor,
+        nlist: paddle.Tensor,
         mapping: Optional[paddle.Tensor] = None,
         fparam: Optional[paddle.Tensor] = None,
         aparam: Optional[paddle.Tensor] = None,
-        comm_dict: Optional[list[paddle.Tensor]] = None,
+        comm_dict: Optional[dict[str, paddle.Tensor]] = None,
     ) -> dict[str, paddle.Tensor]:
         """Return atomic prediction.
 
@@ -328,8 +356,9 @@ def get_out_bias(self) -> paddle.Tensor:
 
     def compute_or_load_stat(
         self,
-        sampled_func,
+        sampled_func: Callable[[], list[dict]],
         stat_file_path: Optional[DPPath] = None,
+        compute_or_load_out_stat: bool = True,
     ) -> None:
         """
         Compute or load the statistics parameters of the model,
@@ -345,6 +374,9 @@ def compute_or_load_stat(
             The lazy sampled function to get data frames from different data systems.
         stat_file_path
             The dictionary of paths to the statistics files.
+        compute_or_load_out_stat : bool
+            Whether to compute the output statistics.
+            If False, it will only compute the input statistics (e.g. mean and standard deviation of descriptors).
         """
         if stat_file_path is not None and self.type_map is not None:
             # descriptors and fitting net with different type_map
@@ -368,16 +400,29 @@ def wrapped_sampler():
         self.fitting_net.compute_input_stats(
             wrapped_sampler, protection=self.data_stat_protect
         )
-        self.compute_or_load_out_stat(wrapped_sampler, stat_file_path)
+        if compute_or_load_out_stat:
+            self.compute_or_load_out_stat(wrapped_sampler, stat_file_path)
 
     def get_dim_fparam(self) -> int:
         """Get the number (dimension) of frame parameters of this atomic model."""
         return self.fitting_net.get_dim_fparam()
 
+    def get_buffer_dim_fparam(self) -> paddle.Tensor:
+        """Get the number (dimension) of frame parameters of this atomic model as a buffer-style Tensor."""
+        return self.fitting_net.get_buffer_dim_fparam()
+
+    def has_default_fparam(self) -> bool:
+        """Check if the model has default frame parameters."""
+        return self.fitting_net.has_default_fparam()
+
     def get_dim_aparam(self) -> int:
         """Get the number (dimension) of atomic parameters of this atomic model."""
         return self.fitting_net.get_dim_aparam()
 
+    def get_buffer_dim_aparam(self) -> paddle.Tensor:
+        """Get the number (dimension) of atomic parameters of this atomic model as a buffer-style Tensor."""
+        return self.fitting_net.get_buffer_dim_aparam()
+
     def get_sel_type(self) -> list[int]:
         """Get the selected atom types of this model.
 
diff --git a/deepmd/pd/model/descriptor/dpa1.py b/deepmd/pd/model/descriptor/dpa1.py
index 7fd3de02e9..1722140316 100644
--- a/deepmd/pd/model/descriptor/dpa1.py
+++ b/deepmd/pd/model/descriptor/dpa1.py
@@ -297,6 +297,11 @@ def __init__(
         self.use_econf_tebd = use_econf_tebd
         self.use_tebd_bias = use_tebd_bias
         self.type_map = type_map
+        if type_map is not None:
+            self.register_buffer(
+                "buffer_type_map",
+                paddle.to_tensor([ord(c) for c in " ".join(type_map)]),
+            )
         self.compress = False
         self.type_embedding = TypeEmbedNet(
             ntypes,
@@ -320,10 +325,18 @@ def get_rcut(self) -> float:
         """Returns the cut-off radius."""
         return self.se_atten.get_rcut()
 
+    def get_buffer_rcut(self) -> paddle.Tensor:
+        """Returns the cut-off radius as a buffer-style Tensor."""
+        return self.se_atten.get_buffer_rcut()
+
     def get_rcut_smth(self) -> float:
         """Returns the radius where the neighbor information starts to smoothly decay to 0."""
         return self.se_atten.get_rcut_smth()
 
+    def get_buffer_rcut_smth(self) -> paddle.Tensor:
+        """Returns the radius where the neighbor information starts to smoothly decay to 0 as a buffer-style Tensor."""
+        return self.se_atten.get_buffer_rcut_smth()
+
     def get_nsel(self) -> int:
         """Returns the number of selected atoms in the cut-off radius."""
         return self.se_atten.get_nsel()
@@ -340,6 +353,18 @@ def get_type_map(self) -> list[str]:
         """Get the name to each type of atoms."""
         return self.type_map
 
+    def get_buffer_type_map(self) -> paddle.Tensor:
+        """
+        Return the type map as a buffer-style Tensor for JIT saving.
+
+        The original type map (e.g., ['Ni', 'O']) is first joined into a single space-separated string
+        (e.g., "Ni O"). Each character in this string is then converted to its ASCII code using `ord()`,
+        and the resulting integer sequence is stored as a 1D paddle.Tensor of dtype int.
+
+        This format allows the type map to be serialized as a raw byte buffer during JIT model saving.
+        """
+        return self.buffer_type_map
+
     def get_dim_out(self) -> int:
         """Returns the output dimension."""
         ret = self.se_atten.get_dim_out()
diff --git a/deepmd/pd/model/descriptor/dpa2.py b/deepmd/pd/model/descriptor/dpa2.py
index ab3b6f5736..c8eb4ca117 100644
--- a/deepmd/pd/model/descriptor/dpa2.py
+++ b/deepmd/pd/model/descriptor/dpa2.py
@@ -265,6 +265,11 @@ def init_subclass_params(sub_data, sub_class):
         self.use_econf_tebd = use_econf_tebd
         self.use_tebd_bias = use_tebd_bias
         self.type_map = type_map
+        if type_map is not None:
+            self.register_buffer(
+                "buffer_type_map",
+                paddle.to_tensor([ord(c) for c in " ".join(type_map)]),
+            )
         self.type_embedding = TypeEmbedNet(
             ntypes,
             self.repinit_args.tebd_dim,
@@ -318,6 +323,9 @@ def init_subclass_params(sub_data, sub_class):
         self.rcut = self.repinit.get_rcut()
         self.rcut_smth = self.repinit.get_rcut_smth()
         self.ntypes = ntypes
+        self.register_buffer(
+            "buffer_ntypes", paddle.to_tensor(self.ntypes, dtype="int64")
+        )
         self.sel = self.repinit.sel
         # set trainable
         for param in self.parameters():
@@ -332,6 +340,14 @@ def get_rcut_smth(self) -> float:
         """Returns the radius where the neighbor information starts to smoothly decay to 0."""
         return self.rcut_smth
 
+    def get_buffer_rcut(self) -> paddle.Tensor:
+        """Returns the cut-off radius as a buffer-style Tensor."""
+        return self.repinit.get_buffer_rcut()
+
+    def get_buffer_rcut_smth(self) -> paddle.Tensor:
+        """Returns the radius where the neighbor information starts to smoothly decay to 0 as a buffer-style Tensor."""
+        return self.repinit.get_buffer_rcut_smth()
+
     def get_nsel(self) -> int:
         """Returns the number of selected atoms in the cut-off radius."""
         return sum(self.sel)
@@ -342,7 +358,7 @@ def get_sel(self) -> list[int]:
 
     def get_ntypes(self) -> int:
         """Returns the number of element types."""
-        return self.ntypes
+        return self.ntypes if paddle.in_dynamic_mode() else self.buffer_ntypes
 
     def get_type_map(self) -> list[str]:
         """Get the name to each type of atoms."""
@@ -768,7 +784,7 @@ def forward(
             type_embedding = None
         g1, _, _, _, _ = self.repinit(
             nlist_dict[
-                get_multiple_nlist_key(self.repinit.get_rcut(), self.repinit.get_nsel())
+                get_multiple_nlist_key(self.repinit.rcut, sum(self.repinit.sel))
             ],
             extended_coord,
             extended_atype,
diff --git a/deepmd/pd/model/descriptor/dpa3.py b/deepmd/pd/model/descriptor/dpa3.py
index 7754de01e5..80c5b1c000 100644
--- a/deepmd/pd/model/descriptor/dpa3.py
+++ b/deepmd/pd/model/descriptor/dpa3.py
@@ -176,6 +176,11 @@ def init_subclass_params(sub_data, sub_class):
         self.use_loc_mapping = use_loc_mapping
         self.use_tebd_bias = use_tebd_bias
         self.type_map = type_map
+        if type_map is not None:
+            self.register_buffer(
+                "buffer_type_map",
+                paddle.to_tensor([ord(c) for c in " ".join(self.type_map)]),
+            )
         self.tebd_dim = self.repflow_args.n_dim
         self.type_embedding = TypeEmbedNet(
             ntypes,
@@ -207,6 +212,9 @@ def init_subclass_params(sub_data, sub_class):
         self.rcut_smth = self.repflows.get_rcut_smth()
         self.sel = self.repflows.get_sel()
         self.ntypes = ntypes
+        self.register_buffer(
+            "buffer_ntypes", paddle.to_tensor(self.ntypes, dtype="int64")
+        )
 
         # set trainable
         for param in self.parameters():
@@ -221,6 +229,14 @@ def get_rcut_smth(self) -> float:
         """Returns the radius where the neighbor information starts to smoothly decay to 0."""
         return self.rcut_smth
 
+    def get_buffer_rcut(self) -> paddle.Tensor:
+        """Returns the cut-off radius as a buffer-style Tensor."""
+        return self.repflows.get_buffer_rcut()
+
+    def get_buffer_rcut_smth(self) -> paddle.Tensor:
+        """Returns the radius where the neighbor information starts to smoothly decay to 0 as a buffer-style Tensor."""
+        return self.repflows.get_buffer_rcut_smth()
+
     def get_nsel(self) -> int:
         """Returns the number of selected atoms in the cut-off radius."""
         return sum(self.sel)
@@ -229,14 +245,30 @@ def get_sel(self) -> list[int]:
         """Returns the number of selected atoms for each type."""
         return self.sel
 
+    def get_buffer_sel(self) -> paddle.Tensor:
+        """Returns the number of selected atoms for each type as a buffer-style Tensor."""
+        return self.repflows.get_buffer_sel()
+
     def get_ntypes(self) -> int:
         """Returns the number of element types."""
-        return self.ntypes
+        return self.ntypes if paddle.in_dynamic_mode() else self.buffer_ntypes
 
     def get_type_map(self) -> list[str]:
         """Get the name to each type of atoms."""
         return self.type_map
 
+    def get_buffer_type_map(self) -> paddle.Tensor:
+        """
+        Return the type map as a buffer-style Tensor for JIT saving.
+
+        The original type map (e.g., ['Ni', 'O']) is first joined into a single space-separated string
+        (e.g., "Ni O"). Each character in this string is then converted to its ASCII code using `ord()`,
+        and the resulting integer sequence is stored as a 1D paddle.Tensor of dtype int.
+
+        This format allows the type map to be serialized as a raw byte buffer during JIT model saving.
+        """
+        return self.buffer_type_map
+
     def get_dim_out(self) -> int:
         """Returns the output dimension of this descriptor."""
         ret = self.repflows.dim_out
diff --git a/deepmd/pd/model/descriptor/repflows.py b/deepmd/pd/model/descriptor/repflows.py
index 756562e333..2b9760bbe6 100644
--- a/deepmd/pd/model/descriptor/repflows.py
+++ b/deepmd/pd/model/descriptor/repflows.py
@@ -238,6 +238,9 @@ def __init__(
         self.a_rcut_smth = float(a_rcut_smth)
         self.a_sel = a_sel
         self.ntypes = ntypes
+        self.register_buffer(
+            "buffer_ntypes", paddle.to_tensor(self.ntypes, dtype="int64")
+        )
         self.nlayers = nlayers
         # for other common desciptor method
         sel = [e_sel] if isinstance(e_sel, int) else e_sel
@@ -245,7 +248,9 @@ def __init__(
         self.ndescrpt = self.nnei * 4  # use full descriptor.
         assert len(sel) == 1
         self.sel = sel
+        self.register_buffer("buffer_sel", paddle.to_tensor(sel))
         self.rcut = e_rcut
+        self.register_buffer("buffer_rcut", paddle.to_tensor(self.e_rcut))
         self.rcut_smth = e_rcut_smth
         self.sec = self.sel
         self.split_sel = self.sel
@@ -355,6 +360,10 @@ def get_rcut(self) -> float:
         """Returns the cut-off radius."""
         return self.e_rcut
 
+    def get_buffer_rcut(self) -> paddle.Tensor:
+        """Returns the cut-off radius as a buffer-style Tensor."""
+        return self.buffer_rcut
+
     def get_rcut_smth(self) -> float:
         """Returns the radius where the neighbor information starts to smoothly decay to 0."""
         return self.e_rcut_smth
@@ -367,9 +376,13 @@ def get_sel(self) -> list[int]:
         """Returns the number of selected atoms for each type."""
         return self.sel
 
+    def get_buffer_sel(self) -> paddle.Tensor:
+        """Returns the number of selected atoms for each type as a buffer-style Tensor."""
+        return self.buffer_sel
+
     def get_ntypes(self) -> int:
         """Returns the number of element types."""
-        return self.ntypes
+        return self.ntypes if paddle.in_dynamic_mode() else self.buffer_ntypes
 
     def get_dim_out(self) -> int:
         """Returns the output dimension."""
diff --git a/deepmd/pd/model/descriptor/repformers.py b/deepmd/pd/model/descriptor/repformers.py
index 4151833f35..f0d18a0908 100644
--- a/deepmd/pd/model/descriptor/repformers.py
+++ b/deepmd/pd/model/descriptor/repformers.py
@@ -210,8 +210,14 @@ def __init__(
         """
         super().__init__()
         self.rcut = float(rcut)
+        self.register_buffer("buffer_rcut", paddle.to_tensor(self.rcut))
         self.rcut_smth = float(rcut_smth)
+        self.register_buffer("buffer_rcut_smth", paddle.to_tensor(self.rcut_smth))
         self.ntypes = ntypes
+        self.register_buffer(
+            "buffer_ntypes", paddle.to_tensor(self.ntypes, dtype="int64")
+        )
+
         self.nlayers = nlayers
         sel = [sel] if isinstance(sel, int) else sel
         self.nnei = sum(sel)
@@ -323,6 +329,14 @@ def get_rcut_smth(self) -> float:
         """Returns the radius where the neighbor information starts to smoothly decay to 0."""
         return self.rcut_smth
 
+    def get_buffer_rcut(self) -> paddle.Tensor:
+        """Returns the cut-off radius as a buffer-style Tensor."""
+        return self.buffer_rcut
+
+    def get_buffer_rcut_smth(self) -> paddle.Tensor:
+        """Returns the radius where the neighbor information starts to smoothly decay to 0 as a buffer-style Tensor."""
+        return self.buffer_rcut_smth
+
     def get_nsel(self) -> int:
         """Returns the number of selected atoms in the cut-off radius."""
         return sum(self.sel)
@@ -333,7 +347,7 @@ def get_sel(self) -> list[int]:
 
     def get_ntypes(self) -> int:
         """Returns the number of element types."""
-        return self.ntypes
+        return self.ntypes if paddle.in_dynamic_mode() else self.buffer_ntypes
 
     def get_dim_out(self) -> int:
         """Returns the output dimension."""
diff --git a/deepmd/pd/model/descriptor/se_a.py b/deepmd/pd/model/descriptor/se_a.py
index 92c010fcf0..109c7ba3c4 100644
--- a/deepmd/pd/model/descriptor/se_a.py
+++ b/deepmd/pd/model/descriptor/se_a.py
@@ -95,6 +95,11 @@ def __init__(
             raise NotImplementedError("old implementation of spin is not supported.")
         super().__init__()
         self.type_map = type_map
+        if type_map is not None:
+            self.register_buffer(
+                "buffer_type_map",
+                paddle.to_tensor([ord(c) for c in " ".join(type_map)]),
+            )
         self.compress = False
         self.prec = PRECISION_DICT[precision]
         self.sea = DescrptBlockSeA(
@@ -122,6 +127,14 @@ def get_rcut_smth(self) -> float:
         """Returns the radius where the neighbor information starts to smoothly decay to 0."""
         return self.sea.get_rcut_smth()
 
+    def get_buffer_rcut(self) -> paddle.Tensor:
+        """Returns the cut-off radius as a buffer-style Tensor."""
+        return self.sea.get_buffer_rcut()
+
+    def get_buffer_rcut_smth(self) -> paddle.Tensor:
+        """Returns the radius where the neighbor information starts to smoothly decay to 0 as a buffer-style Tensor."""
+        return self.sea.get_buffer_rcut_smth()
+
     def get_nsel(self) -> int:
         """Returns the number of selected atoms in the cut-off radius."""
         return self.sea.get_nsel()
@@ -138,6 +151,18 @@ def get_type_map(self) -> list[str]:
         """Get the name to each type of atoms."""
         return self.type_map
 
+    def get_buffer_type_map(self) -> paddle.Tensor:
+        """
+        Return the type map as a buffer-style Tensor for JIT saving.
+
+        The original type map (e.g., ['Ni', 'O']) is first joined into a single space-separated string
+        (e.g., "Ni O"). Each character in this string is then converted to its ASCII code using `ord()`,
+        and the resulting integer sequence is stored as a 1D paddle.Tensor of dtype int.
+
+        This format allows the type map to be serialized as a raw byte buffer during JIT model saving.
+        """
+        return self.buffer_type_map
+
     def get_dim_out(self) -> int:
         """Returns the output dimension."""
         return self.sea.get_dim_out()
@@ -438,7 +463,9 @@ def __init__(
         """
         super().__init__()
         self.rcut = float(rcut)
+        self.register_buffer("buffer_rcut", paddle.to_tensor(self.rcut))
         self.rcut_smth = float(rcut_smth)
+        self.register_buffer("buffer_rcut_smth", paddle.to_tensor(self.rcut_smth))
         self.neuron = neuron
         self.filter_neuron = self.neuron
         self.axis_neuron = axis_neuron
@@ -449,6 +476,9 @@ def __init__(
         self.resnet_dt = resnet_dt
         self.env_protection = env_protection
         self.ntypes = len(sel)
+        self.register_buffer(
+            "buffer_ntypes", paddle.to_tensor(self.ntypes, dtype="int64")
+        )
         self.type_one_side = type_one_side
         self.seed = seed
         # order matters, placed after the assignment of self.ntypes
@@ -513,6 +543,14 @@ def get_rcut_smth(self) -> float:
         """Returns the radius where the neighbor information starts to smoothly decay to 0."""
         return self.rcut_smth
 
+    def get_buffer_rcut(self) -> paddle.Tensor:
+        """Returns the cut-off radius as a buffer-style Tensor."""
+        return self.buffer_rcut
+
+    def get_buffer_rcut_smth(self) -> paddle.Tensor:
+        """Returns the radius where the neighbor information starts to smoothly decay to 0 as a buffer-style Tensor."""
+        return self.buffer_rcut_smth
+
     def get_nsel(self) -> int:
         """Returns the number of selected atoms in the cut-off radius."""
         return sum(self.sel)
@@ -523,7 +561,7 @@ def get_sel(self) -> list[int]:
 
     def get_ntypes(self) -> int:
         """Returns the number of element types."""
-        return self.ntypes
+        return self.ntypes if paddle.in_dynamic_mode() else self.buffer_ntypes
 
     def get_dim_out(self) -> int:
         """Returns the output dimension."""
diff --git a/deepmd/pd/model/descriptor/se_atten.py b/deepmd/pd/model/descriptor/se_atten.py
index 39c3e6ace4..ceae16f409 100644
--- a/deepmd/pd/model/descriptor/se_atten.py
+++ b/deepmd/pd/model/descriptor/se_atten.py
@@ -153,7 +153,9 @@ def __init__(
         super().__init__()
         del type
         self.rcut = float(rcut)
+        self.register_buffer("buffer_rcut", paddle.to_tensor(self.rcut))
         self.rcut_smth = float(rcut_smth)
+        self.register_buffer("buffer_rcut_smth", paddle.to_tensor(self.rcut_smth))
         self.neuron = neuron
         self.filter_neuron = self.neuron
         self.axis_neuron = axis_neuron
@@ -185,6 +187,10 @@ def __init__(
             sel = [sel]
 
         self.ntypes = ntypes
+        self.register_buffer(
+            "buffer_ntypes", paddle.to_tensor(self.ntypes, dtype="int64")
+        )
+
         self.sel = sel
         self.sec = self.sel
         self.split_sel = self.sel
@@ -278,6 +284,14 @@ def get_rcut_smth(self) -> float:
         """Returns the radius where the neighbor information starts to smoothly decay to 0."""
         return self.rcut_smth
 
+    def get_buffer_rcut(self) -> paddle.Tensor:
+        """Returns the cut-off radius as a buffer-style Tensor."""
+        return self.buffer_rcut
+
+    def get_buffer_rcut_smth(self) -> paddle.Tensor:
+        """Returns the radius where the neighbor information starts to smoothly decay to 0 as a buffer-style Tensor."""
+        return self.buffer_rcut_smth
+
     def get_nsel(self) -> int:
         """Returns the number of selected atoms in the cut-off radius."""
         return sum(self.sel)
@@ -288,7 +302,7 @@ def get_sel(self) -> list[int]:
 
     def get_ntypes(self) -> int:
         """Returns the number of element types."""
-        return self.ntypes
+        return self.ntypes if paddle.in_dynamic_mode() else self.buffer_ntypes
 
     def get_dim_in(self) -> int:
         """Returns the input dimension."""
diff --git a/deepmd/pd/model/descriptor/se_t_tebd.py b/deepmd/pd/model/descriptor/se_t_tebd.py
index 16e4de5d12..e9d4053612 100644
--- a/deepmd/pd/model/descriptor/se_t_tebd.py
+++ b/deepmd/pd/model/descriptor/se_t_tebd.py
@@ -165,6 +165,11 @@ def __init__(
         self.prec = PRECISION_DICT[precision]
         self.use_econf_tebd = use_econf_tebd
         self.type_map = type_map
+        if type_map is not None:
+            self.register_buffer(
+                "buffer_type_map",
+                paddle.to_tensor([ord(c) for c in " ".join(type_map)]),
+            )
         self.smooth = smooth
         self.type_embedding = TypeEmbedNet(
             ntypes,
@@ -208,6 +213,18 @@ def get_type_map(self) -> list[str]:
         """Get the name to each type of atoms."""
         return self.type_map
 
+    def get_buffer_type_map(self) -> paddle.Tensor:
+        """
+        Return the type map as a buffer-style Tensor for JIT saving.
+
+        The original type map (e.g., ['Ni', 'O']) is first joined into a single space-separated string
+        (e.g., "Ni O"). Each character in this string is then converted to its ASCII code using `ord()`,
+        and the resulting integer sequence is stored as a 1D paddle.Tensor of dtype int.
+
+        This format allows the type map to be serialized as a raw byte buffer during JIT model saving.
+        """
+        return self.buffer_type_map
+
     def get_dim_out(self) -> int:
         """Returns the output dimension."""
         ret = self.se_ttebd.get_dim_out()
@@ -535,7 +552,9 @@ def __init__(
     ) -> None:
         super().__init__()
         self.rcut = float(rcut)
+        self.register_buffer("buffer_rcut", paddle.to_tensor(self.rcut))
         self.rcut_smth = float(rcut_smth)
+        self.register_buffer("buffer_rcut_smth", paddle.to_tensor(self.rcut_smth))
         self.neuron = neuron
         self.filter_neuron = self.neuron
         self.tebd_dim = tebd_dim
@@ -553,6 +572,10 @@ def __init__(
             sel = [sel]
 
         self.ntypes = ntypes
+        self.register_buffer(
+            "buffer_ntypes", paddle.to_tensor(self.ntypes, dtype="int64")
+        )
+
         self.sel = sel
         self.sec = self.sel
         self.split_sel = self.sel
@@ -615,6 +638,14 @@ def get_rcut_smth(self) -> float:
         """Returns the radius where the neighbor information starts to smoothly decay to 0."""
         return self.rcut_smth
 
+    def get_buffer_rcut(self) -> paddle.Tensor:
+        """Returns the cut-off radius as a buffer-style Tensor."""
+        return self.buffer_rcut
+
+    def get_buffer_rcut_smth(self) -> paddle.Tensor:
+        """Returns the radius where the neighbor information starts to smoothly decay to 0 as a buffer-style Tensor."""
+        return self.buffer_rcut_smth
+
     def get_nsel(self) -> int:
         """Returns the number of selected atoms in the cut-off radius."""
         return sum(self.sel)
@@ -625,7 +656,7 @@ def get_sel(self) -> list[int]:
 
     def get_ntypes(self) -> int:
         """Returns the number of element types."""
-        return self.ntypes
+        return self.ntypes if paddle.in_dynamic_mode() else self.buffer_ntypes
 
     def get_dim_in(self) -> int:
         """Returns the input dimension."""
diff --git a/deepmd/pd/model/model/ener_model.py b/deepmd/pd/model/model/ener_model.py
index 2e28ae0765..36cd1211ba 100644
--- a/deepmd/pd/model/model/ener_model.py
+++ b/deepmd/pd/model/model/ener_model.py
@@ -34,6 +34,18 @@ def __init__(
         DPModelCommon.__init__(self)
         DPEnergyModel_.__init__(self, *args, **kwargs)
 
+    def get_buffer_type_map(self) -> paddle.Tensor:
+        """
+        Return the type map as a buffer-style Tensor for JIT saving.
+
+        The original type map (e.g., ['Ni', 'O']) is first joined into a single space-separated string
+        (e.g., "Ni O"). Each character in this string is then converted to its ASCII code using `ord()`,
+        and the resulting integer sequence is stored as a 1D paddle.Tensor of dtype int.
+
+        This format allows the type map to be serialized as a raw byte buffer during JIT model saving.
+        """
+        return super().get_buffer_type_map()
+
     def translated_output_def(self):
         out_def_data = self.model_output_def().get_data()
         output_def = {
diff --git a/deepmd/pd/model/model/make_model.py b/deepmd/pd/model/model/make_model.py
index 077dac1160..42c406f8d7 100644
--- a/deepmd/pd/model/model/make_model.py
+++ b/deepmd/pd/model/model/make_model.py
@@ -529,6 +529,14 @@ def get_dim_aparam(self) -> int:
             """Get the number (dimension) of atomic parameters of this atomic model."""
             return self.atomic_model.get_dim_aparam()
 
+        def get_buffer_dim_fparam(self) -> paddle.Tensor:
+            """Get the number (dimension) of frame parameters of this atomic model as a buffer-style Tensor."""
+            return self.atomic_model.get_buffer_dim_fparam()
+
+        def get_buffer_dim_aparam(self) -> paddle.Tensor:
+            """Get the number (dimension) of atomic parameters of this atomic model as a buffer-style Tensor."""
+            return self.atomic_model.get_buffer_dim_aparam()
+
         def get_sel_type(self) -> list[int]:
             """Get the selected atom types of this model.
 
@@ -553,6 +561,22 @@ def get_type_map(self) -> list[str]:
             """Get the type map."""
             return self.atomic_model.get_type_map()
 
+        def get_buffer_rcut(self) -> paddle.Tensor:
+            """Get the cut-off radius as a buffer-style Tensor."""
+            return self.atomic_model.get_buffer_rcut()
+
+        def get_buffer_type_map(self) -> paddle.Tensor:
+            """
+            Return the type map as a buffer-style Tensor for JIT saving.
+
+            The original type map (e.g., ['Ni', 'O']) is first joined into a single space-separated string
+            (e.g., "Ni O"). Each character in this string is then converted to its ASCII code using `ord()`,
+            and the resulting integer sequence is stored as a 1D paddle.Tensor of dtype int.
+
+            This format allows the type map to be serialized as a raw byte buffer during JIT model saving.
+            """
+            return self.atomic_model.get_buffer_type_map()
+
         def get_nsel(self) -> int:
             """Returns the total number of selected neighboring atoms in the cut-off radius."""
             return self.atomic_model.get_nsel()
diff --git a/deepmd/pd/model/model/model.py b/deepmd/pd/model/model/model.py
index 06a2c6910f..0151a9c36b 100644
--- a/deepmd/pd/model/model/model.py
+++ b/deepmd/pd/model/model/model.py
@@ -53,3 +53,7 @@ def get_min_nbor_dist(self) -> Optional[float]:
     def get_ntypes(self):
         """Returns the number of element types."""
         return len(self.get_type_map())
+
+    def get_buffer_ntypes(self) -> paddle.Tensor:
+        """Returns the number of element types as a buffer-style int64 Tensor."""
+        return paddle.to_tensor(self.get_ntypes(), dtype="int64")
diff --git a/deepmd/pd/model/task/fitting.py b/deepmd/pd/model/task/fitting.py
index 953ec5bf0e..398630e1d2 100644
--- a/deepmd/pd/model/task/fitting.py
+++ b/deepmd/pd/model/task/fitting.py
@@ -248,7 +248,13 @@ def __init__(
         self.mixed_types = mixed_types
         self.resnet_dt = resnet_dt
         self.numb_fparam = numb_fparam
+        self.register_buffer(
+            "buffer_numb_fparam", paddle.to_tensor([numb_fparam], dtype=paddle.int64)
+        )
         self.numb_aparam = numb_aparam
+        self.register_buffer(
+            "buffer_numb_aparam", paddle.to_tensor([numb_aparam], dtype=paddle.int64)
+        )
         self.dim_case_embd = dim_case_embd
         self.default_fparam = default_fparam
         self.activation_function = activation_function
@@ -257,6 +263,11 @@ def __init__(
         self.rcond = rcond
         self.seed = seed
         self.type_map = type_map
+        if type_map is not None:
+            self.register_buffer(
+                "buffer_type_map",
+                paddle.to_tensor([ord(c) for c in " ".join(self.type_map)]),
+            )
         self.use_aparam_as_mask = use_aparam_as_mask
         # order matters, should be place after the assignment of ntypes
         self.reinit_exclude(exclude_types)
@@ -435,6 +446,14 @@ def get_dim_aparam(self) -> int:
         """Get the number (dimension) of atomic parameters of this atomic model."""
         return self.numb_aparam
 
+    def get_buffer_dim_fparam(self) -> paddle.Tensor:
+        """Get the number (dimension) of frame parameters of this atomic model as a buffer-style Tensor."""
+        return self.buffer_numb_fparam
+
+    def get_buffer_dim_aparam(self) -> paddle.Tensor:
+        """Get the number (dimension) of atomic parameters of this atomic model as a buffer-style Tensor."""
+        return self.buffer_numb_aparam
+
     # make jit happy
     exclude_types: list[int]
 
@@ -456,6 +475,18 @@ def get_type_map(self) -> list[str]:
         """Get the name to each type of atoms."""
         return self.type_map
 
+    def get_buffer_type_map(self) -> paddle.Tensor:
+        """
+        Return the type map as a buffer-style Tensor for JIT saving.
+
+        The original type map (e.g., ['Ni', 'O']) is first joined into a single space-separated string
+        (e.g., "Ni O"). Each character in this string is then converted to its ASCII code using `ord()`,
+        and the resulting integer sequence is stored as a 1D paddle.Tensor of dtype int.
+
+        This format allows the type map to be serialized as a raw byte buffer during JIT model saving.
+        """
+        return self.buffer_type_map
+
     def set_case_embd(self, case_idx: int):
         """
         Set the case embedding of this fitting net by the given case_idx,
diff --git a/deepmd/pd/utils/decomp.py b/deepmd/pd/utils/decomp.py
index 3b7bddbcd1..ab9a57dbfd 100644
--- a/deepmd/pd/utils/decomp.py
+++ b/deepmd/pd/utils/decomp.py
@@ -112,10 +112,7 @@ def masked_add__decomp(
     """
     assert mask.dtype == paddle.bool, f"mask must be bool type, but got {mask.dtype}"
     # indices is bool mask
-    mask_coord = paddle.concat(
-        paddle.nonzero(mask, as_tuple=True),
-        axis=1,
-    )  # [nz, dim]
+    mask_coord = paddle.nonzero(mask, as_tuple=False)  # [nz, dim]
     if not paddle.is_tensor(v):
         v = paddle.full([mask_coord.shape[0]], v, dtype=x.dtype)
     t = paddle.scatter_nd_add(
diff --git a/deepmd/pd/utils/stat.py b/deepmd/pd/utils/stat.py
index ca9fa96703..4132d0a5f7 100644
--- a/deepmd/pd/utils/stat.py
+++ b/deepmd/pd/utils/stat.py
@@ -4,6 +4,7 @@
     defaultdict,
 )
 from typing import (
+    Any,
     Callable,
     Optional,
     Union,
@@ -35,7 +36,9 @@
 log = logging.getLogger(__name__)
 
 
-def make_stat_input(datasets, dataloaders, nbatches):
+def make_stat_input(
+    datasets: list[Any], dataloaders: list[Any], nbatches: int
+) -> dict[str, Any]:
     """Pack data for statistics.
 
     Args:
@@ -59,6 +62,14 @@ def make_stat_input(datasets, dataloaders, nbatches):
             except StopIteration:
                 iterator = iter(dataloaders[i])
                 stat_data = next(iterator)
+            if (
+                "find_fparam" in stat_data
+                and "fparam" in stat_data
+                and stat_data["find_fparam"] == 0.0
+            ):
+                # for model using default fparam
+                stat_data.pop("fparam")
+                stat_data.pop("find_fparam")
             for dd in stat_data:
                 if stat_data[dd] is None:
                     sys_stat[dd] = None
@@ -127,9 +138,9 @@ def _save_to_file(
 
 
 def _post_process_stat(
-    out_bias,
-    out_std,
-):
+    out_bias: dict[str, np.ndarray],
+    out_std: dict[str, np.ndarray],
+) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
     """Post process the statistics.
 
     For global statistics, we do not have the std for each type of atoms,
@@ -140,7 +151,10 @@ def _post_process_stat(
     """
     new_std = {}
     for kk, vv in out_bias.items():
-        new_std[kk] = np.ones_like(vv)
+        if vv.shape == out_std[kk].shape:
+            new_std[kk] = out_std[kk]
+        else:
+            new_std[kk] = np.ones_like(vv)
     return out_bias, new_std
 
 
@@ -148,7 +162,7 @@ def _compute_model_predict(
     sampled: Union[Callable[[], list[dict]], list[dict]],
     keys: list[str],
     model_forward: Callable[..., paddle.Tensor],
-):
+) -> dict[str, list[paddle.Tensor]]:
     auto_batch_size = AutoBatchSize()
     model_predict = {kk: [] for kk in keys}
     for system in sampled:
@@ -211,7 +225,7 @@ def _make_preset_out_bias(
 def _fill_stat_with_global(
     atomic_stat: Union[np.ndarray, None],
     global_stat: np.ndarray,
-):
+) -> Union[np.ndarray, None]:
     """This function is used to fill atomic stat with global stat.
 
     Parameters
@@ -244,7 +258,7 @@ def compute_output_stats(
     model_forward: Optional[Callable[..., paddle.Tensor]] = None,
     stats_distinguish_types: bool = True,
     intensive: bool = False,
-):
+) -> dict[str, Any]:
     """
     Compute the output statistics (e.g. energy bias) for the fitting net from packed data.
 
@@ -411,7 +425,7 @@ def compute_output_stats_global(
     model_pred: Optional[dict[str, np.ndarray]] = None,
     stats_distinguish_types: bool = True,
     intensive: bool = False,
-):
+) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
     """This function only handle stat computation from reduced global labels."""
     # return directly if model predict is empty for global
     if model_pred == {}:
@@ -519,7 +533,7 @@ def compute_output_stats_global(
         }
     atom_numbs = {kk: merged_natoms[kk].sum(-1) for kk in bias_atom_e.keys()}
 
-    def rmse(x):
+    def rmse(x: np.ndarray) -> float:
         return np.sqrt(np.mean(np.square(x)))
 
     for kk in bias_atom_e.keys():
@@ -541,7 +555,7 @@ def compute_output_stats_atomic(
     ntypes: int,
     keys: list[str],
     model_pred: Optional[dict[str, np.ndarray]] = None,
-):
+) -> tuple[dict[str, np.ndarray], dict[str, np.ndarray]]:
     # get label dict from sample; for each key, only picking the system with atomic labels.
     outputs = {
         kk: [
diff --git a/doc/third-party/ase.md b/doc/third-party/ase.md
index 6ede63e2f9..183efa7cbb 100644
--- a/doc/third-party/ase.md
+++ b/doc/third-party/ase.md
@@ -6,6 +6,10 @@ See [Environment variables](../env.md) for the runtime environment variables.
 
 Deep potential can be set up as a calculator with ASE to obtain potential energies and forces.
 
+::::{tab-set}
+
+:::{tab-item} TensorFlow {{ tensorflow_icon }}
+
 ```python
 from ase import Atoms
 from deepmd.calculator import DP
@@ -20,6 +24,46 @@ print(water.get_potential_energy())
 print(water.get_forces())
 ```
 
+:::
+
+:::{tab-item} PyTorch {{ pytorch_icon }}
+
+```python
+from ase import Atoms
+from deepmd.calculator import DP
+
+water = Atoms(
+    "H2O",
+    positions=[(0.7601, 1.9270, 1), (1.9575, 1, 1), (1.0, 1.0, 1.0)],
+    cell=[100, 100, 100],
+    calculator=DP(model="frozen_model.pth"),
+)
+print(water.get_potential_energy())
+print(water.get_forces())
+```
+
+:::
+
+:::{tab-item} Paddle {{ paddle_icon }}
+
+```python
+from ase import Atoms
+from deepmd.calculator import DP
+
+water = Atoms(
+    "H2O",
+    positions=[(0.7601, 1.9270, 1), (1.9575, 1, 1), (1.0, 1.0, 1.0)],
+    cell=[100, 100, 100],
+    calculator=DP(model="frozen_model.json"),
+)
+print(water.get_potential_energy())
+print(water.get_forces())
+```
+
+:::
+
+::::
+
 Optimization is also available:
 
 ```python
diff --git a/source/api_cc/src/DeepPotPD.cc b/source/api_cc/src/DeepPotPD.cc
index d81a63b131..94931a8415 100644
--- a/source/api_cc/src/DeepPotPD.cc
+++ b/source/api_cc/src/DeepPotPD.cc
@@ -164,15 +164,23 @@ inline void enableTimestamp(bool enable = true) {
 }
 }  // namespace logg
 
-std::vector<int> createNlistTensorPD(
-    const std::vector<std::vector<int>>& data) {
-  std::vector<int> ret;
+void fillNlistTensor(const std::vector<std::vector<int>>& data,
+                     std::unique_ptr<paddle_infer::Tensor>& flat_tensor) {
+  size_t total_size = 0;
   for (const auto& row : data) {
-    ret.insert(ret.end(), row.begin(), row.end());
+    total_size += row.size();
+  }
+  std::vector<int> flat_data;
+  flat_data.reserve(total_size);
+  for (const auto& row : data) {
+    flat_data.insert(flat_data.end(), row.begin(), row.end());
   }
-  return ret;
-}
 
+  int nloc = data.size();
+  int nnei = nloc > 0 ? total_size / nloc : 0;
+  flat_tensor->Reshape({1, nloc, nnei});
+  flat_tensor->CopyFromCpu(flat_data.data());
+}
 DeepPotPD::DeepPotPD() : inited(false) {}
 DeepPotPD::DeepPotPD(const std::string& model,
                      const int& gpu_rank,
@@ -375,16 +383,14 @@ void DeepPotPD::compute(ENERGYVTYPE& ener,
   auto coord_wrapped_Tensor = predictor_fl->GetInputHandle("coord");
   coord_wrapped_Tensor->Reshape({1, nall_real, 3});
   coord_wrapped_Tensor->CopyFromCpu(coord_wrapped.data());
-
   auto atype_Tensor = predictor_fl->GetInputHandle("atype");
   atype_Tensor->Reshape({1, nall_real});
   atype_Tensor->CopyFromCpu(datype.data());
-
   if (ago == 0) {
-    nlist_data.copy_from_nlist(lmp_list);
+    nlist_data.copy_from_nlist(lmp_list, nall - nghost);
     nlist_data.shuffle_exclude_empty(fwd_map);
     nlist_data.padding();
-    if (do_message_passing == 1 && nghost > 0) {
+    if (do_message_passing) {
       auto sendproc_tensor = predictor_fl->GetInputHandle("send_proc");
       auto recvproc_tensor = predictor_fl->GetInputHandle("recv_proc");
       auto recvnum_tensor = predictor_fl->GetInputHandle("recv_num");
@@ -446,11 +452,8 @@ void DeepPotPD::compute(ENERGYVTYPE& ener,
       this->mapping_tensor->CopyFromCpu(mapping.data());
     }
   }
-  std::vector<int> firstneigh = createNlistTensorPD(nlist_data.jlist);
   this->firstneigh_tensor = predictor_fl->GetInputHandle("nlist");
-  this->firstneigh_tensor->Reshape(
-      {1, nloc, (int)firstneigh.size() / (int)nloc});
-  this->firstneigh_tensor->CopyFromCpu(firstneigh.data());
+  fillNlistTensor(nlist_data.jlist, this->firstneigh_tensor);
   bool do_atom_virial_tensor = atomic;
   if (!fparam.empty()) {
     std::unique_ptr<paddle_infer::Tensor> fparam_tensor;
@@ -510,7 +513,7 @@ void DeepPotPD::compute(ENERGYVTYPE& ener,
   }
 }
 template void DeepPotPD::compute<double, std::vector<ENERGYTYPE>>(
-    std::vector<ENERGYTYPE>& dener,
+    std::vector<ENERGYTYPE>& ener,
     std::vector<double>& force,
     std::vector<double>& virial,
     std::vector<double>& atom_energy,
@@ -522,11 +525,10 @@ template void DeepPotPD::compute<double, std::vector<ENERGYTYPE>>(
     const InputNlist& lmp_list,
     const int& ago,
     const std::vector<double>& fparam,
-    const std::vector<double>& aparam_,
+    const std::vector<double>& aparam,
     const bool atomic);
-
 template void DeepPotPD::compute<float, std::vector<ENERGYTYPE>>(
-    std::vector<ENERGYTYPE>& dener,
+    std::vector<ENERGYTYPE>& ener,
     std::vector<float>& force,
     std::vector<float>& virial,
     std::vector<float>& atom_energy,
@@ -538,9 +540,8 @@ template void DeepPotPD::compute<float, std::vector<ENERGYTYPE>>(
     const InputNlist& lmp_list,
     const int& ago,
     const std::vector<float>& fparam,
-    const std::vector<float>& aparam_,
+    const std::vector<float>& aparam,
     const bool atomic);
-
 // ENERGYVTYPE: std::vector<ENERGYTYPE> or ENERGYTYPE
 template <typename VALUETYPE, typename ENERGYVTYPE>
 void DeepPotPD::compute(ENERGYVTYPE& ener,
@@ -562,9 +563,9 @@ void DeepPotPD::compute(ENERGYVTYPE& ener,
   coord_wrapped_Tensor->Reshape({1, natoms, 3});
   coord_wrapped_Tensor->CopyFromCpu(coord_wrapped.data());
 
-  std::vector<std::int64_t> atype_64(atype.begin(), atype.end());
   auto atype_Tensor = predictor->GetInputHandle("atype");
   atype_Tensor->Reshape({1, natoms});
+  std::vector<std::int64_t> atype_64(atype.begin(), atype.end());
   atype_Tensor->CopyFromCpu(atype_64.data());
 
   std::unique_ptr<paddle_infer::Tensor> box_Tensor;
@@ -573,15 +574,15 @@ void DeepPotPD::compute(ENERGYVTYPE& ener,
     box_Tensor->Reshape({1, 9});
     box_Tensor->CopyFromCpu((box.data()));
   }
-  std::unique_ptr<paddle_infer::Tensor> fparam_tensor;
   if (!fparam.empty()) {
-    fparam_tensor = predictor->GetInputHandle("box");
+    std::unique_ptr<paddle_infer::Tensor> fparam_tensor;
+    fparam_tensor = predictor->GetInputHandle("fparam");
     fparam_tensor->Reshape({1, static_cast<int>(fparam.size())});
     fparam_tensor->CopyFromCpu((fparam.data()));
   }
-  std::unique_ptr<paddle_infer::Tensor> aparam_tensor;
   if (!aparam.empty()) {
-    aparam_tensor = predictor->GetInputHandle("box");
+    std::unique_ptr<paddle_infer::Tensor> aparam_tensor;
+    aparam_tensor = predictor->GetInputHandle("aparam");
     aparam_tensor->Reshape(
         {1, natoms, static_cast<int>(aparam.size()) / natoms});
     aparam_tensor->CopyFromCpu((aparam.data()));
@@ -628,11 +629,11 @@ void DeepPotPD::compute(ENERGYVTYPE& ener,
 
 template void DeepPotPD::compute<double, std::vector<ENERGYTYPE>>(
     std::vector<ENERGYTYPE>& ener,
-    std::vector<double>& dforce,
+    std::vector<double>& force,
     std::vector<double>& virial,
     std::vector<double>& atom_energy,
     std::vector<double>& atom_virial,
-    const std::vector<double>& dcoord,
+    const std::vector<double>& coord,
     const std::vector<int>& atype,
     const std::vector<double>& box,
     const std::vector<double>& fparam,
@@ -645,7 +646,7 @@ template void DeepPotPD::compute<float, std::vector<ENERGYTYPE>>(
     std::vector<float>& virial,
     std::vector<float>& atom_energy,
     std::vector<float>& atom_virial,
-    const std::vector<float>& dcoord,
+    const std::vector<float>& coord,
     const std::vector<int>& atype,
     const std::vector<float>& box,
     const std::vector<float>& fparam,
diff --git a/source/tests/pd/model/test_atomic_model_atomic_stat.py b/source/tests/pd/model/test_atomic_model_atomic_stat.py
index 93aa7b8905..bfc86edc12 100644
--- a/source/tests/pd/model/test_atomic_model_atomic_stat.py
+++ b/source/tests/pd/model/test_atomic_model_atomic_stat.py
@@ -5,6 +5,7 @@
     Path,
 )
 from typing import (
+    NoReturn,
     Optional,
 )
 
@@ -114,10 +115,10 @@ def forward(
 
 
 class TestAtomicModelStat(unittest.TestCase, TestCaseSingleFrameWithNlist):
-    def tearDown(self):
+    def tearDown(self) -> None:
         self.tempdir.cleanup()
 
-    def setUp(self):
+    def setUp(self) -> None:
         TestCaseSingleFrameWithNlist.setUp(self)
         self.merged_output_stat = [
             {
@@ -171,7 +172,7 @@ def setUp(self):
             pass
         self.stat_file_path = DPPath(h5file, "a")
 
-    def test_output_stat(self):
+    def test_output_stat(self) -> None:
         nf, nloc, nnei = self.nlist.shape
         ds = DescrptDPA1(
             self.rcut,
@@ -237,10 +238,12 @@ def cvt_ret(x):
         expected_ret1["foo"] = ret0["foo"] + foo_bias[at]
         expected_ret1["bar"] = ret0["bar"] + bar_bias[at]
         for kk in ["foo", "bar"]:
-            np.testing.assert_almost_equal(ret1[kk], expected_ret1[kk])
+            np.testing.assert_almost_equal(
+                ret1[kk], expected_ret1[kk], err_msg=f"{kk} not equal"
+            )
 
         # 3. test bias load from file
-        def raise_error():
+        def raise_error() -> NoReturn:
             raise RuntimeError
 
         md0.compute_or_load_out_stat(raise_error, stat_file_path=self.stat_file_path)
@@ -284,10 +287,10 @@ def raise_error():
 class TestAtomicModelStatMergeGlobalAtomic(
     unittest.TestCase, TestCaseSingleFrameWithNlist
 ):
-    def tearDown(self):
+    def tearDown(self) -> None:
         self.tempdir.cleanup()
 
-    def setUp(self):
+    def setUp(self) -> None:
         TestCaseSingleFrameWithNlist.setUp(self)
         self.merged_output_stat = [
             {
@@ -341,7 +344,7 @@ def setUp(self):
             pass
         self.stat_file_path = DPPath(h5file, "a")
 
-    def test_output_stat(self):
+    def test_output_stat(self) -> None:
         nf, nloc, nnei = self.nlist.shape
         ds = DescrptDPA1(
             self.rcut,
@@ -401,7 +404,7 @@ def cvt_ret(x):
             np.testing.assert_almost_equal(ret1[kk], expected_ret1[kk])
 
         # 3. test bias load from file
-        def raise_error():
+        def raise_error() -> NoReturn:
             raise RuntimeError
 
         md0.compute_or_load_out_stat(raise_error, stat_file_path=self.stat_file_path)
diff --git a/source/tests/pd/model/test_deeppot.py b/source/tests/pd/model/test_deeppot.py
new file mode 100644
index 0000000000..24696dea86
--- /dev/null
+++ b/source/tests/pd/model/test_deeppot.py
@@ -0,0 +1,140 @@
+# SPDX-License-Identifier: LGPL-3.0-or-later
+import json
+import os
+import unittest
+from copy import (
+    deepcopy,
+)
+from pathlib import (
+    Path,
+)
+
+import numpy as np
+import paddle
+
+from deepmd.infer.deep_pot import DeepPot as DeepPotUni
+from deepmd.pd.entrypoints.main import (
+    freeze,
+    get_trainer,
+)
+from deepmd.pd.infer.deep_eval import (
+    DeepPot,
+)
+
+
+class TestDeepPot(unittest.TestCase):
+    def setUp(self) -> None:
+        input_json = str(Path(__file__).parent / "water/se_atten.json")
+        with open(input_json) as f:
+            self.config = json.load(f)
+        self.config["training"]["numb_steps"] = 1
+        self.config["training"]["save_freq"] = 1
+        self.config["training"]["training_data"]["systems"] = [
+            str(Path(__file__).parent / "water/data/single")
+        ]
+        self.config["training"]["validation_data"]["systems"] = [
+            str(Path(__file__).parent / "water/data/single")
+        ]
+        self.input_json = "test_dp_test.json"
+        with open(self.input_json, "w") as fp:
+            json.dump(self.config, fp, indent=4)
+
+        trainer = get_trainer(deepcopy(self.config))
+        trainer.run()
+
+        ori_place = paddle.device.get_device()
+        paddle.device.set_device("cpu")
+        input_dict, label_dict, _ = trainer.get_data(is_train=False)
+        paddle.device.set_device(ori_place)
+        trainer.wrapper(**input_dict, label=label_dict, cur_lr=1.0)
+        self.model = "model.pd"
+
+    def tearDown(self) -> None:
+        for f in os.listdir("."):
+            if f in ["lcurve.out", self.input_json]:
+                os.remove(f)
+
+    def test_dp_test(self) -> None:
+        dp = DeepPot(str(self.model))
+        cell = np.array(
+            [
+                5.122106549439247480e00,
+                4.016537340154059388e-01,
+                6.951654033828678081e-01,
+                4.016537340154059388e-01,
+                6.112136112297989143e00,
+                8.178091365465004481e-01,
+                6.951654033828678081e-01,
+                8.178091365465004481e-01,
+                6.159552512682983760e00,
+            ]
+        ).reshape(1, 3, 3)
+        coord = np.array(
+            [
+                2.978060152121375648e00,
+                3.588469695887098077e00,
+                2.792459820604495491e00,
+                3.895592322591093115e00,
+                2.712091020667753760e00,
+                1.366836847133650501e00,
+                9.955616170888935690e-01,
+                4.121324820711413039e00,
+                1.817239061889086571e00,
+                3.553661462345699906e00,
+                5.313046969500791583e00,
+                6.635182659098815883e00,
+                6.088601018589653080e00,
+                6.575011420004332585e00,
+                6.825240650611076099e00,
+            ]
+        ).reshape(1, -1, 3)
+        atype = np.array([0, 0, 0, 1, 1]).reshape(1, -1)
+
+        ret = dp.eval(coord, cell, atype, atomic=True)
+        e, f, v, ae, av = ret[0], ret[1], ret[2], ret[3], ret[4]
+        self.assertEqual(e.shape, (1, 1))
+        self.assertEqual(f.shape, (1, 5, 3))
+        self.assertEqual(v.shape, (1, 9))
+        self.assertEqual(ae.shape, (1, 5, 1))
+        self.assertEqual(av.shape, (1, 5, 9))
+
+        self.assertEqual(dp.get_type_map(), ["O", "H"])
+        self.assertEqual(dp.get_ntypes(), 2)
+        self.assertEqual(dp.get_dim_fparam(), 0)
+        self.assertEqual(dp.get_dim_aparam(), 0)
+        self.assertEqual(dp.deep_eval.model_type, DeepPot)
+
+    def test_uni(self) -> None:
+        dp = DeepPotUni(str(self.model))
+        self.assertIsInstance(dp, DeepPot)
+        # its methods have been tested in test_dp_test
+
+    def test_eval_typeebd(self) -> None:
+        dp = DeepPot(str(self.model))
+        eval_typeebd = dp.eval_typeebd()
+        self.assertEqual(
+            eval_typeebd.shape, (len(self.config["model"]["type_map"]) + 1, 8)
+        )
+        np.testing.assert_allclose(eval_typeebd[-1], np.zeros_like(eval_typeebd[-1]))
+
+
+@unittest.skip(reason="Freezed model(.json) do not support getting attributes")
+class TestDeepPotFrozen(TestDeepPot):
+    def setUp(self) -> None:
+        super().setUp()
+        frozen_model = "frozen_model.json"
+        freeze(
+            model=self.model,
+            output=frozen_model,
+            head=None,
+            do_atomic_virial=True,
+        )
+        self.model = frozen_model
+
+    # NOTE: patching DEVICE cannot actually prevent the CUDA device from being used;
+    # it can only be used to test whether devices are mismatched.
+    @unittest.skipIf(not paddle.device.is_compiled_with_cuda(), "CUDA not available")
+    @unittest.mock.patch("deepmd.pd.utils.env.DEVICE", paddle.CPUPlace())
+    @unittest.mock.patch("deepmd.pd.infer.deep_eval.DEVICE", paddle.CPUPlace())
+    def test_dp_test_cpu(self) -> None:
+        self.test_dp_test()
diff --git a/source/tests/pd/model/test_descriptor_dpa1.py b/source/tests/pd/model/test_descriptor_dpa1.py
index bfcf4ba6ee..f4cf6a8005 100644
--- a/source/tests/pd/model/test_descriptor_dpa1.py
+++ b/source/tests/pd/model/test_descriptor_dpa1.py
@@ -368,7 +368,7 @@ def translate_se_atten_and_type_embd_dicts_to_dpa1(
     source_dict,
     type_embd_dict,
 ):
-    all_keys = list(target_dict.keys())
+    all_keys = [key for key in target_dict.keys() if "buffer_" not in key]
     record = [False for ii in all_keys]
     for kk, vv in source_dict.items():
         tk = "se_atten." + kk
diff --git a/source/tests/pd/model/test_descriptor_dpa2.py b/source/tests/pd/model/test_descriptor_dpa2.py
index 12017bb840..b8e48580d0 100644
--- a/source/tests/pd/model/test_descriptor_dpa2.py
+++ b/source/tests/pd/model/test_descriptor_dpa2.py
@@ -190,7 +190,7 @@ def translate_type_embd_dicts_to_dpa2(
     source_dict,
     type_embd_dict,
 ):
-    all_keys = list(target_dict.keys())
+    all_keys = [key for key in target_dict.keys() if "buffer_" not in key]
     record = [False for ii in all_keys]
     for kk, vv in source_dict.items():
         record[all_keys.index(kk)] = True