Skip to content

Commit f9f1759

Browse files
Refactor property fitting interface (#4471)
Two main changes: 1. The program can read data files in `npy` format with any prefix (`tc.npy`, `band_gap.npy`, ...). One just needs to write the name of the property and the corresponding dimension in `model/fitting` in `input.json`. 2. Data normalisation has been added to the program. Specifically, the mean and standard deviation of the properties are computed during the statistics calculation; the output of `fitting_net` is multiplied by the standard deviation and then added to the mean to get the output of the `PropertyModel`; and when calculating the loss, the loss is again normalised. <!-- This is an auto-generated comment: release notes by coderabbit.ai --> ## Summary by CodeRabbit ## Release Notes - **New Features** - Introduced new parameters for property loss calculations and model fitting, enhancing flexibility and functionality. - Added methods for retrieving property names and checking property intensity across various classes. - **Bug Fixes** - Improved validation and error handling for property-related calculations. - **Documentation** - Enhanced documentation for property fitting, including detailed parameter explanations and structured training examples. - Added documentation for new parameters in the JSON configuration related to property fitting. - **Tests** - Added new test cases to validate the functionality of updated methods and properties. - Updated existing tests to utilize dynamic property names instead of hardcoded strings. - **Chores** - Updated configuration files and test data to align with new property handling features. <!-- end of auto-generated comment: release notes by coderabbit.ai --> --------- Signed-off-by: Chenqqian Zhang <100290172+Chengqian-Zhang@users.noreply.github.com> Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
1 parent f8605ee commit f9f1759

File tree

43 files changed

+652
-146
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

43 files changed

+652
-146
lines changed

deepmd/dpmodel/atomic_model/__init__.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,9 @@
4242
from .polar_atomic_model import (
4343
DPPolarAtomicModel,
4444
)
45+
from .property_atomic_model import (
46+
DPPropertyAtomicModel,
47+
)
4548

4649
__all__ = [
4750
"BaseAtomicModel",
@@ -50,6 +53,7 @@
5053
"DPDipoleAtomicModel",
5154
"DPEnergyAtomicModel",
5255
"DPPolarAtomicModel",
56+
"DPPropertyAtomicModel",
5357
"DPZBLLinearEnergyAtomicModel",
5458
"LinearEnergyAtomicModel",
5559
"PairTabAtomicModel",

deepmd/dpmodel/atomic_model/property_atomic_model.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
11
# SPDX-License-Identifier: LGPL-3.0-or-later
2+
import numpy as np
3+
24
from deepmd.dpmodel.fitting.property_fitting import (
35
PropertyFittingNet,
46
)
@@ -15,3 +17,25 @@ def __init__(self, descriptor, fitting, type_map, **kwargs):
1517
"fitting must be an instance of PropertyFittingNet for DPPropertyAtomicModel"
1618
)
1719
super().__init__(descriptor, fitting, type_map, **kwargs)
20+
21+
def apply_out_stat(
22+
self,
23+
ret: dict[str, np.ndarray],
24+
atype: np.ndarray,
25+
):
26+
"""Apply the stat to each atomic output.
27+
28+
In property fitting, each output is multiplied by the label standard deviation and then shifted by the label mean.
29+
30+
Parameters
31+
----------
32+
ret
33+
The returned dict by the forward_atomic method
34+
atype
35+
The atom types, with shape nf x nloc. It is not used in property fitting.
36+
37+
"""
38+
out_bias, out_std = self._fetch_out_stat(self.bias_keys)
39+
for kk in self.bias_keys:
40+
ret[kk] = ret[kk] * out_std[kk][0] + out_bias[kk][0]
41+
return ret

deepmd/dpmodel/fitting/property_fitting.py

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,10 +41,9 @@ class PropertyFittingNet(InvarFitting):
4141
this list is of length :math:`N_l + 1`, specifying if the hidden layers and the output layer are trainable.
4242
intensive
4343
Whether the fitting property is intensive.
44-
bias_method
45-
The method of applying the bias to each atomic output, user can select 'normal' or 'no_bias'.
46-
If 'normal' is used, the computed bias will be added to the atomic output.
47-
If 'no_bias' is used, no bias will be added to the atomic output.
44+
property_name
45+
The name of fitting property, which should be consistent with the property name in the dataset.
46+
If the data file is named `humo.npy`, this parameter should be "humo".
4847
resnet_dt
4948
Time-step `dt` in the resnet construction:
5049
:math:`y = x + dt * \phi (Wx + b)`
@@ -74,7 +73,7 @@ def __init__(
7473
rcond: Optional[float] = None,
7574
trainable: Union[bool, list[bool]] = True,
7675
intensive: bool = False,
77-
bias_method: str = "normal",
76+
property_name: str = "property",
7877
resnet_dt: bool = True,
7978
numb_fparam: int = 0,
8079
numb_aparam: int = 0,
@@ -89,9 +88,8 @@ def __init__(
8988
) -> None:
9089
self.task_dim = task_dim
9190
self.intensive = intensive
92-
self.bias_method = bias_method
9391
super().__init__(
94-
var_name="property",
92+
var_name=property_name,
9593
ntypes=ntypes,
9694
dim_descrpt=dim_descrpt,
9795
dim_out=task_dim,
@@ -113,9 +111,9 @@ def __init__(
113111
@classmethod
114112
def deserialize(cls, data: dict) -> "PropertyFittingNet":
115113
data = data.copy()
116-
check_version_compatibility(data.pop("@version"), 3, 1)
114+
check_version_compatibility(data.pop("@version"), 4, 1)
117115
data.pop("dim_out")
118-
data.pop("var_name")
116+
data["property_name"] = data.pop("var_name")
119117
data.pop("tot_ener_zero")
120118
data.pop("layer_name")
121119
data.pop("use_aparam_as_mask", None)
@@ -131,6 +129,8 @@ def serialize(self) -> dict:
131129
**InvarFitting.serialize(self),
132130
"type": "property",
133131
"task_dim": self.task_dim,
132+
"intensive": self.intensive,
134133
}
134+
dd["@version"] = 4
135135

136136
return dd

deepmd/dpmodel/model/property_model.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# SPDX-License-Identifier: LGPL-3.0-or-later
2-
from deepmd.dpmodel.atomic_model.dp_atomic_model import (
3-
DPAtomicModel,
2+
from deepmd.dpmodel.atomic_model import (
3+
DPPropertyAtomicModel,
44
)
55
from deepmd.dpmodel.model.base_model import (
66
BaseModel,
@@ -13,7 +13,7 @@
1313
make_model,
1414
)
1515

16-
DPPropertyModel_ = make_model(DPAtomicModel)
16+
DPPropertyModel_ = make_model(DPPropertyAtomicModel)
1717

1818

1919
@BaseModel.register("property")

deepmd/entrypoints/test.py

Lines changed: 14 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -779,9 +779,17 @@ def test_property(
779779
tuple[list[np.ndarray], list[int]]
780780
arrays with results and their shapes
781781
"""
782-
data.add("property", dp.task_dim, atomic=False, must=True, high_prec=True)
782+
var_name = dp.get_var_name()
783+
assert isinstance(var_name, str)
784+
data.add(var_name, dp.task_dim, atomic=False, must=True, high_prec=True)
783785
if has_atom_property:
784-
data.add("atom_property", dp.task_dim, atomic=True, must=False, high_prec=True)
786+
data.add(
787+
f"atom_{var_name}",
788+
dp.task_dim,
789+
atomic=True,
790+
must=False,
791+
high_prec=True,
792+
)
785793

786794
if dp.get_dim_fparam() > 0:
787795
data.add(
@@ -832,12 +840,12 @@ def test_property(
832840
aproperty = ret[1]
833841
aproperty = aproperty.reshape([numb_test, natoms * dp.task_dim])
834842

835-
diff_property = property - test_data["property"][:numb_test]
843+
diff_property = property - test_data[var_name][:numb_test]
836844
mae_property = mae(diff_property)
837845
rmse_property = rmse(diff_property)
838846

839847
if has_atom_property:
840-
diff_aproperty = aproperty - test_data["atom_property"][:numb_test]
848+
diff_aproperty = aproperty - test_data[f"atom_{var_name}"][:numb_test]
841849
mae_aproperty = mae(diff_aproperty)
842850
rmse_aproperty = rmse(diff_aproperty)
843851

@@ -854,7 +862,7 @@ def test_property(
854862
detail_path = Path(detail_file)
855863

856864
for ii in range(numb_test):
857-
test_out = test_data["property"][ii].reshape(-1, 1)
865+
test_out = test_data[var_name][ii].reshape(-1, 1)
858866
pred_out = property[ii].reshape(-1, 1)
859867

860868
frame_output = np.hstack((test_out, pred_out))
@@ -868,7 +876,7 @@ def test_property(
868876

869877
if has_atom_property:
870878
for ii in range(numb_test):
871-
test_out = test_data["atom_property"][ii].reshape(-1, 1)
879+
test_out = test_data[f"atom_{var_name}"][ii].reshape(-1, 1)
872880
pred_out = aproperty[ii].reshape(-1, 1)
873881

874882
frame_output = np.hstack((test_out, pred_out))

deepmd/infer/deep_eval.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,8 +70,6 @@ class DeepEvalBackend(ABC):
7070
"dipole_derv_c_redu": "virial",
7171
"dos": "atom_dos",
7272
"dos_redu": "dos",
73-
"property": "atom_property",
74-
"property_redu": "property",
7573
"mask_mag": "mask_mag",
7674
"mask": "mask",
7775
# old models in v1
@@ -276,6 +274,10 @@ def get_has_spin(self) -> bool:
276274
"""Check if the model has spin atom types."""
277275
return False
278276

277+
def get_var_name(self) -> str:
278+
"""Get the name of the fitting property."""
279+
raise NotImplementedError
280+
279281
@abstractmethod
280282
def get_ntypes_spin(self) -> int:
281283
"""Get the number of spin atom types of this model. Only used in old implement."""

deepmd/infer/deep_property.py

Lines changed: 33 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -37,25 +37,41 @@ class DeepProperty(DeepEval):
3737
Keyword arguments.
3838
"""
3939

40-
@property
4140
def output_def(self) -> ModelOutputDef:
42-
"""Get the output definition of this model."""
43-
return ModelOutputDef(
41+
"""
42+
Get the output definition of this model.
43+
But in property_fitting, the output definition is not known until the model is loaded.
44+
So we need to rewrite the output definition after the model is loaded.
45+
See detail in change_output_def.
46+
"""
47+
pass
48+
49+
def change_output_def(self) -> None:
50+
"""
51+
Change the output definition of this model.
52+
In property_fitting, the output definition is known after the model is loaded.
53+
We need to rewrite the output definition and related information.
54+
"""
55+
self.output_def = ModelOutputDef(
4456
FittingOutputDef(
4557
[
4658
OutputVariableDef(
47-
"property",
48-
shape=[-1],
59+
self.get_var_name(),
60+
shape=[self.get_task_dim()],
4961
reducible=True,
5062
atomic=True,
63+
intensive=self.get_intensive(),
5164
),
5265
]
5366
)
5467
)
55-
56-
def change_output_def(self) -> None:
57-
self.output_def["property"].shape = self.task_dim
58-
self.output_def["property"].intensive = self.get_intensive()
68+
self.deep_eval.output_def = self.output_def
69+
self.deep_eval._OUTDEF_DP2BACKEND[self.get_var_name()] = (
70+
f"atom_{self.get_var_name()}"
71+
)
72+
self.deep_eval._OUTDEF_DP2BACKEND[f"{self.get_var_name()}_redu"] = (
73+
self.get_var_name()
74+
)
5975

6076
@property
6177
def task_dim(self) -> int:
@@ -120,10 +136,12 @@ def eval(
120136
aparam=aparam,
121137
**kwargs,
122138
)
123-
atomic_property = results["property"].reshape(
139+
atomic_property = results[self.get_var_name()].reshape(
124140
nframes, natoms, self.get_task_dim()
125141
)
126-
property = results["property_redu"].reshape(nframes, self.get_task_dim())
142+
property = results[f"{self.get_var_name()}_redu"].reshape(
143+
nframes, self.get_task_dim()
144+
)
127145

128146
if atomic:
129147
return (
@@ -141,5 +159,9 @@ def get_intensive(self) -> bool:
141159
"""Get whether the property is intensive."""
142160
return self.deep_eval.get_intensive()
143161

162+
def get_var_name(self) -> str:
163+
"""Get the name of the fitting property."""
164+
return self.deep_eval.get_var_name()
165+
144166

145167
__all__ = ["DeepProperty"]

deepmd/pt/infer/deep_eval.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,15 @@ def get_dim_aparam(self) -> int:
184184
def get_intensive(self) -> bool:
185185
return self.dp.model["Default"].get_intensive()
186186

187+
def get_var_name(self) -> str:
188+
"""Get the name of the property."""
189+
if hasattr(self.dp.model["Default"], "get_var_name") and callable(
190+
getattr(self.dp.model["Default"], "get_var_name")
191+
):
192+
return self.dp.model["Default"].get_var_name()
193+
else:
194+
raise NotImplementedError
195+
187196
@property
188197
def model_type(self) -> type["DeepEvalWrapper"]:
189198
"""The the evaluator of the model type."""
@@ -200,7 +209,7 @@ def model_type(self) -> type["DeepEvalWrapper"]:
200209
return DeepGlobalPolar
201210
elif "wfc" in model_output_type:
202211
return DeepWFC
203-
elif "property" in model_output_type:
212+
elif self.get_var_name() in model_output_type:
204213
return DeepProperty
205214
else:
206215
raise RuntimeError("Unknown model type")

0 commit comments

Comments
 (0)