Skip to content

Commit 1a2d878

Browse files
authored
Merge pull request #338 from anyangml2nd/feat/remake-OOD-dataset-v3
Chore: update to OOD-v3 with downsampling
2 parents 61a4fe7 + 9316281 commit 1a2d878

File tree

7 files changed

+94
-95
lines changed

7 files changed

+94
-95
lines changed

lambench/metrics/direct_task_weights.yml

Lines changed: 34 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -11,40 +11,48 @@ HEA25_S:
1111
energy_weight: 1.0
1212
force_weight: 1.0
1313
virial_weight: 1.0
14-
energy_std: 0.4030134901622356
15-
force_std: 1.5479359067976695
16-
virial_std: 1.4293255096528095
14+
energy_std: 0.43116544603059137
15+
force_std: 1.6451851726356517
16+
virial_std: 1.5346916671705537
17+
HEA25_bulk:
18+
domain: Inorganic Materials
19+
energy_weight: 1.0
20+
force_weight: 1.0
21+
virial_weight: 1.0
22+
energy_std: 0.510765388371663
23+
force_std: 2.2626383273307247
24+
virial_std: 2.3506419913635357
1725
MoS2:
1826
domain: Inorganic Materials
1927
energy_weight: 1.0
2028
force_weight: 1.0
2129
virial_weight: 1.0
22-
energy_std: 0.08333066480136275
23-
force_std: 0.9536237886182164
24-
virial_std: 0.42877076652059987
30+
energy_std: 0.08167024857360158
31+
force_std: 0.9568415738836734
32+
virial_std: 0.4161647190798736
2533
MD22:
2634
domain: Molecules
2735
energy_weight: 1.0
2836
force_weight: 1.0
2937
virial_weight: null
30-
energy_std: 0.007773825548398275
31-
force_std: 1.1374624718556865
38+
energy_std: 0.007941836149915322
39+
force_std: 1.1391327961625524
3240
virial_std: null
3341
REANN_CO2_Ni100:
3442
domain: Catalysis
3543
energy_weight: 1.0
3644
force_weight: 1.0
3745
virial_weight: null
38-
energy_std: 0.011019047357696455
39-
force_std: 0.6822029356434589
46+
energy_std: 0.01166496756645258
47+
force_std: 0.691476732207179
4048
virial_std: null
4149
NequIP_NC_2022:
4250
domain: Inorganic Materials
4351
energy_weight: 1.0
4452
force_weight: 1.0
4553
virial_weight: null
46-
energy_std: 0.007872460518687731
47-
force_std: 0.9434954645234603
54+
energy_std: 0.007043606595996931
55+
force_std: 0.9434989207476847
4856
virial_std: null
4957
AIMD-Chig:
5058
domain: Molecules
@@ -54,29 +62,21 @@ AIMD-Chig:
5462
energy_std: 0.00937981704049093
5563
force_std: 0.8699872017197637
5664
virial_std: null
57-
CGM_MLP_NC2023:
58-
domain: Catalysis
59-
energy_weight: 1.0
60-
force_weight: 1.0
61-
virial_weight: null
62-
energy_std: 0.9875093490539156
63-
force_std: 2.022155629243275
64-
virial_std: null
6565
Cu_MgO_catalysts:
6666
domain: Catalysis
6767
energy_weight: 1.0
6868
force_weight: 1.0
6969
virial_weight: null
70-
energy_std: 0.013837285997415875
71-
force_std: 0.18923742282763548
70+
energy_std: 0.014202887558640341
71+
force_std: 0.18708301630679647
7272
virial_std: null
7373
Ca_batteries_CM2021:
7474
domain: Inorganic Materials
7575
energy_weight: 1.0
7676
force_weight: 1.0
7777
virial_weight: null
78-
energy_std: 0.016540792734058157
79-
force_std: 0.17821831827935788
78+
energy_std: 0.015689329430759177
79+
force_std: 0.17632637938539042
8080
virial_std: null
8181
HPt_NC_2022:
8282
domain: Catalysis
@@ -91,12 +91,20 @@ Si_ZEO22:
9191
energy_weight: 1.0
9292
force_weight: 1.0
9393
virial_weight: null
94-
energy_std: 0.03534121167926313
95-
force_std: 1.2410267785352673
94+
energy_std: 0.035252909678594395
95+
force_std: 1.232529293429265
9696
virial_std: null
9797

9898

9999
## DEPRECATED
100+
# CGM_MLP_NC2023:
101+
# domain: Catalysis
102+
# energy_weight: 1.0
103+
# force_weight: 1.0
104+
# virial_weight: null
105+
# energy_std: 0.9875093490539156
106+
# force_std: 2.022155629243275
107+
# virial_std: null
100108
# Collision:
101109
# domain: Reactions
102110
# energy_weight: 1.0
@@ -129,14 +137,6 @@ Si_ZEO22:
129137
# energy_std: 0.7749643377228371
130138
# force_std: 1.1503770816187873
131139
# virial_std: 0.8678699239404154
132-
# HEA25_bulk:
133-
# domain: Inorganic Materials
134-
# energy_weight: 1.0
135-
# force_weight: 1.0
136-
# virial_weight: 1.0
137-
# energy_std: 0.4086027291354181
138-
# force_std: 2.075184012071992
139-
# virial_std: 2.065014356039771
140140
# HEMC_HEMB:
141141
# domain: Inorganic Materials
142142
# energy_weight: 1.0

lambench/metrics/results/README.md

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,9 @@ Figure 2: Accuracy-Efficiency Trade-off, $\bar{M}^m_{FF}$ vs $M_E^m$.
3434

3535
We categorize all force-field prediction tasks into 3 domains:
3636

37-
- **Inorganic Materials**: `Torres2019Analysis`, `Batzner2022equivariant`, `Sours2023Applications`, `Lopanitsyna2023Modeling_A`, `Lopanitsyna2023Modeling_B`, `Gao2025Spontaneous`
37+
- **Inorganic Materials**: `Torres2019Analysis`, `Batzner2022equivariant`, `Sours2023Applications`, `Lopanitsyna2023Modeling`, `Mazitov2024Surface`, `Gao2025Spontaneous`
3838
- **Molecules**: `ANI-1x`, `MD22`, `AIMD-Chig`
39-
- **Catalysis**: `Vandermause2022Active`, `Zhang2019Bridging`, `Zhang2024Active`, `Villanueva2024Water`
39+
- **Catalysis**: `Vandermause2022Active`, `Zhang2019Bridging`, `Villanueva2024Water`
4040

4141
To assess model performance across these domains, we use zero-shot inference with energy-bias term adjustments based on test dataset statistics. Performance metrics are aggregated as follows:
4242

lambench/metrics/results/metadata.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -296,10 +296,10 @@
296296
"DESCRIPTION": "The mean absolute error of the virial prediction per atom."
297297
}
298298
},
299-
"CGM_MLP_NC2023": {
300-
"DISPLAY_NAME": "Zhang2024Active",
301-
"DESCRIPTION": "Dynamic simulations of carbon deposition on metal surfaces like Cu(111), Cr(110), Ti(001), and oxygen-contaminated Cu(111). Calculations were performed using CP2K with PBE-D3/DZVP-SR-GTH and 300Ry/60Ry cutoff. [https://www.nature.com/articles/s41467-023-44525-z]",
302-
"domain": "Catalysis",
299+
"HEA25_bulk": {
300+
"DISPLAY_NAME": "Lopanitsyna2023Modeling",
301+
"DESCRIPTION": "A dataset of high entropy alloy bulk structures, focusing on 25 d-block transition metals, excluding Tc, Cd, Re, Os and Hg. The original dataset was calculated using VASP with PBEsol/PAW, 550 eV cutoff, and Γ-centered k-points. [https://arxiv.org/abs/2212.13254]. The dataset was relabeled with VASP at the PBE level.",
302+
"domain": "Inorganic Materials",
303303
"energy_rmse": {
304304
"DISPLAY_NAME": "E RMSE (meV)",
305305
"DESCRIPTION": "The root mean squared error of the energy prediction.",
@@ -542,8 +542,8 @@
542542
}
543543
},
544544
"HEA25_S": {
545-
"DISPLAY_NAME": "Lopanitsyna2023Modeling",
546-
"DESCRIPTION": "A dataset of 25-atom high entropy alloy surfaces, focusing on 25 d-block transition metals, excluding Tc, Cd, Re, Os and Hg. Calculations were performed using VASP with PBEsol/PAW, 550 eV cutoff, and Γ-centered k-points. [https://arxiv.org/abs/2212.13254]",
545+
"DISPLAY_NAME": "Mazitov2024Surface",
546+
"DESCRIPTION": "A dataset of high entropy alloy surfaces, focusing on 25 d-block transition metals, excluding Tc, Cd, Re, Os and Hg. The original dataset was calculated using VASP with PBEsol/PAW, 550 eV cutoff, and Γ-centered k-points. [https://arxiv.org/abs/2212.13254]. The dataset was relabeled with VASP at the PBE level.",
547547
"domain": "Inorganic Materials",
548548
"energy_rmse": {
549549
"DISPLAY_NAME": "E RMSE (meV)",

lambench/tasks/direct/direct_tasks.yml

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,27 +1,27 @@
11
ANI:
2-
test_data: "/bohr/lambench-ood-zwtr/v2/LAMBench-TestData-v2/ANI"
2+
test_data: "/bohr/lambench-ood-zwtr/v3/LAMBench-TestData-v3/ANI"
33
HEA25_S:
4-
test_data: "/bohr/lambench-ood-zwtr/v2/LAMBench-TestData-v2/HEA25S"
4+
test_data: "/bohr/lambench-ood-zwtr/v3/LAMBench-TestData-v3/HEA25S"
5+
HEA25_bulk:
6+
test_data: "/bohr/lambench-ood-zwtr/v3/LAMBench-TestData-v3/HEA25"
57
MoS2:
6-
test_data: "/bohr/lambench-ood-zwtr/v2/LAMBench-TestData-v2/MoS2"
8+
test_data: "/bohr/lambench-ood-zwtr/v3/LAMBench-TestData-v3/MoS2"
79
MD22:
8-
test_data: "/bohr/lambench-ood-zwtr/v2/LAMBench-TestData-v2/MD22"
10+
test_data: "/bohr/lambench-ood-zwtr/v3/LAMBench-TestData-v3/MD22"
911
REANN_CO2_Ni100:
10-
test_data: "/bohr/lambench-ood-zwtr/v2/LAMBench-TestData-v2/REANN_CO2_Ni100"
12+
test_data: "/bohr/lambench-ood-zwtr/v3/LAMBench-TestData-v3/REANN_CO2_Ni100"
1113
NequIP_NC_2022:
12-
test_data: "/bohr/lambench-ood-zwtr/v2/LAMBench-TestData-v2/NequIP_NC_2022"
14+
test_data: "/bohr/lambench-ood-zwtr/v3/LAMBench-TestData-v3/NequIP_NC_2022"
1315
AIMD-Chig:
14-
test_data: "/bohr/lambench-ood-zwtr/v2/LAMBench-TestData-v2/AIMD_chig"
15-
CGM_MLP_NC2023:
16-
test_data: "/bohr/lambench-ood-zwtr/v2/LAMBench-TestData-v2/CGM_MLP"
16+
test_data: "/bohr/lambench-ood-zwtr/v3/LAMBench-TestData-v3/AIMD_chig"
1717
Cu_MgO_catalysts:
18-
test_data: "/bohr/lambench-ood-zwtr/v2/LAMBench-TestData-v2/Cu_MgO_CO2"
18+
test_data: "/bohr/lambench-ood-zwtr/v3/LAMBench-TestData-v3/Cu_MgO_CO2"
1919
Si_ZEO22:
20-
test_data: "/bohr/lambench-ood-zwtr/v2/LAMBench-TestData-v2/Si_ZEO22"
20+
test_data: "/bohr/lambench-ood-zwtr/v3/LAMBench-TestData-v3/Si_ZEO22"
2121
HPt_NC_2022:
22-
test_data: "/bohr/lambench-ood-zwtr/v2/LAMBench-TestData-v2/HPt_NC2022"
22+
test_data: "/bohr/lambench-ood-zwtr/v3/LAMBench-TestData-v3/HPt_NC2022"
2323
Ca_batteries_CM2021:
24-
test_data: "/bohr/lambench-ood-zwtr/v2/LAMBench-TestData-v2/Ca_batteries"
24+
test_data: "/bohr/lambench-ood-zwtr/v3/LAMBench-TestData-v3/Ca_batteries"
2525
## DEPRECATED
2626
# Collision:
2727
# test_data: "/bohr/lambench-ood-zwtr/v2/LAMBench-TestData-v2/Collision"
@@ -31,8 +31,8 @@ Ca_batteries_CM2021:
3131
# test_data: "/bohr/lambench-ood-zwtr/v1/OOD_test_data_v2/subalex_downsample_9k"
3232
# WBM_downsampled:
3333
# test_data: "/bohr/lambench-ood-zwtr/v1/OOD_test_data_v2/WBM_downsampled"
34-
# HEA25_bulk:
35-
# test_data: "/bohr/lambench-ood-zwtr/v1/OOD_test_data_v2/HEA25"
34+
# CGM_MLP_NC2023:
35+
# test_data: "/bohr/lambench-ood-zwtr/v3/LAMBench-TestData-v3/CGM_MLP"
3636
# HEMC_HEMB:
3737
# test_data: "/bohr/lambench-ood-zwtr/v1/OOD_test_data_v2/HEMC_HEMB"
3838
# Torsionnet500:

tests/metrics/conftest.py

Lines changed: 32 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,22 @@
3737
virial_rmse_natoms=4.42881,
3838
virial_mae_natoms=2.02307,
3939
),
40+
DirectPredictRecord(
41+
id=3,
42+
model_name="test_dp",
43+
task_name="HEA25_bulk",
44+
create_time=None,
45+
energy_rmse=5.76976,
46+
energy_mae=3.89511,
47+
energy_rmse_natoms=0.134269,
48+
energy_mae_natoms=0.0909164,
49+
force_rmse=0.345338,
50+
force_mae=0.209072,
51+
virial_rmse=167.769,
52+
virial_mae=63.4004,
53+
virial_rmse_natoms=3.87829,
54+
virial_mae_natoms=1.47443,
55+
),
4056
DirectPredictRecord(
4157
id=3,
4258
model_name="test_dp",
@@ -117,22 +133,22 @@
117133
virial_rmse_natoms=None,
118134
virial_mae_natoms=None,
119135
),
120-
DirectPredictRecord(
121-
id=12,
122-
model_name="test_dp",
123-
task_name="Cu_MgO_catalysts",
124-
create_time=None,
125-
energy_rmse=0.267982,
126-
energy_mae=0.153377,
127-
energy_rmse_natoms=0.0035446,
128-
energy_mae_natoms=0.00229624,
129-
force_rmse=0.0584197,
130-
force_mae=0.038047,
131-
virial_rmse=None,
132-
virial_mae=None,
133-
virial_rmse_natoms=None,
134-
virial_mae_natoms=None,
135-
),
136+
# DirectPredictRecord(
137+
# id=12,
138+
# model_name="test_dp",
139+
# task_name="Cu_MgO_catalysts",
140+
# create_time=None,
141+
# energy_rmse=0.267982,
142+
# energy_mae=0.153377,
143+
# energy_rmse_natoms=0.0035446,
144+
# energy_mae_natoms=0.00229624,
145+
# force_rmse=0.0584197,
146+
# force_mae=0.038047,
147+
# virial_rmse=None,
148+
# virial_mae=None,
149+
# virial_rmse_natoms=None,
150+
# virial_mae_natoms=None,
151+
# ),
136152
DirectPredictRecord(
137153
id=13,
138154
model_name="test_dp",
@@ -183,22 +199,6 @@
183199
),
184200
## Deprecated
185201
# DirectPredictRecord(
186-
# id=3,
187-
# model_name="test_dp",
188-
# task_name="HEA25_bulk",
189-
# create_time=None,
190-
# energy_rmse=5.76976,
191-
# energy_mae=3.89511,
192-
# energy_rmse_natoms=0.134269,
193-
# energy_mae_natoms=0.0909164,
194-
# force_rmse=0.345338,
195-
# force_mae=0.209072,
196-
# virial_rmse=167.769,
197-
# virial_mae=63.4004,
198-
# virial_rmse_natoms=3.87829,
199-
# virial_mae_natoms=1.47443,
200-
# ),
201-
# DirectPredictRecord(
202202
# id=4,
203203
# model_name="test_dp",
204204
# task_name="HEMC_HEMB",

tests/metrics/test_post_process.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,7 @@ def test_process_results_for_one_model(
2121

2222
assert DIRECT_TASK_WEIGHTS.keys() - result[
2323
"generalizability_force_field_results"
24-
].keys() == {
25-
"CGM_MLP_NC2023",
26-
}
24+
].keys() == {"Cu_MgO_catalysts"}
2725
with caplog.at_level(logging.WARNING):
2826
assert (
2927
"Weighted results for test_dp are marked as None due to missing tasks: "

tests/metrics/test_visualization.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,11 @@ def test_aggregate_ood_results_for_one_model(
1414
model.show_calculator_task = False
1515
aggregator = ResultsFetcher()
1616
result = aggregator.aggregate_ood_results_for_one_model(model=model)
17-
np.testing.assert_almost_equal(result["Molecules"], 0.23582485, decimal=5)
18-
np.testing.assert_almost_equal(result["Inorganic Materials"], 0.2931686, decimal=5)
17+
np.testing.assert_almost_equal(result["Molecules"], 0.234724350, decimal=5)
18+
np.testing.assert_almost_equal(result["Inorganic Materials"], 0.2972349, decimal=5)
1919
assert result["Catalysis"] is None
2020
with caplog.at_level(logging.WARNING):
2121
assert (
22-
"Expect one record for test_dp and CGM_MLP_NC2023, but got 0" in caplog.text
22+
"Expect one record for test_dp and Cu_MgO_catalysts, but got 0"
23+
in caplog.text
2324
)

0 commit comments

Comments
 (0)