Skip to content

Commit ff5455d

Browse files
committed
Update tuning tests: increase n_obs to 500, set dgp_type to 4 and n_folds to 5; restrict DecisionTreeRegressor to max_depth=1 so that the untuned learner underfits
1 parent 29b7fd7 commit ff5455d

File tree

4 files changed

+18
-25
lines changed

4 files changed

+18
-25
lines changed

doubleml/did/tests/test_did_binary_tune_ml_models.py

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,8 @@
2121
def test_doubleml_did_binary_optuna_tune(sampler_name, optuna_sampler, score):
2222
np.random.seed(3152)
2323
df_panel = make_did_CS2021(
24-
n_obs=200,
25-
dgp_type=1,
24+
n_obs=500,
25+
dgp_type=4,
2626
include_never_treated=True,
2727
time_type="float",
2828
n_periods=4,
@@ -39,9 +39,8 @@ def test_doubleml_did_binary_optuna_tune(sampler_name, optuna_sampler, score):
3939

4040
g_value, t_value_pre, t_value_eval = _select_binary_periods(panel_data)
4141

42-
ml_g = DecisionTreeRegressor(random_state=321)
42+
ml_g = DecisionTreeRegressor(random_state=321, max_depth=1) # underfit
4343
ml_m = DecisionTreeClassifier(random_state=654)
44-
4544
dml_did_binary = DoubleMLDIDBinary(
4645
obj_dml_data=panel_data,
4746
g_value=g_value,

doubleml/did/tests/test_did_cs_binary_tune_ml_models.py

Lines changed: 6 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -21,8 +21,8 @@
2121
def test_doubleml_did_cs_binary_optuna_tune(sampler_name, optuna_sampler, score):
2222
np.random.seed(3153)
2323
df_panel = make_did_cs_CS2021(
24-
n_obs=200,
25-
dgp_type=2,
24+
n_obs=500,
25+
dgp_type=4,
2626
include_never_treated=True,
2727
time_type="float",
2828
)
@@ -35,10 +35,9 @@ def test_doubleml_did_cs_binary_optuna_tune(sampler_name, optuna_sampler, score)
3535
x_cols=["Z1", "Z2", "Z3", "Z4"],
3636
)
3737
print(df_panel.head())
38-
theta = df_panel["y1"].mean()
3938
g_value, t_value_pre, t_value_eval = _select_binary_periods(panel_data)
4039

41-
ml_g = DecisionTreeRegressor(random_state=321)
40+
ml_g = DecisionTreeRegressor(random_state=321, max_depth=1) # underfit
4241
ml_m = DecisionTreeClassifier(random_state=654)
4342

4443
dml_did_cs_binary = DoubleMLDIDCSBinary(
@@ -49,11 +48,10 @@ def test_doubleml_did_cs_binary_optuna_tune(sampler_name, optuna_sampler, score)
4948
ml_g=ml_g,
5049
ml_m=ml_m,
5150
score=score,
52-
n_folds=2,
51+
n_folds=5,
5352
)
5453
dml_did_cs_binary.fit()
5554
untuned_score = dml_did_cs_binary.evaluate_learners()
56-
untuned_bias = np.abs(dml_did_cs_binary.coef - theta)
5755

5856
optuna_params = _build_param_space(dml_did_cs_binary, _small_tree_params)
5957

@@ -64,14 +62,10 @@ def test_doubleml_did_cs_binary_optuna_tune(sampler_name, optuna_sampler, score)
6462

6563
dml_did_cs_binary.fit()
6664
tuned_score = dml_did_cs_binary.evaluate_learners()
67-
tuned_bias = np.abs(dml_did_cs_binary.coef - theta)
6865

6966
for learner_name in dml_did_cs_binary.params_names:
7067
tuned_params = tune_res[0][learner_name].best_params
7168
_assert_tree_params(tuned_params)
7269

73-
# ensure tuning improved RMSE
74-
assert tuned_score[learner_name] < untuned_score[learner_name]
75-
76-
# ensure tuning improved bias
77-
assert tuned_bias <= untuned_bias
70+
# ensure tuning improved RMSE or LogLoss
71+
assert tuned_score[learner_name] < untuned_score[learner_name]

doubleml/did/tests/test_did_cs_tune_ml_models.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -19,18 +19,18 @@
1919
def test_doubleml_did_cs_optuna_tune(sampler_name, optuna_sampler, score):
2020
np.random.seed(3151)
2121
dml_data = make_did_SZ2020(
22-
n_obs=200,
23-
dgp_type=2,
22+
n_obs=500,
23+
dgp_type=4,
2424
cross_sectional_data=True,
2525
return_type="DoubleMLDIDData",
2626
)
2727

28-
ml_g = DecisionTreeRegressor(random_state=321)
28+
ml_g = DecisionTreeRegressor(random_state=321, max_depth=1) # underfit
2929
if score == "observational":
3030
ml_m = DecisionTreeClassifier(random_state=654)
31-
dml_did_cs = dml.DoubleMLDIDCS(dml_data, ml_g, ml_m, score=score, n_folds=2)
31+
dml_did_cs = dml.DoubleMLDIDCS(dml_data, ml_g, ml_m, score=score, n_folds=5)
3232
else:
33-
dml_did_cs = dml.DoubleMLDIDCS(dml_data, ml_g, score=score, n_folds=2)
33+
dml_did_cs = dml.DoubleMLDIDCS(dml_data, ml_g, score=score, n_folds=5)
3434
dml_did_cs.fit()
3535
untuned_score = dml_did_cs.evaluate_learners()
3636

doubleml/did/tests/test_did_tune_ml_models.py

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -20,14 +20,14 @@ def test_doubleml_did_optuna_tune(sampler_name, optuna_sampler, score):
2020
"""Test DID with ml_g0, ml_g1 (and ml_m for observational score) nuisance models."""
2121

2222
np.random.seed(3150)
23-
dml_data = make_did_SZ2020(n_obs=200, dgp_type=1, return_type="DoubleMLDIDData")
23+
dml_data = make_did_SZ2020(n_obs=500, dgp_type=4, return_type="DoubleMLDIDData")
2424

25-
ml_g = DecisionTreeRegressor(random_state=321)
25+
ml_g = DecisionTreeRegressor(random_state=321, max_depth=1) # underfit
2626
if score == "observational":
2727
ml_m = DecisionTreeClassifier(random_state=654)
28-
dml_did = dml.DoubleMLDID(dml_data, ml_g, ml_m, score=score, n_folds=2)
28+
dml_did = dml.DoubleMLDID(dml_data, ml_g, ml_m, score=score, n_folds=5)
2929
else:
30-
dml_did = dml.DoubleMLDID(dml_data, ml_g, score=score, n_folds=2)
30+
dml_did = dml.DoubleMLDID(dml_data, ml_g, score=score, n_folds=5)
3131
dml_did.fit()
3232
untuned_score = dml_did.evaluate_learners()
3333

0 commit comments

Comments
 (0)