diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 2c77cac5..6cab94cb 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -58,6 +58,7 @@ jobs: matrix.config.os != 'ubuntu-latest' || matrix.config.python-version != '3.12' run: | + pytest --doctest-modules --ignore-glob="doubleml/**/tests/*" --ignore-glob="doubleml/tests/*" pytest -m ci pytest -m ci_rdd diff --git a/doubleml/data/tests/test_dml_data.py b/doubleml/data/tests/test_dml_data.py index 542bbb61..a745fb12 100644 --- a/doubleml/data/tests/test_dml_data.py +++ b/doubleml/data/tests/test_dml_data.py @@ -569,6 +569,7 @@ def test_dml_data_w_missings(generate_data_irm_w_missings): assert dml_data.force_all_x_finite == "allow-nan" +@pytest.mark.ci def test_dml_data_w_missing_d(generate_data1): data = generate_data1 np.random.seed(3141) diff --git a/doubleml/did/datasets/dgp_did_CS2021.py b/doubleml/did/datasets/dgp_did_CS2021.py index 50336cdb..1756d085 100644 --- a/doubleml/did/datasets/dgp_did_CS2021.py +++ b/doubleml/did/datasets/dgp_did_CS2021.py @@ -105,11 +105,35 @@ def make_did_CS2021(n_obs=1000, dgp_type=1, include_never_treated=True, time_typ 6. Treatment assignment: - For non-experimental settings (DGP 1-4), the probability of being in treatment group :math:`g` is: + For non-experimental settings (DGP 1-4), the probability of being in treatment group :math:`g` is computed as follows: - .. math:: + - Compute group-specific logits for each observation: + + .. math:: + + \\text{logit}_{i,g} = f_{ps,g}(W_{ps}) + + The logits are clipped to the range [-2.5, 2.5] for numerical stability. + + - Convert logits to uncapped probabilities via softmax: + + .. math:: + + p^{\\text{uncapped}}_{i,g} = \\frac{\\exp(\\text{logit}_{i,g})}{\\sum_{g'} \\exp(\\text{logit}_{i,g'})} + + - Clip uncapped probabilities to the range [0.05, 0.95]: + + .. math:: + + p^{\\text{clipped}}_{i,g} = \\min(\\max(p^{\\text{uncapped}}_{i,g}, 0.05), 0.95) + + - Renormalize clipped probabilities so they sum to 1 for each observation: + + .. math:: + + p_{i,g} = \\frac{p^{\\text{clipped}}_{i,g}}{\\sum_{g'} p^{\\text{clipped}}_{i,g'}} - P(G_i = g) = \\frac{\\exp(f_{ps,g}(W_{ps}))}{\\sum_{g'} \\exp(f_{ps,g'}(W_{ps}))} + - Assign each observation to a treatment group by sampling from the categorical distribution defined by :math:`p_{i,g}`. For experimental settings (DGP 5-6), each treatment group (including never-treated) has equal probability: @@ -159,7 +183,7 @@ def make_did_CS2021(n_obs=1000, dgp_type=1, include_never_treated=True, time_typ `dim_x` (int, default=4): Dimension of feature vectors. - `xi` (float, default=0.9): + `xi` (float, default=0.75): Scale parameter for the propensity score function.
`n_periods` (int, default=5): @@ -188,7 +212,7 @@ def make_did_CS2021(n_obs=1000, dgp_type=1, include_never_treated=True, time_typ c = kwargs.get("c", 0.0) dim_x = kwargs.get("dim_x", 4) - xi = kwargs.get("xi", 0.9) + xi = kwargs.get("xi", 0.75) n_periods = kwargs.get("n_periods", 5) anticipation_periods = kwargs.get("anticipation_periods", 0) n_pre_treat_periods = kwargs.get("n_pre_treat_periods", 2) @@ -228,8 +252,11 @@ def make_did_CS2021(n_obs=1000, dgp_type=1, include_never_treated=True, time_typ p = np.ones(n_treatment_groups) / n_treatment_groups d_index = np.random.choice(n_treatment_groups, size=n_obs, p=p) else: - unnormalized_p = np.exp(_f_ps_groups(features_ps, xi, n_groups=n_treatment_groups)) - p = unnormalized_p / unnormalized_p.sum(1, keepdims=True) + logits = np.clip(_f_ps_groups(features_ps, xi, n_groups=n_treatment_groups), a_min=-2.5, a_max=2.5) + unnormalized_p = np.exp(logits) + p_uncapped = unnormalized_p / unnormalized_p.sum(1, keepdims=True) + p_clipped = np.clip(p_uncapped, a_min=0.05, a_max=0.95) + p = p_clipped / p_clipped.sum(1, keepdims=True) d_index = np.array([np.random.choice(n_treatment_groups, p=p_row) for p_row in p]) # fixed effects (shape (n_obs, n_time_periods)) diff --git a/doubleml/did/datasets/dgp_did_cs_CS2021.py b/doubleml/did/datasets/dgp_did_cs_CS2021.py index 08021270..2fb044eb 100644 --- a/doubleml/did/datasets/dgp_did_cs_CS2021.py +++ b/doubleml/did/datasets/dgp_did_cs_CS2021.py @@ -85,11 +85,35 @@ def make_did_cs_CS2021(n_obs=1000, dgp_type=1, include_never_treated=True, lambd 6. Treatment assignment: - For non-experimental settings (DGP 1-4), the probability of being in treatment group :math:`g` is: + For non-experimental settings (DGP 1-4), the probability of being in treatment group :math:`g` is computed as follows: - .. math:: + - Compute group-specific logits for each observation: + + .. math:: + + \\text{logit}_{i,g} = f_{ps,g}(W_{ps}) + + The logits are clipped to the range [-2.5, 2.5] for numerical stability. + + - Convert logits to uncapped probabilities via softmax: + + .. math:: + + p^{\\text{uncapped}}_{i,g} = \\frac{\\exp(\\text{logit}_{i,g})}{\\sum_{g'} \\exp(\\text{logit}_{i,g'})} + + - Clip uncapped probabilities to the range [0.05, 0.95]: + + .. math:: + + p^{\\text{clipped}}_{i,g} = \\min(\\max(p^{\\text{uncapped}}_{i,g}, 0.05), 0.95) + + - Renormalize clipped probabilities so they sum to 1 for each observation: + + .. math:: + + p_{i,g} = \\frac{p^{\\text{clipped}}_{i,g}}{\\sum_{g'} p^{\\text{clipped}}_{i,g'}} - P(G_i = g) = \\frac{\\exp(f_{ps,g}(W_{ps}))}{\\sum_{g'} \\exp(f_{ps,g'}(W_{ps}))} + - Assign each observation to a treatment group by sampling from the categorical distribution defined by :math:`p_{i,g}`. For experimental settings (DGP 5-6), each treatment group (including never-treated) has equal probability: @@ -148,7 +172,7 @@ def make_did_cs_CS2021(n_obs=1000, dgp_type=1, include_never_treated=True, lambd `dim_x` (int, default=4): Dimension of feature vectors. - `xi` (float, default=0.9): + `xi` (float, default=0.75): Scale parameter for the propensity score function.
`n_periods` (int, default=5): diff --git a/doubleml/did/did.py b/doubleml/did/did.py index 5e86d52e..378aa644 100644 --- a/doubleml/did/did.py +++ b/doubleml/did/did.py @@ -9,6 +9,7 @@ from doubleml.double_ml_score_mixins import LinearScoreMixin from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls +from doubleml.utils._tune_optuna import _dml_tune_optuna # TODO: Remove DoubleMLDIDData with version 0.12.0 @@ -427,6 +428,83 @@ def _nuisance_tuning( return res + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + """ + Optuna-based hyperparameter tuning for DID nuisance models. + + Performs tuning once on the whole dataset using cross-validation, + returning the same optimal parameters for all folds. + """ + + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + + if scoring_methods is None: + if self.score == "observational": + scoring_methods = {"ml_g0": None, "ml_g1": None, "ml_m": None} + else: + scoring_methods = {"ml_g0": None, "ml_g1": None} + + # Separate data by treatment status for conditional mean tuning + mask_d0 = d == 0 + mask_d1 = d == 1 + + x_d0 = x[mask_d0, :] + y_d0 = y[mask_d0] + g0_tune_res = _dml_tune_optuna( + y_d0, + x_d0, + self._learner["ml_g"], + optuna_params["ml_g0"], + scoring_methods["ml_g0"], + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g0", + ) + + x_d1 = x[mask_d1, :] + y_d1 = y[mask_d1] + g1_tune_res = _dml_tune_optuna( + y_d1, + x_d1, + self._learner["ml_g"], + optuna_params["ml_g1"], + scoring_methods["ml_g1"], + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g1", + ) + + # Tune propensity score on full dataset for observational score + m_tune_res = None + if self.score == "observational": + m_tune_res = _dml_tune_optuna( + d, + x, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + + if self.score == "observational": + results = {"ml_g0": g0_tune_res, "ml_g1": g1_tune_res, "ml_m": m_tune_res} + else: + results = {"ml_g0": g0_tune_res, "ml_g1": g1_tune_res} + + return results + def sensitivity_benchmark(self, benchmarking_set, fit_args=None): """ Computes a benchmark for a given set of features. 
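Usage sketch (illustrative only, not part of the patch): the `_nuisance_tuning_optuna` hook added above is driven by the public `tune_ml_models` method introduced in `doubleml/double_ml.py` further down in this diff. Parameter spaces are callables that take an Optuna trial and return a parameter dict, keyed either per learner (`ml_g`, `ml_m`) or per nuisance component (`ml_g0`, `ml_g1`, `ml_m`). The full set of accepted `optuna_settings` keys is defined in `doubleml/utils/_tune_optuna.py`, which is not shown in this diff, so only the `"sampler"` key exercised by the new tests is assumed here.

```python
# Illustrative sketch of the new Optuna tuning API, mirroring the pattern of the tests in this PR.
import numpy as np
import optuna
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

import doubleml as dml
from doubleml.irm.datasets import make_irm_data

np.random.seed(3141)
dml_data = make_irm_data(n_obs=500, dim_x=5)
dml_irm = dml.DoubleMLIRM(dml_data, DecisionTreeRegressor(), DecisionTreeClassifier(), n_folds=2)


def tree_params(trial):
    # search space shared by all nuisance learners in this example
    return {
        "max_depth": trial.suggest_int("max_depth", 2, 6),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 5, 50),
    }


tune_res = dml_irm.tune_ml_models(
    # learner-level keys are expanded to ml_g0/ml_g1/ml_m internally
    ml_param_space={"ml_g": tree_params, "ml_m": tree_params},
    # "sampler" is the settings key used by the tests; other keys live in _tune_optuna.py (not shown here)
    optuna_settings={"sampler": optuna.samplers.TPESampler(seed=42)},
    return_tune_res=True,
)
dml_irm.fit()  # refits with the tuned parameters (set_as_params=True by default)
print(tune_res[0]["ml_m"].best_params)
```

With `set_as_params=True` (the default), the best parameters are written back via `set_ml_nuisance_params`, so the subsequent `fit()` uses the tuned learners.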
diff --git a/doubleml/did/did_binary.py b/doubleml/did/did_binary.py index d92ebf19..a84304ab 100644 --- a/doubleml/did/did_binary.py +++ b/doubleml/did/did_binary.py @@ -23,6 +23,7 @@ _check_score, ) from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls +from doubleml.utils._tune_optuna import _dml_tune_optuna from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor @@ -666,6 +667,79 @@ def _nuisance_tuning( return res + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + + x, y = check_X_y(self._x_data_subset, self._y_data_subset, ensure_all_finite=False) + x, d = check_X_y(x, self._g_data_subset, ensure_all_finite=False) + + if scoring_methods is None: + if self.score == "observational": + scoring_methods = {"ml_g0": None, "ml_g1": None, "ml_m": None} + else: + scoring_methods = {"ml_g0": None, "ml_g1": None} + + mask_d0 = d == 0 + mask_d1 = d == 1 + + x_d0 = x[mask_d0, :] + y_d0 = y[mask_d0] + g0_param_grid = optuna_params["ml_g0"] + g0_scoring = scoring_methods["ml_g0"] + g0_tune_res = _dml_tune_optuna( + y_d0, + x_d0, + self._learner["ml_g"], + g0_param_grid, + g0_scoring, + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g0", + ) + + x_d1 = x[mask_d1, :] + y_d1 = y[mask_d1] + g1_param_grid = optuna_params["ml_g1"] + g1_scoring = scoring_methods["ml_g1"] + g1_tune_res = _dml_tune_optuna( + y_d1, + x_d1, + self._learner["ml_g"], + g1_param_grid, + g1_scoring, + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g1", + ) + + m_tune_res = None + if self.score == "observational": + m_tune_res = _dml_tune_optuna( + d, + x, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + + if self.score == "observational": + results = {"ml_g0": g0_tune_res, "ml_g1": g1_tune_res, "ml_m": m_tune_res} + else: + results = {"ml_g0": g0_tune_res, "ml_g1": g1_tune_res} + + return results + def _sensitivity_element_est(self, preds): y = self._y_data_subset d = self._g_data_subset diff --git a/doubleml/did/did_cs.py b/doubleml/did/did_cs.py index f0454b2b..609b313c 100644 --- a/doubleml/did/did_cs.py +++ b/doubleml/did/did_cs.py @@ -9,6 +9,7 @@ from doubleml.double_ml_score_mixins import LinearScoreMixin from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d +from doubleml.utils._tune_optuna import _dml_tune_optuna # TODO: Remove DoubleMLDIDData with version 0.12.0 @@ -660,6 +661,82 @@ def _nuisance_tuning( return res + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + x, t = check_X_y(x, self._dml_data.t, ensure_all_finite=False) + + if scoring_methods is None: + if self.score == "observational": + scoring_methods = { + "ml_g_d0_t0": None, + "ml_g_d0_t1": None, + "ml_g_d1_t0": None, + "ml_g_d1_t1": None, + "ml_m": None, + } + else: + scoring_methods = { + "ml_g_d0_t0": None, + "ml_g_d0_t1": None, + "ml_g_d1_t0": None, + "ml_g_d1_t1": None, + } + + masks = { + "d0_t0": (d == 0) & (t == 0), + "d0_t1": (d == 0) & (t == 1), + "d1_t0": (d == 1) & (t == 0), + "d1_t1": (d == 1) & (t == 1), + } + + g_tune_results = {} + for key, mask in masks.items(): 
+ x_subset = x[mask, :] + y_subset = y[mask] + params_key = f"ml_g_{key}" + param_grid = optuna_params[params_key] + scoring = scoring_methods[params_key] + g_tune_results[key] = _dml_tune_optuna( + y_subset, + x_subset, + self._learner["ml_g"], + param_grid, + scoring, + cv, + optuna_settings, + learner_name="ml_g", + params_name=params_key, + ) + + m_tune_res = None + if self.score == "observational": + m_tune_res = _dml_tune_optuna( + d, + x, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + + results = {f"ml_g_{key}": res_obj for key, res_obj in g_tune_results.items()} + + if self.score == "observational": + results["ml_m"] = m_tune_res + + return results + def sensitivity_benchmark(self, benchmarking_set, fit_args=None): """ Computes a benchmark for a given set of features. diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py index c547ff40..a2af841b 100644 --- a/doubleml/did/did_cs_binary.py +++ b/doubleml/did/did_cs_binary.py @@ -23,6 +23,7 @@ _check_score, ) from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d +from doubleml.utils._tune_optuna import _dml_tune_optuna from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor @@ -765,6 +766,82 @@ def _nuisance_tuning( return res + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + + x, y = check_X_y(self._x_data_subset, self._y_data_subset, ensure_all_finite=False) + _, d = check_X_y(x, self._g_data_subset, ensure_all_finite=False) + _, t = check_X_y(x, self._t_data_subset, ensure_all_finite=False) + + if scoring_methods is None: + if self.score == "observational": + scoring_methods = { + "ml_g_d0_t0": None, + "ml_g_d0_t1": None, + "ml_g_d1_t0": None, + "ml_g_d1_t1": None, + "ml_m": None, + } + else: + scoring_methods = { + "ml_g_d0_t0": None, + "ml_g_d0_t1": None, + "ml_g_d1_t0": None, + "ml_g_d1_t1": None, + } + + masks = { + "d0_t0": (d == 0) & (t == 0), + "d0_t1": (d == 0) & (t == 1), + "d1_t0": (d == 1) & (t == 0), + "d1_t1": (d == 1) & (t == 1), + } + + g_tune_results = {} + for key, mask in masks.items(): + x_subset = x[mask, :] + y_subset = y[mask] + params_key = f"ml_g_{key}" + param_grid = optuna_params[params_key] + scoring = scoring_methods[params_key] + g_tune_results[key] = _dml_tune_optuna( + y_subset, + x_subset, + self._learner["ml_g"], + param_grid, + scoring, + cv, + optuna_settings, + learner_name="ml_g", + params_name=params_key, + ) + + m_tune_res = None + if self.score == "observational": + m_tune_res = _dml_tune_optuna( + d, + x, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + + results = {f"ml_g_{key}": res_obj for key, res_obj in g_tune_results.items()} + + if self.score == "observational": + results["ml_m"] = m_tune_res + + return results + def _sensitivity_element_est(self, preds): y = self._y_data_subset d = self._g_data_subset diff --git a/doubleml/did/did_multi.py b/doubleml/did/did_multi.py index ba00ea13..54c242d1 100644 --- a/doubleml/did/did_multi.py +++ b/doubleml/did/did_multi.py @@ -36,6 +36,7 @@ from doubleml.double_ml_framework import concat from doubleml.utils._checks import _check_bool, _check_score from doubleml.utils._descriptive import generate_summary +from doubleml.utils._tune_optuna import TUNE_ML_MODELS_DOC from 
doubleml.utils.gain_statistics import gain_statistics from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor @@ -734,6 +735,37 @@ def p_adjust(self, method="romano-wolf"): return p_val + def tune_ml_models( + self, + ml_param_space, + scoring_methods=None, + cv=5, + set_as_params=True, + return_tune_res=False, + optuna_settings=None, + ): + """Hyperparameter-tuning for DoubleML models using Optuna.""" + + tuning_kwargs = { + "ml_param_space": ml_param_space, + "scoring_methods": scoring_methods, + "cv": cv, + "set_as_params": set_as_params, + "return_tune_res": return_tune_res, + "optuna_settings": optuna_settings, + } + + tune_res = [] if return_tune_res else None + + for model in self.modellist: + res = model.tune_ml_models(**tuning_kwargs) + if return_tune_res: + tune_res.append(res[0]) + + return tune_res if return_tune_res else self + + tune_ml_models.__doc__ = TUNE_ML_MODELS_DOC + def bootstrap(self, method="normal", n_rep_boot=500): """ Multiplier bootstrap for DoubleML models. diff --git a/doubleml/did/tests/test_did_binary_control_groups.py b/doubleml/did/tests/test_did_binary_control_groups.py index 627cf50a..ca493435 100644 --- a/doubleml/did/tests/test_did_binary_control_groups.py +++ b/doubleml/did/tests/test_did_binary_control_groups.py @@ -1,3 +1,4 @@ +import pytest from sklearn.linear_model import LinearRegression, LogisticRegression import doubleml as dml @@ -17,6 +18,7 @@ } +@pytest.mark.ci def test_control_groups_different(): dml_did_never_treated = dml.did.DoubleMLDIDBinary(control_group="never_treated", **args) dml_did_not_yet_treated = dml.did.DoubleMLDIDBinary(control_group="not_yet_treated", **args) diff --git a/doubleml/did/tests/test_did_binary_tune_ml_models.py b/doubleml/did/tests/test_did_binary_tune_ml_models.py new file mode 100644 index 00000000..e69b4ae6 --- /dev/null +++ b/doubleml/did/tests/test_did_binary_tune_ml_models.py @@ -0,0 +1,72 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +from doubleml.data import DoubleMLPanelData +from doubleml.did import DoubleMLDIDBinary +from doubleml.did.datasets import make_did_CS2021 +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _build_param_space, + _select_binary_periods, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +@pytest.mark.parametrize("score", ["observational", "experimental"]) +def test_doubleml_did_binary_optuna_tune(sampler_name, optuna_sampler, score): + np.random.seed(3152) + df_panel = make_did_CS2021( + n_obs=500, + dgp_type=4, + include_never_treated=True, + time_type="float", + n_periods=4, + n_pre_treat_periods=2, + ) + panel_data = DoubleMLPanelData( + df_panel, + y_col="y", + d_cols="d", + id_col="id", + t_col="t", + x_cols=["Z1", "Z2", "Z3", "Z4"], + ) + + g_value, t_value_pre, t_value_eval = _select_binary_periods(panel_data) + + ml_g = DecisionTreeRegressor(random_state=321, max_depth=1) # underfit + ml_m = DecisionTreeClassifier(random_state=654) + dml_did_binary = DoubleMLDIDBinary( + obj_dml_data=panel_data, + g_value=g_value, + t_value_pre=t_value_pre, + t_value_eval=t_value_eval, + ml_g=ml_g, + ml_m=ml_m, + score=score, + n_folds=5, + ) + dml_did_binary.fit() + untuned_score = dml_did_binary.evaluate_learners() + + optuna_params = _build_param_space(dml_did_binary, _small_tree_params) + + optuna_settings 
= _basic_optuna_settings({"sampler": optuna_sampler}) + tune_res = dml_did_binary.tune_ml_models( + ml_param_space=optuna_params, optuna_settings=optuna_settings, return_tune_res=True + ) + + dml_did_binary.fit() + tuned_score = dml_did_binary.evaluate_learners() + + for learner_name in dml_did_binary.params_names: + tuned_params = tune_res[0][learner_name].best_params + _assert_tree_params(tuned_params) + + # ensure tuning improved RMSE + assert tuned_score[learner_name] < untuned_score[learner_name] diff --git a/doubleml/did/tests/test_did_cs_binary_control_groups.py b/doubleml/did/tests/test_did_cs_binary_control_groups.py index ea4f2933..5c3202af 100644 --- a/doubleml/did/tests/test_did_cs_binary_control_groups.py +++ b/doubleml/did/tests/test_did_cs_binary_control_groups.py @@ -1,3 +1,4 @@ +import pytest from sklearn.linear_model import LinearRegression, LogisticRegression import doubleml as dml @@ -17,6 +18,7 @@ } +@pytest.mark.ci def test_control_groups_different(): dml_did_never_treated = dml.did.DoubleMLDIDCSBinary(control_group="never_treated", **args) dml_did_not_yet_treated = dml.did.DoubleMLDIDCSBinary(control_group="not_yet_treated", **args) diff --git a/doubleml/did/tests/test_did_cs_binary_tune_ml_models.py b/doubleml/did/tests/test_did_cs_binary_tune_ml_models.py new file mode 100644 index 00000000..06398135 --- /dev/null +++ b/doubleml/did/tests/test_did_cs_binary_tune_ml_models.py @@ -0,0 +1,70 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +from doubleml.data import DoubleMLPanelData +from doubleml.did import DoubleMLDIDCSBinary +from doubleml.did.datasets import make_did_cs_CS2021 +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _build_param_space, + _select_binary_periods, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +@pytest.mark.parametrize("score", ["observational", "experimental"]) +def test_doubleml_did_cs_binary_optuna_tune(sampler_name, optuna_sampler, score): + np.random.seed(3153) + df_panel = make_did_cs_CS2021( + n_obs=500, + dgp_type=4, + include_never_treated=True, + time_type="float", + ) + panel_data = DoubleMLPanelData( + df_panel, + y_col="y", + d_cols="d", + id_col="id", + t_col="t", + x_cols=["Z1", "Z2", "Z3", "Z4"], + ) + g_value, t_value_pre, t_value_eval = _select_binary_periods(panel_data) + + ml_g = DecisionTreeRegressor(random_state=321, max_depth=1) # underfit + ml_m = DecisionTreeClassifier(random_state=654) + + dml_did_cs_binary = DoubleMLDIDCSBinary( + obj_dml_data=panel_data, + g_value=g_value, + t_value_pre=t_value_pre, + t_value_eval=t_value_eval, + ml_g=ml_g, + ml_m=ml_m, + score=score, + n_folds=5, + ) + dml_did_cs_binary.fit() + untuned_score = dml_did_cs_binary.evaluate_learners() + + optuna_params = _build_param_space(dml_did_cs_binary, _small_tree_params) + + optuna_settings = _basic_optuna_settings({"sampler": optuna_sampler}) + tune_res = dml_did_cs_binary.tune_ml_models( + ml_param_space=optuna_params, optuna_settings=optuna_settings, return_tune_res=True + ) + + dml_did_cs_binary.fit() + tuned_score = dml_did_cs_binary.evaluate_learners() + + for learner_name in dml_did_cs_binary.params_names: + tuned_params = tune_res[0][learner_name].best_params + _assert_tree_params(tuned_params) + + # ensure tuning improved RMSE or LogLoss + assert
tuned_score[learner_name] < untuned_score[learner_name] diff --git a/doubleml/did/tests/test_did_cs_tune_ml_models.py b/doubleml/did/tests/test_did_cs_tune_ml_models.py new file mode 100644 index 00000000..0a44d412 --- /dev/null +++ b/doubleml/did/tests/test_did_cs_tune_ml_models.py @@ -0,0 +1,50 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +import doubleml as dml +from doubleml.did.datasets import make_did_SZ2020 +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _build_param_space, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +@pytest.mark.parametrize("score", ["observational", "experimental"]) +def test_doubleml_did_cs_optuna_tune(sampler_name, optuna_sampler, score): + np.random.seed(3151) + dml_data = make_did_SZ2020( + n_obs=500, + dgp_type=4, + cross_sectional_data=True, + return_type="DoubleMLDIDData", + ) + + ml_g = DecisionTreeRegressor(random_state=321, max_depth=1) # underfit + if score == "observational": + ml_m = DecisionTreeClassifier(random_state=654) + dml_did_cs = dml.DoubleMLDIDCS(dml_data, ml_g, ml_m, score=score, n_folds=5) + else: + dml_did_cs = dml.DoubleMLDIDCS(dml_data, ml_g, score=score, n_folds=5) + dml_did_cs.fit() + untuned_score = dml_did_cs.evaluate_learners() + + optuna_params = _build_param_space(dml_did_cs, _small_tree_params) + + optuna_settings = _basic_optuna_settings({"sampler": optuna_sampler}) + tune_res = dml_did_cs.tune_ml_models(ml_param_space=optuna_params, optuna_settings=optuna_settings, return_tune_res=True) + + dml_did_cs.fit() + tuned_score = dml_did_cs.evaluate_learners() + + for learner_name in dml_did_cs.params_names: + tuned_params = tune_res[0][learner_name].best_params + _assert_tree_params(tuned_params) + + # ensure tuning improved RMSE + assert tuned_score[learner_name] < untuned_score[learner_name] diff --git a/doubleml/did/tests/test_did_tune_ml_models.py b/doubleml/did/tests/test_did_tune_ml_models.py new file mode 100644 index 00000000..b0ba76cb --- /dev/null +++ b/doubleml/did/tests/test_did_tune_ml_models.py @@ -0,0 +1,47 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +import doubleml as dml +from doubleml.did.datasets import make_did_SZ2020 +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _build_param_space, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +@pytest.mark.parametrize("score", ["observational", "experimental"]) +def test_doubleml_did_optuna_tune(sampler_name, optuna_sampler, score): + """Test DID with ml_g0, ml_g1 (and ml_m for observational score) nuisance models.""" + + np.random.seed(3150) + dml_data = make_did_SZ2020(n_obs=500, dgp_type=4, return_type="DoubleMLDIDData") + + ml_g = DecisionTreeRegressor(random_state=321, max_depth=1) # underfit + if score == "observational": + ml_m = DecisionTreeClassifier(random_state=654) + dml_did = dml.DoubleMLDID(dml_data, ml_g, ml_m, score=score, n_folds=5) + else: + dml_did = dml.DoubleMLDID(dml_data, ml_g, score=score, n_folds=5) + dml_did.fit() + untuned_score = dml_did.evaluate_learners() + + optuna_params = _build_param_space(dml_did, _small_tree_params) + + optuna_settings = 
_basic_optuna_settings({"sampler": optuna_sampler}) + tune_res = dml_did.tune_ml_models(ml_param_space=optuna_params, optuna_settings=optuna_settings, return_tune_res=True) + + dml_did.fit() + tuned_score = dml_did.evaluate_learners() + + for learner_name in dml_did.params_names: + tuned_params = tune_res[0][learner_name].best_params + _assert_tree_params(tuned_params) + + # ensure tuning improved RMSE + assert tuned_score[learner_name] < untuned_score[learner_name] diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 6293731a..eb119615 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -14,6 +14,7 @@ from doubleml.utils._checks import _check_external_predictions from doubleml.utils._estimation import _aggregate_coefs_and_ses, _rmse, _set_external_predictions, _var_est from doubleml.utils._sensitivity import _compute_sensitivity_bias +from doubleml.utils._tune_optuna import OPTUNA_GLOBAL_SETTING_KEYS, TUNE_ML_MODELS_DOC, resolve_optuna_cv from doubleml.utils.gain_statistics import gain_statistics _implemented_data_backends = ["DoubleMLData", "DoubleMLClusterData", "DoubleMLDIDData", "DoubleMLSSMData", "DoubleMLRDDData"] @@ -765,6 +766,11 @@ def tune( """ Hyperparameter-tuning for DoubleML models. + .. deprecated:: 0.13.0 + The ``tune`` method using grid/randomized search is maintained for backward compatibility. + For more efficient hyperparameter optimization, use :meth:`tune_ml_models` with Optuna, + which provides Bayesian optimization and better performance. + The hyperparameter-tuning is performed using either an exhaustive search over specified parameter values implemented in :class:`sklearn.model_selection.GridSearchCV` or via a randomized search implemented in :class:`sklearn.model_selection.RandomizedSearchCV`. @@ -818,6 +824,16 @@ def tune( A list containing detailed tuning results and the proposed hyperparameters. Returned if ``return_tune_res`` is ``True``. """ + # Deprecation warning for the tune method + warnings.warn( + "The 'tune' method using grid search or randomized search is maintained for backward compatibility. " + "It will be removed in future versions. " + "For more advanced hyperparameter optimization, consider using 'tune_ml_models' with Optuna, " + "which offers Bayesian optimization and is generally more efficient. " + "See the documentation for 'tune_ml_models' for usage examples.", + FutureWarning, + stacklevel=2, + ) if (not isinstance(param_grids, dict)) | (not all(k in param_grids for k in self.learner_names)): raise ValueError( @@ -934,6 +950,164 @@ def tune( else: return self + def tune_ml_models( + self, + ml_param_space, + scoring_methods=None, + cv=5, + set_as_params=True, + return_tune_res=False, + optuna_settings=None, + ): + """Hyperparameter-tuning for DoubleML models using Optuna.""" + + # Validation + + expanded_param_space = self._validate_optuna_param_space(ml_param_space) + scoring_methods = self._resolve_scoring_methods(scoring_methods) + cv_splitter = resolve_optuna_cv(cv) + self._validate_optuna_setting_keys(optuna_settings) + + if not isinstance(set_as_params, bool): + raise TypeError(f"set_as_params must be True or False. Got {str(set_as_params)}.") + + if not isinstance(return_tune_res, bool): + raise TypeError(f"return_tune_res must be True or False. 
Got {str(return_tune_res)}.") + + # Optuna tuning is always global (not fold-specific) + tuning_res = [None] * self._dml_data.n_treat + + for i_d in range(self._dml_data.n_treat): + self._i_treat = i_d + # this step could be skipped for the single treatment variable case + if self._dml_data.n_treat > 1: + self._dml_data.set_x_d(self._dml_data.d_cols[i_d]) + + # tune hyperparameters (globally, not fold-specific) + res = self._nuisance_tuning_optuna( + expanded_param_space, + scoring_methods, + cv_splitter, + optuna_settings, + ) + + tuning_res[i_d] = res + if set_as_params: + for nuisance_model, tuned_result in res.items(): + if tuned_result is None: + params_to_set = None + else: + params_to_set = tuned_result.best_params + + self.set_ml_nuisance_params(nuisance_model, self._dml_data.d_cols[i_d], params_to_set) + + return tuning_res if return_tune_res else self + + tune_ml_models.__doc__ = TUNE_ML_MODELS_DOC + + def _resolve_scoring_methods(self, scoring_methods): + """Resolve scoring methods to ensure all learners have an entry.""" + + if scoring_methods is None: + return None + + if not isinstance(scoring_methods, dict): + raise TypeError("scoring_methods must be provided as a dictionary keyed by learner name.") + + invalid_scoring_keys = [key for key in scoring_methods if key not in self.params_names] + if invalid_scoring_keys: + raise ValueError( + "Invalid scoring_methods keys for " + + self.__class__.__name__ + + ": " + + ", ".join(sorted(invalid_scoring_keys)) + + ". Valid keys are: " + + ", ".join(self.params_names) + + "." + ) + + resolved = dict(scoring_methods) + for learner_name in self.params_names: + resolved.setdefault(learner_name, None) + + return resolved + + def _validate_optuna_setting_keys(self, optuna_settings): + """Validate learner-level keys provided in optuna_settings.""" + + if optuna_settings is not None and not isinstance(optuna_settings, dict): + raise TypeError(f"optuna_settings must be a dict or None. Got {str(type(optuna_settings))}.") + + if not optuna_settings: + return + + allowed_learner_keys = set(self.params_names) | set(self.learner_names) + invalid_keys = [ + key for key in optuna_settings if key not in OPTUNA_GLOBAL_SETTING_KEYS and key not in allowed_learner_keys + ] + + if invalid_keys: + if allowed_learner_keys: + valid_keys_msg = ", ".join(sorted(allowed_learner_keys)) + else: + valid_keys_msg = "" + raise ValueError( + "Invalid optuna_settings keys for " + + self.__class__.__name__ + + ": " + + ", ".join(sorted(invalid_keys)) + + ". Valid learner-specific keys are: " + + valid_keys_msg + + "." + ) + + for key in allowed_learner_keys: + if key in optuna_settings and not isinstance(optuna_settings[key], dict): + raise TypeError(f"Optuna settings for '{key}' must be a dict.") + + def _validate_optuna_param_space(self, ml_param_space): + """Validate learner keys provided in the Optuna parameter space dictionary.""" + + if not isinstance(ml_param_space, dict) or not ml_param_space: + raise ValueError("ml_param_space must be a non-empty dictionary.") + + allowed_param_keys = set(self.params_names) | set(self.learner_names) + invalid_keys = [key for key in ml_param_space if key not in allowed_param_keys] + + if invalid_keys: + valid_keys_msg = ", ".join(sorted(allowed_param_keys)) if allowed_param_keys else "" + raise ValueError( + "Invalid ml_param_space keys for " + + self.__class__.__name__ + + ": " + + ", ".join(sorted(invalid_keys)) + + ". Valid keys are: " + + valid_keys_msg + + "." 
+ ) + final_param_space = {k: None for k in self.params_names} + + # Validate that all parameter spaces are callables + for learner_name, param_fn in ml_param_space.items(): + if param_fn is None: + continue + if not callable(param_fn): + raise TypeError( + f"Parameter space for '{learner_name}' must be a callable function that takes a trial " + f"and returns a dict. Got {type(param_fn).__name__}. " + f"Example: def ml_params(trial): return {{'lr': trial.suggest_float('lr', 0.01, 0.1)}}" + ) + + # Set Hyperparameter spaces for learners (global / learner_name level) + for learner_name in [ln for ln in self.learner_names if ln in ml_param_space.keys()]: + for param_key in [pk for pk in self.params_names if learner_name in pk]: + final_param_space[param_key] = ml_param_space[learner_name] + # Override if param_name specific space is provided + for param_key in [pk for pk in self.params_names if pk in ml_param_space.keys()]: + final_param_space[param_key] = ml_param_space[param_key] + + return final_param_space + def set_ml_nuisance_params(self, learner, treat_var, params): """ Set hyperparameters for the nuisance models of DoubleML models. @@ -1011,10 +1185,32 @@ def _nuisance_est(self, smpls, n_jobs_cv, return_models, external_predictions): @abstractmethod def _nuisance_tuning( - self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search + self, + smpls, + param_grids, + scoring_methods, + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, ): pass + @abstractmethod + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + """ + Optuna-based hyperparameter tuning hook. + + Subclasses should override this method to provide Optuna tuning support. 
+ """ + raise NotImplementedError(f"Optuna tuning not implemented for {self.__class__.__name__}.") + @staticmethod def _check_learner(learner, learner_name, regressor, classifier): err_msg_prefix = f"Invalid learner provided for {learner_name}: " @@ -1246,8 +1442,8 @@ def evaluate_learners(self, learners=None, metric=_rmse): >>> from doubleml.irm.datasets import make_irm_data >>> from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier >>> np.random.seed(3141) - >>> ml_g = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) - >>> ml_m = RandomForestClassifier(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) + >>> ml_g = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2, random_state=42) + >>> ml_m = RandomForestClassifier(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2, random_state=42) >>> data = make_irm_data(theta=0.5, n_obs=500, dim_x=20, return_type='DataFrame') >>> obj_dml_data = dml.DoubleMLData(data, 'y', 'd') >>> dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_g, ml_m) diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py index 7441d112..9248c88d 100644 --- a/doubleml/irm/apo.py +++ b/doubleml/irm/apo.py @@ -15,6 +15,7 @@ ) from doubleml.utils._estimation import _cond_targets, _dml_cv_predict, _dml_tune, _get_cond_smpls from doubleml.utils._propensity_score import _propensity_score_adjustment +from doubleml.utils._tune_optuna import _dml_tune_optuna from doubleml.utils.blp import DoubleMLBLP from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor @@ -457,6 +458,75 @@ def _nuisance_tuning( return res + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + dx = np.column_stack((d, x)) + treated_indicator = self.treated.astype(bool) + + if scoring_methods is None: + scoring_methods = {"ml_g_d_lvl0": None, "ml_g_d_lvl1": None, "ml_m": None} + + mask_lvl1 = treated_indicator + mask_lvl0 = np.logical_not(mask_lvl1) + + dx_lvl0 = dx[mask_lvl0, :] + y_lvl0 = y[mask_lvl0] + g_lvl0_param_grid = optuna_params["ml_g_d_lvl0"] + g_lvl0_scoring = scoring_methods["ml_g_d_lvl0"] + g_d_lvl0_tune_res = _dml_tune_optuna( + y_lvl0, + dx_lvl0, + self._learner["ml_g"], + g_lvl0_param_grid, + g_lvl0_scoring, + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g_d_lvl0", + ) + + x_lvl1 = x[mask_lvl1, :] + y_lvl1 = y[mask_lvl1] + g_lvl1_param_grid = optuna_params["ml_g_d_lvl1"] + g_lvl1_scoring = scoring_methods["ml_g_d_lvl1"] + g_d_lvl1_tune_res = _dml_tune_optuna( + y_lvl1, + x_lvl1, + self._learner["ml_g"], + g_lvl1_param_grid, + g_lvl1_scoring, + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g_d_lvl1", + ) + + m_tune_res = _dml_tune_optuna( + treated_indicator.astype(float), + x, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + + return { + "ml_g_d_lvl0": g_d_lvl0_tune_res, + "ml_g_d_lvl1": g_d_lvl1_tune_res, + "ml_m": m_tune_res, + } + def _check_data(self, obj_dml_data): if len(obj_dml_data.d_cols) > 1: raise ValueError( diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py index 23e7085e..848c87a6 100644 --- a/doubleml/irm/apos.py +++ b/doubleml/irm/apos.py @@ -16,12 +16,75 @@ from 
doubleml.utils._checks import _check_score, _check_weights from doubleml.utils._descriptive import generate_summary from doubleml.utils._sensitivity import _compute_sensitivity_bias +from doubleml.utils._tune_optuna import TUNE_ML_MODELS_DOC from doubleml.utils.gain_statistics import gain_statistics from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor class DoubleMLAPOS(SampleSplittingMixin): - """Double machine learning for interactive regression models with multiple discrete treatments.""" + """Double machine learning for interactive regression models with multiple discrete + treatments. + + Parameters + ---------- + obj_dml_data : :class:`DoubleMLData` object + The :class:`DoubleMLData` object providing the data and specifying the variables for the causal model. + + ml_g : estimator implementing ``fit()`` and ``predict()`` + A machine learner implementing ``fit()`` and ``predict()`` methods (e.g. + :py:class:`sklearn.ensemble.RandomForestRegressor`) for the nuisance function :math:`g_0(D, X) = E[Y | X, D]`. + For a binary outcome variable :math:`Y` (with values 0 and 1), a classifier implementing ``fit()`` and + ``predict_proba()`` can also be specified. If :py:func:`sklearn.base.is_classifier` returns ``True``, + ``predict_proba()`` is used otherwise ``predict()``. + + ml_m : classifier implementing ``fit()`` and ``predict_proba()`` + A machine learner implementing ``fit()`` and ``predict_proba()`` methods (e.g. + :py:class:`sklearn.ensemble.RandomForestClassifier`) for the nuisance function :math:`m_0(X) = E[D | X]`. + + treatment_levels : iterable of int or float + The treatment levels for which average potential outcomes are evaluated. Each element must be present in the + treatment variable ``d`` of ``obj_dml_data``. + + n_folds : int + Number of folds. + Default is ``5``. + + n_rep : int + Number of repetitions for the sample splitting. + Default is ``1``. + + score : str + A str (``'APO'``) specifying the score function. + Default is ``'APO'``. + + weights : array, dict or None + A numpy array of weights for each individual observation. If ``None``, then the ``'APO'`` score + is applied (corresponds to weights equal to 1). + An array has to be of shape ``(n,)``, where ``n`` is the number of observations. + A dictionary can be used to specify weights which depend on the treatment variable. + In this case, the dictionary has to contain two keys ``weights`` and ``weights_bar``, where the values + have to be arrays of shape ``(n,)`` and ``(n, n_rep)``. + Default is ``None``. + + normalize_ipw : bool + Indicates whether the inverse probability weights are normalized. + Default is ``False``. + + trimming_rule : str, optional, deprecated + (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach. + Use ``ps_processor_config`` instead. Will be removed in a future version. + + trimming_threshold : float, optional, deprecated + (DEPRECATED) The threshold used for trimming. + Use ``ps_processor_config`` instead. Will be removed in a future version. + + ps_processor_config : PSProcessorConfig, optional + Configuration for propensity score processing (clipping, calibration, etc.). + + draw_sample_splitting : bool + Indicates whether the sample splitting should be drawn during initialization of the object. + Default is ``True``. 
+ """ def __init__( self, @@ -863,3 +926,32 @@ def _initialize_models(self): modellist[i_level] = model return modellist + + def tune_ml_models( + self, + ml_param_space, + scoring_methods=None, + cv=5, + set_as_params=True, + return_tune_res=False, + optuna_settings=None, + ): + """Hyperparameter-tuning for DoubleML models using Optuna.""" + + tuning_kwargs = { + "ml_param_space": ml_param_space, + "scoring_methods": scoring_methods, + "cv": cv, + "set_as_params": set_as_params, + "return_tune_res": return_tune_res, + "optuna_settings": optuna_settings, + } + + tune_res = [] if return_tune_res else None + for model in self._modellist: + res = model.tune_ml_models(**tuning_kwargs) + if return_tune_res: + tune_res.append(res[0]) + return tune_res if return_tune_res else self + + tune_ml_models.__doc__ = TUNE_ML_MODELS_DOC diff --git a/doubleml/irm/cvar.py b/doubleml/irm/cvar.py index 6c698293..cd32756e 100644 --- a/doubleml/irm/cvar.py +++ b/doubleml/irm/cvar.py @@ -25,6 +25,7 @@ _solve_ipw_score, ) from doubleml.utils._propensity_score import _normalize_ipw +from doubleml.utils._tune_optuna import _dml_tune_optuna from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor @@ -413,6 +414,54 @@ def _nuisance_tuning( return res + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + + if scoring_methods is None: + scoring_methods = {"ml_g": None, "ml_m": None} + + mask_treat = d == self.treatment + + quantile_approx = np.quantile(y[mask_treat], self.quantile) + g_target_1 = np.full_like(y, quantile_approx, dtype=float) + g_target_2 = (y - self.quantile * quantile_approx) / (1 - self.quantile) + g_target_approx = np.maximum(g_target_1, g_target_2) + + x_treat = x[mask_treat, :] + target_treat = g_target_approx[mask_treat] + g_tune_res = _dml_tune_optuna( + target_treat, + x_treat, + self._learner["ml_g"], + optuna_params["ml_g"], + scoring_methods["ml_g"], + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g", + ) + + m_tune_res = _dml_tune_optuna( + d, + x, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + return {"ml_g": g_tune_res, "ml_m": m_tune_res} + def _check_data(self, obj_dml_data): if not isinstance(obj_dml_data, DoubleMLData): raise TypeError( diff --git a/doubleml/irm/iivm.py b/doubleml/irm/iivm.py index 50513c0f..4f0c59e9 100644 --- a/doubleml/irm/iivm.py +++ b/doubleml/irm/iivm.py @@ -17,6 +17,7 @@ ) from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls, _solve_quadratic_inequality from doubleml.utils._propensity_score import _normalize_ipw +from doubleml.utils._tune_optuna import _dml_tune_optuna from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor @@ -593,6 +594,112 @@ def _nuisance_tuning( return res + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + """ + Optuna-based hyperparameter tuning for IIVM nuisance models. + + Performs tuning once on the whole dataset using cross-validation, + returning the same optimal parameters for all folds. 
+ """ + + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, z = check_X_y(x, np.ravel(self._dml_data.z), ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + + if scoring_methods is None: + scoring_methods = {"ml_g0": None, "ml_g1": None, "ml_m": None, "ml_r0": None, "ml_r1": None} + + # Separate data by instrument status for conditional mean tuning + mask_z0 = z == 0 + mask_z1 = z == 1 + + x_z0 = x[mask_z0, :] + y_z0 = y[mask_z0] + g0_tune_res = _dml_tune_optuna( + y_z0, + x_z0, + self._learner["ml_g"], + optuna_params["ml_g0"], + scoring_methods["ml_g0"], + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g0", + ) + + x_z1 = x[mask_z1, :] + y_z1 = y[mask_z1] + g1_tune_res = _dml_tune_optuna( + y_z1, + x_z1, + self._learner["ml_g"], + optuna_params["ml_g1"], + scoring_methods["ml_g1"], + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g1", + ) + + # Tune propensity score on full dataset + m_tune_res = _dml_tune_optuna( + z, + x, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + + r0_tune_res = None + r1_tune_res = None + if self.subgroups["always_takers"]: + d_z0 = d[mask_z0] + r0_tune_res = _dml_tune_optuna( + d_z0, + x_z0, + self._learner["ml_r"], + optuna_params["ml_r0"], + scoring_methods["ml_r0"], + cv, + optuna_settings, + learner_name="ml_r", + params_name="ml_r0", + ) + + if self.subgroups["never_takers"]: + d_z1 = d[mask_z1] + r1_tune_res = _dml_tune_optuna( + d_z1, + x_z1, + self._learner["ml_r"], + optuna_params["ml_r1"], + scoring_methods["ml_r1"], + cv, + optuna_settings, + learner_name="ml_r", + params_name="ml_r1", + ) + + results = { + "ml_g0": g0_tune_res, + "ml_g1": g1_tune_res, + "ml_m": m_tune_res, + "ml_r0": r0_tune_res, + "ml_r1": r1_tune_res, + } + + return results + def _sensitivity_element_est(self, preds): pass diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py index f5abdbd9..16db426b 100644 --- a/doubleml/irm/irm.py +++ b/doubleml/irm/irm.py @@ -18,6 +18,7 @@ ) from doubleml.utils._estimation import _cond_targets, _dml_cv_predict, _dml_tune, _get_cond_smpls from doubleml.utils._propensity_score import _propensity_score_adjustment +from doubleml.utils._tune_optuna import _dml_tune_optuna from doubleml.utils.blp import DoubleMLBLP from doubleml.utils.policytree import DoubleMLPolicyTree from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor @@ -494,6 +495,72 @@ def _nuisance_tuning( return res + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + """ + Optuna-based hyperparameter tuning for IRM nuisance models. + + Performs tuning once on the whole dataset using cross-validation, + returning the same optimal parameters for all folds. 
+ """ + + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + + if scoring_methods is None: + scoring_methods = {"ml_g0": None, "ml_g1": None, "ml_m": None} + + # Separate data by treatment status for conditional mean tuning + mask_d0 = d == 0 + mask_d1 = d == 1 + + x_d0 = x[mask_d0, :] + y_d0 = y[mask_d0] + g0_tune_res = _dml_tune_optuna( + y_d0, + x_d0, + self._learner["ml_g"], + optuna_params["ml_g0"], + scoring_methods["ml_g0"], + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g0", + ) + + x_d1 = x[mask_d1, :] + y_d1 = y[mask_d1] + g1_tune_res = _dml_tune_optuna( + y_d1, + x_d1, + self._learner["ml_g"], + optuna_params["ml_g1"], + scoring_methods["ml_g1"], + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g1", + ) + + # Tune propensity score on full dataset + m_tune_res = _dml_tune_optuna( + d, + x, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + return {"ml_g0": g0_tune_res, "ml_g1": g1_tune_res, "ml_m": m_tune_res} + def cate(self, basis, is_gate=False, **kwargs): """ Calculate conditional average treatment effects (CATE) for a given basis. diff --git a/doubleml/irm/lpq.py b/doubleml/irm/lpq.py index 5dd8ff37..4301dfda 100644 --- a/doubleml/irm/lpq.py +++ b/doubleml/irm/lpq.py @@ -21,6 +21,7 @@ _solve_ipw_score, ) from doubleml.utils._propensity_score import _normalize_ipw +from doubleml.utils._tune_optuna import _dml_tune_optuna from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor @@ -694,6 +695,105 @@ def _nuisance_tuning( return res + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + x, z = check_X_y(x, np.ravel(self._dml_data.z), ensure_all_finite=False) + + if scoring_methods is None: + scoring_methods = { + "ml_m_z": None, + "ml_m_d_z0": None, + "ml_m_d_z1": None, + "ml_g_du_z0": None, + "ml_g_du_z1": None, + } + + approx_quant = np.quantile(y[d == self.treatment], self.quantile) + du = (d == self.treatment) * (y <= approx_quant) + + m_z_tune_res = _dml_tune_optuna( + z, + x, + self._learner["ml_m_z"], + optuna_params["ml_m_z"], + scoring_methods["ml_m_z"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m_z", + ) + + mask_z0 = z == 0 + mask_z1 = z == 1 + + x_z0 = x[mask_z0, :] + d_z0 = d[mask_z0] + du_z0 = du[mask_z0] + m_d_z0_tune_res = _dml_tune_optuna( + d_z0, + x_z0, + self._learner["ml_m_d_z0"], + optuna_params["ml_m_d_z0"], + scoring_methods["ml_m_d_z0"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m_d_z0", + ) + g_du_z0_tune_res = _dml_tune_optuna( + du_z0, + x_z0, + self._learner["ml_g_du_z0"], + optuna_params["ml_g_du_z0"], + scoring_methods["ml_g_du_z0"], + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g_du_z0", + ) + + x_z1 = x[mask_z1, :] + d_z1 = d[mask_z1] + du_z1 = du[mask_z1] + m_d_z1_tune_res = _dml_tune_optuna( + d_z1, + x_z1, + self._learner["ml_m_d_z1"], + optuna_params["ml_m_d_z1"], + scoring_methods["ml_m_d_z1"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m_d_z1", + ) + g_du_z1_tune_res = _dml_tune_optuna( + du_z1, + x_z1, + self._learner["ml_g_du_z1"], + optuna_params["ml_g_du_z1"], + 
scoring_methods["ml_g_du_z1"], + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g_du_z1", + ) + + return { + "ml_m_z": m_z_tune_res, + "ml_m_d_z0": m_d_z0_tune_res, + "ml_m_d_z1": m_d_z1_tune_res, + "ml_g_du_z0": g_du_z0_tune_res, + "ml_g_du_z1": g_du_z1_tune_res, + } + def _check_data(self, obj_dml_data): if not isinstance(obj_dml_data, DoubleMLData): raise TypeError( diff --git a/doubleml/irm/pq.py b/doubleml/irm/pq.py index 901c07b7..73e9e3d4 100644 --- a/doubleml/irm/pq.py +++ b/doubleml/irm/pq.py @@ -26,6 +26,7 @@ _solve_ipw_score, ) from doubleml.utils._propensity_score import _normalize_ipw +from doubleml.utils._tune_optuna import _dml_tune_optuna from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor @@ -480,6 +481,50 @@ def _nuisance_tuning( return res + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + + if scoring_methods is None: + scoring_methods = {"ml_g": None, "ml_m": None} + + mask_treat = d == self.treatment + approx_goal = y <= np.quantile(y[mask_treat], self.quantile) + + x_treat = x[mask_treat, :] + goal_treat = approx_goal[mask_treat] + g_tune_res = _dml_tune_optuna( + goal_treat, + x_treat, + self._learner["ml_g"], + optuna_params["ml_g"], + scoring_methods["ml_g"], + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g", + ) + + m_tune_res = _dml_tune_optuna( + d, + x, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + return {"ml_g": g_tune_res, "ml_m": m_tune_res} + def _check_data(self, obj_dml_data): if not isinstance(obj_dml_data, DoubleMLData): raise TypeError( diff --git a/doubleml/irm/qte.py b/doubleml/irm/qte.py index c3325e08..c572001e 100644 --- a/doubleml/irm/qte.py +++ b/doubleml/irm/qte.py @@ -15,6 +15,7 @@ from doubleml.utils._checks import _check_score, _check_zero_one_treatment from doubleml.utils._descriptive import generate_summary from doubleml.utils._estimation import _default_kde +from doubleml.utils._tune_optuna import TUNE_ML_MODELS_DOC from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor @@ -536,6 +537,42 @@ def p_adjust(self, method="romano-wolf"): return p_val + def tune_ml_models( + self, + ml_param_space, + scoring_methods=None, + cv=5, + set_as_params=True, + return_tune_res=False, + optuna_settings=None, + ): + """Hyperparameter-tuning for DoubleML models using Optuna.""" + + tuning_kwargs = { + "ml_param_space": ml_param_space, + "scoring_methods": scoring_methods, + "cv": cv, + "set_as_params": set_as_params, + "return_tune_res": return_tune_res, + "optuna_settings": optuna_settings, + } + + tune_res = [] if return_tune_res else None + + for i_quant in range(self.n_quantiles): + model_0 = self.modellist_0[i_quant] + model_1 = self.modellist_1[i_quant] + + res_0 = model_0.tune_ml_models(**tuning_kwargs) + res_1 = model_1.tune_ml_models(**tuning_kwargs) + + if return_tune_res: + tune_res.append({"treatment_0": res_0[0], "treatment_1": res_1[0]}) + + return tune_res if return_tune_res else self + + tune_ml_models.__doc__ = TUNE_ML_MODELS_DOC + def _fit_quantile(self, i_quant, n_jobs_cv=None, store_predictions=True, store_models=False): model_0 = self.modellist_0[i_quant] model_1 = self.modellist_1[i_quant] diff --git 
a/doubleml/irm/ssm.py b/doubleml/irm/ssm.py index bc6cd739..9c1fd6ed 100644 --- a/doubleml/irm/ssm.py +++ b/doubleml/irm/ssm.py @@ -12,6 +12,7 @@ from doubleml.double_ml_score_mixins import LinearScoreMixin from doubleml.utils._checks import _check_finite_predictions, _check_score from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d, _predict_zero_one_propensity +from doubleml.utils._tune_optuna import _dml_tune_optuna from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor @@ -572,5 +573,99 @@ def filter_by_ds(inner_train1_inds, d, s): return {"params": params, "tune_res": tune_res} + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + x, s = check_X_y(x, self._dml_data.s, ensure_all_finite=False) + + if scoring_methods is None: + scoring_methods = { + "ml_g_d0": None, + "ml_g_d1": None, + "ml_pi": None, + "ml_m": None, + } + + def get_param_and_scoring(key): + return optuna_params[key], scoring_methods[key] + + if self._score == "nonignorable": + raise NotImplementedError("Optuna tuning for nonignorable score is not implemented yet. ") + else: + mask_d0_s1 = np.logical_and(d == 0, s == 1) + mask_d1_s1 = np.logical_and(d == 1, s == 1) + + g_d0_param, g_d0_scoring = get_param_and_scoring("ml_g_d0") + g_d1_param, g_d1_scoring = get_param_and_scoring("ml_g_d1") + + x_d0 = x[mask_d0_s1, :] + y_d0 = y[mask_d0_s1] + g_d0_tune_res = _dml_tune_optuna( + y_d0, + x_d0, + self._learner["ml_g"], + g_d0_param, + g_d0_scoring, + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g_d0", + ) + + x_d1 = x[mask_d1_s1, :] + y_d1 = y[mask_d1_s1] + g_d1_tune_res = _dml_tune_optuna( + y_d1, + x_d1, + self._learner["ml_g"], + g_d1_param, + g_d1_scoring, + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g_d1", + ) + + x_d_feat = np.column_stack((x, d)) + pi_tune_res = _dml_tune_optuna( + s, + x_d_feat, + self._learner["ml_pi"], + optuna_params["ml_pi"], + scoring_methods["ml_pi"], + cv, + optuna_settings, + learner_name="ml_pi", + params_name="ml_pi", + ) + + m_tune_res = _dml_tune_optuna( + d, + x, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + + results = { + "ml_g_d0": g_d0_tune_res, + "ml_g_d1": g_d1_tune_res, + "ml_pi": pi_tune_res, + "ml_m": m_tune_res, + } + + return results + def _sensitivity_element_est(self, preds): pass diff --git a/doubleml/irm/tests/test_apo_tune_ml_models.py b/doubleml/irm/tests/test_apo_tune_ml_models.py new file mode 100644 index 00000000..469cdcd9 --- /dev/null +++ b/doubleml/irm/tests/test_apo_tune_ml_models.py @@ -0,0 +1,43 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +import doubleml as dml +from doubleml.irm.datasets import make_irm_data +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _build_param_space, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +def test_doubleml_apo_optuna_tune(sampler_name, optuna_sampler): + np.random.seed(3146) + theta = 0.5 + dml_data = make_irm_data(n_obs=500, dim_x=6, theta=theta) + + ml_g = 
DecisionTreeRegressor(random_state=321) + ml_m = DecisionTreeClassifier(random_state=654) + + dml_apo = dml.DoubleMLAPO(dml_data, ml_g=ml_g, ml_m=ml_m, n_folds=2, treatment_level=1) + dml_apo.fit() + untuned_score = dml_apo.evaluate_learners() + + optuna_params = _build_param_space(dml_apo, _small_tree_params) + + optuna_settings = _basic_optuna_settings({"sampler": optuna_sampler}) + tune_res = dml_apo.tune_ml_models(ml_param_space=optuna_params, optuna_settings=optuna_settings, return_tune_res=True) + + dml_apo.fit() + tuned_score = dml_apo.evaluate_learners() + + for learner_name in dml_apo.params_names: + tuned_params = tune_res[0][learner_name].best_params + _assert_tree_params(tuned_params) + + # ensure tuning improved RMSE + assert tuned_score[learner_name] < untuned_score[learner_name] diff --git a/doubleml/irm/tests/test_cvar_tune_ml_models.py b/doubleml/irm/tests/test_cvar_tune_ml_models.py new file mode 100644 index 00000000..76dc89da --- /dev/null +++ b/doubleml/irm/tests/test_cvar_tune_ml_models.py @@ -0,0 +1,44 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +import doubleml as dml +from doubleml.irm.datasets import make_irm_data +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +def test_doubleml_cvar_optuna_tune(sampler_name, optuna_sampler): + np.random.seed(3145) + dml_data = make_irm_data(n_obs=200, dim_x=5) + + ml_g = DecisionTreeRegressor(random_state=321) + ml_m = DecisionTreeClassifier(random_state=654) + + dml_cvar = dml.DoubleMLCVAR(dml_data, ml_g=ml_g, ml_m=ml_m, n_folds=2) + dml_cvar.fit() + untuned_score = dml_cvar.evaluate_learners() + + optuna_params = {"ml_g": _small_tree_params, "ml_m": _small_tree_params} + + optuna_settings = _basic_optuna_settings({"sampler": optuna_sampler}) + tune_res = dml_cvar.tune_ml_models(ml_param_space=optuna_params, optuna_settings=optuna_settings, return_tune_res=True) + + dml_cvar.fit() + tuned_score = dml_cvar.evaluate_learners() + + tuned_params_g = tune_res[0]["ml_g"].best_params + tuned_params_m = tune_res[0]["ml_m"].best_params + + _assert_tree_params(tuned_params_g) + _assert_tree_params(tuned_params_m) + + # ensure tuning improved RMSE + assert tuned_score["ml_g"] < untuned_score["ml_g"] + assert tuned_score["ml_m"] < untuned_score["ml_m"] diff --git a/doubleml/irm/tests/test_datasets.py b/doubleml/irm/tests/test_datasets.py index 79bf6794..9286e33a 100644 --- a/doubleml/irm/tests/test_datasets.py +++ b/doubleml/irm/tests/test_datasets.py @@ -131,6 +131,7 @@ def n_levels(request): return request.param +@pytest.mark.ci def test_make_data_discrete_treatments(n_levels): np.random.seed(3141) n = 100 diff --git a/doubleml/irm/tests/test_iivm_tune_ml_models.py b/doubleml/irm/tests/test_iivm_tune_ml_models.py new file mode 100644 index 00000000..40dc1b51 --- /dev/null +++ b/doubleml/irm/tests/test_iivm_tune_ml_models.py @@ -0,0 +1,62 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +import doubleml as dml +from doubleml.irm.datasets import make_iivm_data +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", 
_SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +def test_doubleml_iivm_optuna_tune(sampler_name, optuna_sampler): + """Test IIVM with ml_g0, ml_g1, ml_m, ml_r0, ml_r1 nuisance models.""" + + np.random.seed(3143) + dml_data = make_iivm_data(n_obs=1000, dim_x=5) + + ml_g = DecisionTreeRegressor(random_state=321) + ml_m = DecisionTreeClassifier(random_state=420) + ml_r = DecisionTreeClassifier(random_state=789) + + dml_iivm = dml.DoubleMLIIVM(dml_data, ml_g, ml_m, ml_r, n_folds=2, subgroups={"always_takers": True, "never_takers": True}) + dml_iivm.fit() + untuned_score = dml_iivm.evaluate_learners() + + optuna_params = { + "ml_g0": _small_tree_params, + "ml_g1": _small_tree_params, + "ml_m": _small_tree_params, + "ml_r0": _small_tree_params, + "ml_r1": _small_tree_params, + } + + optuna_settings = _basic_optuna_settings({"sampler": optuna_sampler}) + tune_res = dml_iivm.tune_ml_models(ml_param_space=optuna_params, optuna_settings=optuna_settings, return_tune_res=True) + + dml_iivm.fit() + tuned_score = dml_iivm.evaluate_learners() + + tuned_params_g0 = tune_res[0]["ml_g0"].best_params + tuned_params_g1 = tune_res[0]["ml_g1"].best_params + tuned_params_m = tune_res[0]["ml_m"].best_params + tuned_params_r0 = tune_res[0]["ml_r0"].best_params + tuned_params_r1 = tune_res[0]["ml_r1"].best_params + + _assert_tree_params(tuned_params_g0) + _assert_tree_params(tuned_params_g1) + _assert_tree_params(tuned_params_m) + _assert_tree_params(tuned_params_r0) + _assert_tree_params(tuned_params_r1) + + # ensure tuning improved RMSE + assert tuned_score["ml_g0"] < untuned_score["ml_g0"] + assert tuned_score["ml_g1"] < untuned_score["ml_g1"] + assert tuned_score["ml_m"] < untuned_score["ml_m"] + assert tuned_score["ml_r0"] < untuned_score["ml_r0"] + assert tuned_score["ml_r1"] < untuned_score["ml_r1"] diff --git a/doubleml/irm/tests/test_irm_tune_ml_models.py b/doubleml/irm/tests/test_irm_tune_ml_models.py new file mode 100644 index 00000000..54242a02 --- /dev/null +++ b/doubleml/irm/tests/test_irm_tune_ml_models.py @@ -0,0 +1,48 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +import doubleml as dml +from doubleml.irm.datasets import make_irm_data +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +def test_doubleml_irm_optuna_tune(sampler_name, optuna_sampler): + np.random.seed(3142) + dml_data = make_irm_data(n_obs=500, dim_x=5) + + ml_g = DecisionTreeRegressor(random_state=321) + ml_m = DecisionTreeClassifier(random_state=654) + + dml_irm = dml.DoubleMLIRM(dml_data, ml_g, ml_m, n_folds=2) + dml_irm.fit() + untuned_score = dml_irm.evaluate_learners() + + optuna_params = {"ml_g0": _small_tree_params, "ml_g1": _small_tree_params, "ml_m": _small_tree_params} + + optuna_settings = _basic_optuna_settings({"sampler": optuna_sampler}) + + tune_res = dml_irm.tune_ml_models(ml_param_space=optuna_params, optuna_settings=optuna_settings, return_tune_res=True) + + dml_irm.fit() + tuned_score = dml_irm.evaluate_learners() + + tuned_params_g0 = tune_res[0]["ml_g0"].best_params + tuned_params_g1 = tune_res[0]["ml_g1"].best_params + tuned_params_m = tune_res[0]["ml_m"].best_params + + _assert_tree_params(tuned_params_g0) + _assert_tree_params(tuned_params_g1) + _assert_tree_params(tuned_params_m) + + # ensure tuning improved 
RMSE + assert tuned_score["ml_g0"] < untuned_score["ml_g0"] + assert tuned_score["ml_g1"] < untuned_score["ml_g1"] + assert tuned_score["ml_m"] < untuned_score["ml_m"] diff --git a/doubleml/irm/tests/test_irm_with_missings.py b/doubleml/irm/tests/test_irm_with_missings.py index 838ea98a..5c2f7131 100644 --- a/doubleml/irm/tests/test_irm_with_missings.py +++ b/doubleml/irm/tests/test_irm_with_missings.py @@ -150,6 +150,7 @@ def test_dml_irm_w_missing_boot(dml_irm_w_missing_fixture): ) +@pytest.mark.ci def test_irm_exception_with_missings(generate_data_irm_w_missings, learner_sklearn): # collect data (x, y, d) = generate_data_irm_w_missings diff --git a/doubleml/irm/tests/test_lpq_tune_ml_models.py b/doubleml/irm/tests/test_lpq_tune_ml_models.py new file mode 100644 index 00000000..6db0aebc --- /dev/null +++ b/doubleml/irm/tests/test_lpq_tune_ml_models.py @@ -0,0 +1,45 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier + +import doubleml as dml +from doubleml.irm.datasets import make_iivm_data +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _build_param_space, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +def test_doubleml_lpq_optuna_tune(sampler_name, optuna_sampler): + np.random.seed(3148) + dml_data = make_iivm_data(n_obs=500, dim_x=10) + + ml_g = DecisionTreeClassifier(random_state=321) + ml_m = DecisionTreeClassifier(random_state=654) + + dml_lpq = dml.DoubleMLLPQ(dml_data, ml_g=ml_g, ml_m=ml_m, n_folds=2) + dml_lpq.fit() + untuned_score = dml_lpq.nuisance_loss + + optuna_params = _build_param_space(dml_lpq, _small_tree_params) + + optuna_settings = _basic_optuna_settings({"sampler": optuna_sampler}) + optuna_settings["n_trials"] = 10 + tune_res = dml_lpq.tune_ml_models( + ml_param_space=optuna_params, set_as_params=True, optuna_settings=optuna_settings, return_tune_res=True + ) + + dml_lpq.fit() + tuned_score = dml_lpq.nuisance_loss + + for learner_name in dml_lpq.params_names: + tuned_params = tune_res[0][learner_name].best_params + _assert_tree_params(tuned_params) + + # ensure tuning improved RMSE + assert tuned_score[learner_name] < untuned_score[learner_name] diff --git a/doubleml/irm/tests/test_pq_tune_ml_models.py b/doubleml/irm/tests/test_pq_tune_ml_models.py new file mode 100644 index 00000000..19759244 --- /dev/null +++ b/doubleml/irm/tests/test_pq_tune_ml_models.py @@ -0,0 +1,42 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier + +import doubleml as dml +from doubleml.irm.datasets import make_irm_data +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _build_param_space, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +def test_doubleml_pq_optuna_tune(sampler_name, optuna_sampler): + np.random.seed(3147) + dml_data = make_irm_data(n_obs=500, dim_x=10) + + ml_g = DecisionTreeClassifier(random_state=321) + ml_m = DecisionTreeClassifier(random_state=654) + + dml_pq = dml.DoubleMLPQ(dml_data, ml_g=ml_g, ml_m=ml_m, n_folds=2) + dml_pq.fit() + untuned_score = dml_pq.evaluate_learners() + + optuna_params = _build_param_space(dml_pq, _small_tree_params) + + optuna_settings = _basic_optuna_settings({"sampler": optuna_sampler}) + tune_res = 
dml_pq.tune_ml_models(ml_param_space=optuna_params, optuna_settings=optuna_settings, return_tune_res=True) + + dml_pq.fit() + tuned_score = dml_pq.evaluate_learners() + + for learner_name in dml_pq.params_names: + tuned_params = tune_res[0][learner_name].best_params + _assert_tree_params(tuned_params) + + # ensure tuning improved RMSE + assert tuned_score[learner_name] < untuned_score[learner_name] diff --git a/doubleml/irm/tests/test_ssm_tune_ml_models.py b/doubleml/irm/tests/test_ssm_tune_ml_models.py new file mode 100644 index 00000000..6418ae0f --- /dev/null +++ b/doubleml/irm/tests/test_ssm_tune_ml_models.py @@ -0,0 +1,64 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +import doubleml as dml +from doubleml.irm.datasets import make_ssm_data +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _build_param_space, + _small_tree_params, +) + + +# test NotImplementedError for nonignorable score +@pytest.mark.ci +def test_doubleml_ssm_optuna_tune_not_implemented(): + np.random.seed(3149) + dml_data = make_ssm_data(n_obs=500, dim_x=10, mar=False) + + ml_g = DecisionTreeRegressor(random_state=321) + ml_pi = DecisionTreeClassifier(random_state=654) + ml_m = DecisionTreeClassifier(random_state=987) + + dml_ssm = dml.DoubleMLSSM(dml_data, ml_g=ml_g, ml_pi=ml_pi, ml_m=ml_m, n_folds=2, score="nonignorable") + + optuna_params = _build_param_space(dml_ssm, _small_tree_params) + optuna_settings = _basic_optuna_settings({"sampler": "TPESampler"}) + + with pytest.raises(NotImplementedError, match="Optuna tuning for nonignorable score is not implemented yet."): + dml_ssm.tune_ml_models(ml_param_space=optuna_params, optuna_settings=optuna_settings) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +def test_doubleml_ssm_optuna_tune(sampler_name, optuna_sampler): + np.random.seed(3149) + dml_data = make_ssm_data(n_obs=500, dim_x=10, mar=True) + + ml_g = DecisionTreeRegressor(random_state=321) + ml_pi = DecisionTreeClassifier(random_state=654) + ml_m = DecisionTreeClassifier(random_state=987) + + dml_ssm = dml.DoubleMLSSM(dml_data, ml_g=ml_g, ml_pi=ml_pi, ml_m=ml_m, n_folds=2, score="missing-at-random") + dml_ssm.fit() + untuned_score = dml_ssm.evaluate_learners() + + optuna_params = _build_param_space(dml_ssm, _small_tree_params) + + optuna_settings = _basic_optuna_settings({"sampler": optuna_sampler}) + tune_res = dml_ssm.tune_ml_models( + ml_param_space=optuna_params, optuna_settings=optuna_settings, set_as_params=True, return_tune_res=True + ) + + dml_ssm.fit() + tuned_score = dml_ssm.evaluate_learners() + + for learner_name in dml_ssm.params_names: + tuned_params = tune_res[0][learner_name].best_params + _assert_tree_params(tuned_params) + + # ensure tuning improved RMSE + assert tuned_score[learner_name] < untuned_score[learner_name] diff --git a/doubleml/plm/lplr.py b/doubleml/plm/lplr.py index 1c674fef..ff578448 100644 --- a/doubleml/plm/lplr.py +++ b/doubleml/plm/lplr.py @@ -4,6 +4,7 @@ import numpy as np import scipy from sklearn.base import clone +from sklearn.model_selection import cross_val_predict from sklearn.utils import check_X_y from sklearn.utils.multiclass import type_of_target @@ -15,6 +16,7 @@ _dml_tune, _double_dml_cv_predict, ) +from doubleml.utils._tune_optuna import _dml_tune_optuna class DoubleMLLPLR(NonLinearScoreMixin, DoubleML): @@ -423,8 +425,8 @@ def 
_sensitivity_element_est(self, preds): def _nuisance_tuning( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search ): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) x_d_concat = np.hstack((d.reshape(-1, 1), x)) if scoring_methods is None: @@ -536,3 +538,102 @@ def _compute_score_deriv(self, psi_elements, coef, inds=None): deriv = -psi_elements["d"] * expit * (1 - expit) * psi_elements["d_tilde"] return deriv + + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + """ + Optuna-based hyperparameter tuning for LPLR nuisance models. + + Performs tuning once on the whole dataset using cross-validation, + returning the same optimal parameters for all folds. + """ + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + x_d_concat = np.hstack((d.reshape(-1, 1), x)) + + if scoring_methods is None: + scoring_methods = {"ml_m": None, "ml_M": None, "ml_a": None, "ml_t": None} + + M_tune_res = _dml_tune_optuna( + y, + x_d_concat, + self._learner["ml_M"], + optuna_params["ml_M"], + scoring_methods["ml_M"], + cv, + optuna_settings, + learner_name="ml_M", + params_name="ml_M", + ) + + if self.score == "nuisance_space": + mask_y0 = y == 0 + outcome_ml_m = d[mask_y0] + features_ml_m = x[mask_y0, :] + elif self.score == "instrument": + outcome_ml_m = d + features_ml_m = x + + m_tune_res = _dml_tune_optuna( + outcome_ml_m, + features_ml_m, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + + a_tune_res = _dml_tune_optuna( + d, + x, + self._learner["ml_a"], + optuna_params["ml_a"], + scoring_methods["ml_a"], + cv, + optuna_settings, + learner_name="ml_a", + params_name="ml_a", + ) + + # Create targets for tuning ml_t + # Unlike for inference in _nuisance_est, we do not use the double cross-fitting here and use a single model for + # predicting M_hat + # This presents a small risk of bias in the targets, but enables tuning without tune_on_folds=True + + M_hat = cross_val_predict( + estimator=clone(M_tune_res.best_estimator), + X=x_d_concat, + y=y, + cv=cv, + method="predict_proba", + )[:, 1] + M_hat = np.clip(M_hat, 1e-8, 1 - 1e-8) + W_hat = scipy.special.logit(M_hat) + + t_tune_res = _dml_tune_optuna( + W_hat, + x, + self._learner["ml_t"], + optuna_params["ml_t"], + scoring_methods["ml_t"], + cv, + optuna_settings, + learner_name="ml_t", + params_name="ml_t", + ) + + results = { + "ml_M": M_tune_res, + "ml_m": m_tune_res, + "ml_a": a_tune_res, + "ml_t": t_tune_res, + } + return results diff --git a/doubleml/plm/pliv.py b/doubleml/plm/pliv.py index e8fd0ed6..b7a384f0 100644 --- a/doubleml/plm/pliv.py +++ b/doubleml/plm/pliv.py @@ -3,14 +3,15 @@ import numpy as np from sklearn.dummy import DummyRegressor from sklearn.linear_model import LinearRegression -from sklearn.model_selection import GridSearchCV, KFold, RandomizedSearchCV +from sklearn.model_selection import GridSearchCV, KFold, RandomizedSearchCV, cross_val_predict from sklearn.utils import check_X_y -from ..data.base_data import DoubleMLData -from ..double_ml import DoubleML -from ..double_ml_score_mixins import 
LinearScoreMixin -from ..utils._checks import _check_finite_predictions -from ..utils._estimation import _dml_cv_predict, _dml_tune +from doubleml.data.base_data import DoubleMLData +from doubleml.double_ml import DoubleML +from doubleml.double_ml_score_mixins import LinearScoreMixin +from doubleml.utils._checks import _check_finite_predictions +from doubleml.utils._estimation import _dml_cv_predict, _dml_tune +from doubleml.utils._tune_optuna import _dml_tune_optuna class DoubleMLPLIV(LinearScoreMixin, DoubleML): @@ -229,24 +230,79 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa return psi_elements, preds def _nuisance_tuning( - self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search + self, + smpls, + param_grids, + scoring_methods, + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, ): if self.partialX & (not self.partialZ): res = self._nuisance_tuning_partial_x( - smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search + smpls, + param_grids, + scoring_methods, + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, ) elif (not self.partialX) & self.partialZ: res = self._nuisance_tuning_partial_z( - smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search + smpls, + param_grids, + scoring_methods, + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, ) else: assert self.partialX & self.partialZ res = self._nuisance_tuning_partial_xz( - smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search + smpls, + param_grids, + scoring_methods, + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, ) return res + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + if self.partialX & (not self.partialZ): + return self._nuisance_tuning_optuna_partial_x( + optuna_params, + scoring_methods, + cv, + optuna_settings, + ) + elif (not self.partialX) & self.partialZ: + return self._nuisance_tuning_optuna_partial_z( + optuna_params, + scoring_methods, + cv, + optuna_settings, + ) + else: + assert self.partialX & self.partialZ + return self._nuisance_tuning_optuna_partial_xz( + optuna_params, + scoring_methods, + cv, + optuna_settings, + ) + def _nuisance_est_partial_x(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) @@ -638,7 +694,7 @@ def _nuisance_tuning_partial_z( scoring_methods = {"ml_r": None} train_inds = [train_index for (train_index, _) in smpls] - m_tune_res = _dml_tune( + r_tune_res = _dml_tune( d, xz, train_inds, @@ -651,11 +707,11 @@ def _nuisance_tuning_partial_z( n_iter_randomized_search, ) - m_best_params = [xx.best_params_ for xx in m_tune_res] + r_best_params = [xx.best_params_ for xx in r_tune_res] - params = {"ml_r": m_best_params} + params = {"ml_r": r_best_params} - tune_res = {"r_tune": m_tune_res} + tune_res = {"r_tune": r_tune_res} res = {"params": params, "tune_res": tune_res} @@ -733,5 +789,188 @@ def _nuisance_tuning_partial_xz( return res + def _nuisance_tuning_optuna_partial_x( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, 
ensure_all_finite=False) + + if scoring_methods is None: + scoring_methods = {"ml_l": None, "ml_m": None, "ml_r": None, "ml_g": None} + + l_tune_res = _dml_tune_optuna( + y, + x, + self._learner["ml_l"], + optuna_params["ml_l"], + scoring_methods["ml_l"], + cv, + optuna_settings, + learner_name="ml_l", + params_name="ml_l", + ) + + if self._dml_data.n_instr > 1: + m_tune_res = {} + z_all = self._dml_data.z + for i_instr, instr_var in enumerate(self._dml_data.z_cols): + x_instr, this_z = check_X_y(x, z_all[:, i_instr], ensure_all_finite=False) + scoring_key = scoring_methods.get(f"ml_m_{instr_var}", scoring_methods.get("ml_m")) + m_tune_res[instr_var] = _dml_tune_optuna( + this_z, + x_instr, + self._learner["ml_m"], + optuna_params[f"ml_m_{instr_var}"], + scoring_key, + cv, + optuna_settings, + learner_name="ml_m", + params_name=f"ml_m_{instr_var}", + ) + x_m_features = x # keep reference for later when constructing params + z_vector = None + else: + x_m_features, z_vector = check_X_y(x, np.ravel(self._dml_data.z), ensure_all_finite=False) + m_tune_res = _dml_tune_optuna( + z_vector, + x_m_features, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + + r_tune_res = _dml_tune_optuna( + d, + x, + self._learner["ml_r"], + optuna_params["ml_r"], + scoring_methods["ml_r"], + cv, + optuna_settings, + learner_name="ml_r", + params_name="ml_r", + ) + + results = {"ml_l": l_tune_res, "ml_r": r_tune_res} + + if self._dml_data.n_instr > 1: + for instr_var in self._dml_data.z_cols: + results["ml_m_" + instr_var] = m_tune_res[instr_var] + else: + results["ml_m"] = m_tune_res + if "ml_g" in self._learner: + l_hat = cross_val_predict(l_tune_res.best_estimator, x, y, cv=cv, method=self._predict_method["ml_l"]) + m_hat = cross_val_predict( + m_tune_res.best_estimator, x_m_features, z_vector, cv=cv, method=self._predict_method["ml_m"] + ) + r_hat = cross_val_predict(r_tune_res.best_estimator, x, cv=cv, method=self._predict_method["ml_r"]) + psi_a = -np.multiply(d - r_hat, z_vector - m_hat) + psi_b = np.multiply(z_vector - m_hat, y - l_hat) + theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) + + g_tune_res = _dml_tune_optuna( + y - theta_initial * d, + x, + self._learner["ml_g"], + optuna_params["ml_g"], + scoring_methods["ml_g"], + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g", + ) + + results["ml_g"] = g_tune_res + + return results + + def _nuisance_tuning_optuna_partial_z( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + + xz, d = check_X_y(np.hstack((self._dml_data.x, self._dml_data.z)), self._dml_data.d, ensure_all_finite=False) + + if scoring_methods is None: + scoring_methods = {"ml_r": None} + + r_tune_res = _dml_tune_optuna( + d, + xz, + self._learner["ml_r"], + optuna_params["ml_r"], + scoring_methods["ml_r"], + cv, + optuna_settings, + learner_name="ml_r", + params_name="ml_r", + ) + return {"ml_r": r_tune_res} + + def _nuisance_tuning_optuna_partial_xz( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + xz, d = check_X_y(np.hstack((self._dml_data.x, self._dml_data.z)), self._dml_data.d, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + + if scoring_methods is None: + scoring_methods = {"ml_l": None, "ml_m": None, "ml_r": None} + + l_tune_res = _dml_tune_optuna( + y, + x, + self._learner["ml_l"], + 
optuna_params["ml_l"], + scoring_methods["ml_l"], + cv, + optuna_settings, + learner_name="ml_l", + params_name="ml_l", + ) + + m_tune_res = _dml_tune_optuna( + d, + xz, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + + m_hat = cross_val_predict(m_tune_res.best_estimator, xz, d, cv=cv, method=self._predict_method["ml_m"]) + r_tune_res = _dml_tune_optuna( + m_hat, + x, + self._learner["ml_r"], + optuna_params["ml_r"], + scoring_methods["ml_r"], + cv, + optuna_settings, + learner_name="ml_r", + params_name="ml_r", + ) + return {"ml_l": l_tune_res, "ml_m": m_tune_res, "ml_r": r_tune_res} + def _sensitivity_element_est(self, preds): pass diff --git a/doubleml/plm/plr.py b/doubleml/plm/plr.py index 9b28231f..e943d87b 100644 --- a/doubleml/plm/plr.py +++ b/doubleml/plm/plr.py @@ -3,14 +3,16 @@ import numpy as np import pandas as pd from sklearn.base import clone +from sklearn.model_selection import cross_val_predict from sklearn.utils import check_X_y -from ..data.base_data import DoubleMLData -from ..double_ml import DoubleML -from ..double_ml_score_mixins import LinearScoreMixin -from ..utils._checks import _check_binary_predictions, _check_finite_predictions, _check_is_propensity, _check_score -from ..utils._estimation import _dml_cv_predict, _dml_tune -from ..utils.blp import DoubleMLBLP +from doubleml.data.base_data import DoubleMLData +from doubleml.double_ml import DoubleML +from doubleml.double_ml_score_mixins import LinearScoreMixin +from doubleml.utils._checks import _check_binary_predictions, _check_finite_predictions, _check_is_propensity, _check_score +from doubleml.utils._estimation import _dml_cv_predict, _dml_tune +from doubleml.utils._tune_optuna import _dml_tune_optuna +from doubleml.utils.blp import DoubleMLBLP class DoubleMLPLR(LinearScoreMixin, DoubleML): @@ -372,6 +374,76 @@ def _nuisance_tuning( return res + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + """ + Optuna-based hyperparameter tuning for PLR nuisance models. + + Performs tuning once on the whole dataset using cross-validation, + returning the same optimal parameters for all folds. 
+ """ + + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + + if scoring_methods is None: + scoring_methods = {"ml_l": None, "ml_m": None, "ml_g": None} + + l_tune_res = _dml_tune_optuna( + y, + x, + self._learner["ml_l"], + optuna_params["ml_l"], + scoring_methods["ml_l"], + cv, + optuna_settings, + learner_name="ml_l", + params_name="ml_l", + ) + m_tune_res = _dml_tune_optuna( + d, + x, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + + results = {"ml_l": l_tune_res, "ml_m": m_tune_res} + + # an ML model for g is obtained for the IV-type score and callable scores + if "ml_g" in self._learner: + # construct an initial theta estimate from the tuned models using the partialling out score + # use cross-fitting for tuning ml_g + l_hat = cross_val_predict(l_tune_res.best_estimator, x, y, cv=cv, method=self._predict_method["ml_l"]) + m_hat = cross_val_predict(m_tune_res.best_estimator, x, d, cv=cv, method=self._predict_method["ml_m"]) + psi_a = -np.multiply(d - m_hat, d - m_hat) + psi_b = np.multiply(d - m_hat, y - l_hat) + theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) + + g_tune_res = _dml_tune_optuna( + y - theta_initial * d, + x, + self._learner["ml_g"], + optuna_params["ml_g"], + scoring_methods["ml_g"], + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g", + ) + results["ml_g"] = g_tune_res + + return results + def cate(self, basis, is_gate=False, **kwargs): """ Calculate conditional average treatment effects (CATE) for a given basis. diff --git a/doubleml/plm/tests/test_lplr_tune_ml_models.py b/doubleml/plm/tests/test_lplr_tune_ml_models.py new file mode 100644 index 00000000..2c649392 --- /dev/null +++ b/doubleml/plm/tests/test_lplr_tune_ml_models.py @@ -0,0 +1,93 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +import doubleml as dml +from doubleml.plm.datasets import make_lplr_LZZ2020 +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _small_tree_params, +) + + +@pytest.fixture(scope="module", params=["nuisance_space", "instrument"]) +def score(request): + return request.param + + +@pytest.fixture( + scope="module", + params=[DecisionTreeRegressor(random_state=567), None], +) +def ml_a(request): + return request.param + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +def test_doubleml_lplr_optuna_tune(sampler_name, optuna_sampler, score, ml_a): + np.random.seed(3141) + alpha = 0.5 + dml_data = make_lplr_LZZ2020(n_obs=200, dim_x=15, alpha=alpha) + + ml_M = DecisionTreeClassifier(random_state=123) + ml_t = DecisionTreeRegressor(random_state=234) + ml_m = DecisionTreeRegressor(random_state=456) + + dml_lplr = dml.DoubleMLLPLR( + dml_data, + ml_M=ml_M, + ml_t=ml_t, + ml_m=ml_m, + ml_a=ml_a, + n_folds=2, + n_folds_inner=2, + score=score, + ) + dml_lplr.fit() + untuned_score = dml_lplr.evaluate_learners() + + optuna_params = { + "ml_M": _small_tree_params, + "ml_m": _small_tree_params, + "ml_t": _small_tree_params, + "ml_a": _small_tree_params, + } + + tune_res = dml_lplr.tune_ml_models( + ml_param_space=optuna_params, + optuna_settings=_basic_optuna_settings({"sampler": optuna_sampler, "n_trials": 5}), + return_tune_res=True, + ) + + dml_lplr.fit() + 
tuned_score = dml_lplr.evaluate_learners() + + tuned_params_M = tune_res[0]["ml_M"].best_params + tuned_params_t = tune_res[0]["ml_t"].best_params + tuned_params_m = tune_res[0]["ml_m"].best_params + tuned_params_a = tune_res[0]["ml_a"].best_params + + _assert_tree_params(tuned_params_M) + _assert_tree_params(tuned_params_t) + _assert_tree_params(tuned_params_m) + _assert_tree_params(tuned_params_a) + + # ensure results contain optuna objects and best params + assert isinstance(tune_res[0], dict) + assert set(tune_res[0].keys()) == {"ml_M", "ml_m", "ml_t", "ml_a"} + assert hasattr(tune_res[0]["ml_M"], "best_params") + assert tune_res[0]["ml_M"].best_params["max_depth"] == tuned_params_M["max_depth"] + assert hasattr(tune_res[0]["ml_t"], "best_params") + assert tune_res[0]["ml_t"].best_params["max_depth"] == tuned_params_t["max_depth"] + assert hasattr(tune_res[0]["ml_m"], "best_params") + assert tune_res[0]["ml_m"].best_params["max_depth"] == tuned_params_m["max_depth"] + assert hasattr(tune_res[0]["ml_a"], "best_params") + assert tune_res[0]["ml_a"].best_params["max_depth"] == tuned_params_a["max_depth"] + + # ensure tuning improved RMSE # not actually possible for ml_t as the targets are not available + assert tuned_score["ml_M"] < untuned_score["ml_M"] + assert tuned_score["ml_m"] < untuned_score["ml_m"] + assert tuned_score["ml_a"] < untuned_score["ml_a"] diff --git a/doubleml/plm/tests/test_pliv_tune_ml_models.py b/doubleml/plm/tests/test_pliv_tune_ml_models.py new file mode 100644 index 00000000..3189980a --- /dev/null +++ b/doubleml/plm/tests/test_pliv_tune_ml_models.py @@ -0,0 +1,48 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeRegressor + +import doubleml as dml +from doubleml.plm.datasets import make_pliv_CHS2015 +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _build_param_space, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +def test_doubleml_pliv_optuna_tune(sampler_name, optuna_sampler): + """Test PLIV with ml_l, ml_m, ml_r nuisance models.""" + + np.random.seed(3144) + dml_data = make_pliv_CHS2015(n_obs=500, dim_x=15, dim_z=3) + + ml_l = DecisionTreeRegressor(random_state=123) + ml_m = DecisionTreeRegressor(random_state=456) + ml_r = DecisionTreeRegressor(random_state=789) + + dml_pliv = dml.DoubleMLPLIV(dml_data, ml_l, ml_m, ml_r, n_folds=2) + dml_pliv.fit() + untuned_score = dml_pliv.evaluate_learners() + + optuna_params = _build_param_space(dml_pliv, _small_tree_params) + + optuna_settings = _basic_optuna_settings({"sampler": optuna_sampler}) + optuna_settings["n_trials"] = 10 + tune_res = dml_pliv.tune_ml_models( + ml_param_space=optuna_params, set_as_params=True, optuna_settings=optuna_settings, return_tune_res=True + ) + + dml_pliv.fit() + tuned_score = dml_pliv.evaluate_learners() + + for learner_name in dml_pliv.params_names: + tuned_params = tune_res[0][learner_name].best_params + _assert_tree_params(tuned_params) + + # ensure tuning improved RMSE + assert tuned_score[learner_name] < untuned_score[learner_name] diff --git a/doubleml/plm/tests/test_plr_tune_ml_models.py b/doubleml/plm/tests/test_plr_tune_ml_models.py new file mode 100644 index 00000000..380bca8b --- /dev/null +++ b/doubleml/plm/tests/test_plr_tune_ml_models.py @@ -0,0 +1,80 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeRegressor + +import doubleml as dml +from 
doubleml.plm.datasets import make_plr_CCDDHNR2018 +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +def test_doubleml_plr_optuna_tune(sampler_name, optuna_sampler): + np.random.seed(3141) + alpha = 0.5 + dml_data = make_plr_CCDDHNR2018(n_obs=500, dim_x=5, alpha=alpha) + + ml_l = DecisionTreeRegressor(random_state=123) + ml_m = DecisionTreeRegressor(random_state=456) + + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2, score="partialling out") + dml_plr.fit() + untuned_score = dml_plr.evaluate_learners() + + optuna_params = {"ml_l": _small_tree_params, "ml_m": _small_tree_params} + + tune_res = dml_plr.tune_ml_models( + ml_param_space=optuna_params, + optuna_settings=_basic_optuna_settings({"sampler": optuna_sampler}), + return_tune_res=True, + ) + + dml_plr.fit() + tuned_score = dml_plr.evaluate_learners() + + tuned_params_l = tune_res[0]["ml_l"].best_params + tuned_params_m = tune_res[0]["ml_m"].best_params + + _assert_tree_params(tuned_params_l) + _assert_tree_params(tuned_params_m) + + # ensure results contain optuna objects and best params + assert isinstance(tune_res[0], dict) + assert set(tune_res[0].keys()) == {"ml_l", "ml_m"} + assert hasattr(tune_res[0]["ml_l"], "best_params") + assert tune_res[0]["ml_l"].best_params["max_depth"] == tuned_params_l["max_depth"] + assert hasattr(tune_res[0]["ml_m"], "best_params") + assert tune_res[0]["ml_m"].best_params["max_depth"] == tuned_params_m["max_depth"] + + # ensure tuning improved RMSE + assert tuned_score["ml_l"] < untuned_score["ml_l"] + assert tuned_score["ml_m"] < untuned_score["ml_m"] + + +@pytest.mark.ci +def test_doubleml_plr_optuna_tune_with_ml_g(): + np.random.seed(3150) + dml_data = make_plr_CCDDHNR2018(n_obs=200, dim_x=5, alpha=0.5) + + ml_l = DecisionTreeRegressor(random_state=11) + ml_m = DecisionTreeRegressor(random_state=12) + ml_g = DecisionTreeRegressor(random_state=13) + + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, ml_g, n_folds=2, score="IV-type") + + optuna_params = {"ml_l": _small_tree_params, "ml_m": _small_tree_params, "ml_g": _small_tree_params} + + tune_res = dml_plr.tune_ml_models( + ml_param_space=optuna_params, + optuna_settings=_basic_optuna_settings({"n_trials": 1}), + return_tune_res=True, + ) + + assert "ml_g" in tune_res[0] + ml_g_res = tune_res[0]["ml_g"] + assert ml_g_res.best_params is not None diff --git a/doubleml/tests/_utils_tune_optuna.py b/doubleml/tests/_utils_tune_optuna.py new file mode 100644 index 00000000..241451fd --- /dev/null +++ b/doubleml/tests/_utils_tune_optuna.py @@ -0,0 +1,51 @@ +import numpy as np +import optuna + + +def _basic_optuna_settings(additional=None): + base_settings = { + "n_trials": 10, + "sampler": optuna.samplers.TPESampler(seed=3141), + "verbosity": optuna.logging.WARNING, + "show_progress_bar": False, + } + if additional is not None: + base_settings.update(additional) + return base_settings + + +_SAMPLER_CASES = [ + ("random", optuna.samplers.RandomSampler(seed=3141)), + ("tpe", optuna.samplers.TPESampler(seed=3141)), +] + + +def _small_tree_params(trial): + return { + "max_depth": trial.suggest_int("max_depth", 1, 20), + "min_samples_split": trial.suggest_int("min_samples_split", 2, 20), + "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10), + } + + +def _assert_tree_params(param_dict, depth_range=(1, 20), 
leaf_range=(1, 10), split_range=(2, 20)): + assert set(param_dict.keys()) == {"max_depth", "min_samples_leaf", "min_samples_split"} + assert depth_range[0] <= param_dict["max_depth"] <= depth_range[1] + assert leaf_range[0] <= param_dict["min_samples_leaf"] <= leaf_range[1] + assert split_range[0] <= param_dict["min_samples_split"] <= split_range[1] + + +def _build_param_space(dml_obj, param_fn): + """Build parameter grid using the actual params_names from the DML object.""" + param_grid = {learner_name: param_fn for learner_name in dml_obj.params_names} + return param_grid + + +def _select_binary_periods(panel_data): + t_values = np.sort(panel_data.t_values) + finite_g = sorted(val for val in panel_data.g_values if np.isfinite(val)) + for candidate in finite_g: + pre_candidates = [t for t in t_values if t < candidate] + if pre_candidates: + return candidate, pre_candidates[-1], candidate + raise RuntimeError("No valid treatment group found for binary DID data.") diff --git a/doubleml/tests/test_datasets.py b/doubleml/tests/test_datasets.py index 95b6ea53..7d805346 100644 --- a/doubleml/tests/test_datasets.py +++ b/doubleml/tests/test_datasets.py @@ -7,6 +7,7 @@ msg_inv_return_type = "Invalid return_type." +@pytest.mark.ci def test_fetch_401K_return_types(): res = fetch_401K("DoubleMLData") assert isinstance(res, DoubleMLData) @@ -16,12 +17,14 @@ def test_fetch_401K_return_types(): _ = fetch_401K("matrix") +@pytest.mark.ci def test_fetch_401K_poly(): msg = "polynomial_features os not implemented yet for fetch_401K." with pytest.raises(NotImplementedError, match=msg): _ = fetch_401K(polynomial_features=True) +@pytest.mark.ci def test_fetch_bonus_return_types(): res = fetch_bonus("DoubleMLData") assert isinstance(res, DoubleMLData) @@ -31,6 +34,7 @@ def test_fetch_bonus_return_types(): _ = fetch_bonus("matrix") +@pytest.mark.ci def test_fetch_bonus_poly(): data_bonus_wo_poly = fetch_bonus(polynomial_features=False) n_x = len(data_bonus_wo_poly.x_cols) diff --git a/doubleml/tests/test_dml_tune_optuna.py b/doubleml/tests/test_dml_tune_optuna.py new file mode 100644 index 00000000..4a583c1c --- /dev/null +++ b/doubleml/tests/test_dml_tune_optuna.py @@ -0,0 +1,605 @@ +import numpy as np +import optuna +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression +from sklearn.model_selection import KFold +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +import doubleml as dml +from doubleml.irm.datasets import make_irm_data +from doubleml.plm.datasets import make_plr_CCDDHNR2018 +from doubleml.tests._utils_tune_optuna import ( + _assert_tree_params, + _basic_optuna_settings, + _small_tree_params, +) +from doubleml.utils._tune_optuna import ( + DMLOptunaResult, + _create_study, + _resolve_optuna_scoring, + resolve_optuna_cv, +) + + +@pytest.mark.ci +def test_resolve_optuna_scoring_regressor_default(): + learner = LinearRegression() + scoring, message = _resolve_optuna_scoring(None, learner, "ml_l") + assert scoring == "neg_root_mean_squared_error" + assert "neg_root_mean_squared_error" in message + + +@pytest.mark.ci +def test_resolve_optuna_scoring_classifier_default(): + learner = LogisticRegression() + scoring, message = _resolve_optuna_scoring(None, learner, "ml_m") + assert scoring == "neg_log_loss" + assert "neg_log_loss" in message + + +@pytest.mark.ci +def test_resolve_optuna_scoring_with_criterion_keeps_default(): + learner = DecisionTreeRegressor(random_state=0) + scoring, message = _resolve_optuna_scoring(None, learner, "ml_l") + assert 
scoring == "neg_root_mean_squared_error" + assert "neg_root_mean_squared_error" in message + + +@pytest.mark.ci +def test_resolve_optuna_scoring_lightgbm_regressor_default(): + pytest.importorskip("lightgbm") + from lightgbm import LGBMRegressor + + learner = LGBMRegressor() + scoring, message = _resolve_optuna_scoring(None, learner, "ml_l") + assert scoring == "neg_root_mean_squared_error" + assert "neg_root_mean_squared_error" in message + + +@pytest.mark.ci +def test_resolve_optuna_cv_sets_random_state(): + cv = resolve_optuna_cv(3) + assert isinstance(cv, KFold) + assert cv.shuffle is True + assert cv.random_state == 42 + + +@pytest.mark.ci +def test_doubleml_optuna_cv_variants(): + np.random.seed(3142) + dml_data = make_plr_CCDDHNR2018(n_obs=100, dim_x=5) + + ml_l = DecisionTreeRegressor(random_state=10, max_depth=5, min_samples_leaf=4) + ml_m = DecisionTreeRegressor(random_state=11, max_depth=5, min_samples_leaf=4) + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2, score="partialling out") + + optuna_params = {"ml_l": _small_tree_params, "ml_m": _small_tree_params} + + dml_plr.tune_ml_models( + ml_param_space=optuna_params, + cv=3, + optuna_settings=_basic_optuna_settings(), + ) + + int_l_params = dml_plr.get_params("ml_l")["d"][0][0] + int_m_params = dml_plr.get_params("ml_m")["d"][0][0] + + assert int_l_params is not None + assert int_m_params is not None + + cv_splitter = KFold(n_splits=3, shuffle=True, random_state=3142) + + dml_plr.tune_ml_models( + ml_param_space=optuna_params, + cv=cv_splitter, + optuna_settings=_basic_optuna_settings(), + ) + + split_l_params = dml_plr.get_params("ml_l")["d"][0][0] + split_m_params = dml_plr.get_params("ml_m")["d"][0][0] + + assert split_l_params is not None + assert split_m_params is not None + + class SimpleSplitter: + def __init__(self, n_splits=3): + self._kfold = KFold(n_splits=n_splits, shuffle=True, random_state=3142) + self._n_splits = n_splits + + def split(self, X, y=None, groups=None): + return self._kfold.split(X, y, groups) + + def get_n_splits(self, X=None, y=None, groups=None): + return self._n_splits + + custom_cv = SimpleSplitter(n_splits=3) + + dml_plr.tune_ml_models( + ml_param_space=optuna_params, + cv=custom_cv, + optuna_settings=_basic_optuna_settings({"n_trials": 2}), + ) + + custom_l_params = dml_plr.get_params("ml_l")["d"][0][0] + custom_m_params = dml_plr.get_params("ml_m")["d"][0][0] + + assert custom_l_params is not None + assert custom_m_params is not None + + base_iter_kfold = KFold(n_splits=3, shuffle=True, random_state=3142) + cv_iterable = list(base_iter_kfold.split(np.arange(dml_data.n_obs))) + + dml_plr.tune_ml_models( + ml_param_space=optuna_params, + cv=cv_iterable, + optuna_settings=_basic_optuna_settings({"n_trials": 2}), + ) + + iterable_l_params = dml_plr.get_params("ml_l")["d"][0][0] + iterable_m_params = dml_plr.get_params("ml_m")["d"][0][0] + + assert iterable_l_params is not None + assert iterable_m_params is not None + + explicit_cv_iterable = [ + (list(train_idx), list(test_idx)) for train_idx, test_idx in base_iter_kfold.split(np.arange(dml_data.n_obs)) + ] + + dml_plr.tune_ml_models( + ml_param_space=optuna_params, + cv=explicit_cv_iterable, + optuna_settings=_basic_optuna_settings({"n_trials": 2}), + ) + + explicit_l_params = dml_plr.get_params("ml_l")["d"][0][0] + explicit_m_params = dml_plr.get_params("ml_m")["d"][0][0] + + assert explicit_l_params is not None + assert explicit_m_params is not None + + cv = None + dml_plr.tune_ml_models( + ml_param_space=optuna_params, + cv=cv, + 
optuna_settings=_basic_optuna_settings({"n_trials": 2}), + ) + none_l_params = dml_plr.get_params("ml_l")["d"][0][0] + none_m_params = dml_plr.get_params("ml_m")["d"][0][0] + + assert none_l_params is not None + assert none_m_params is not None + + +@pytest.mark.ci +def test_create_study_respects_user_study_name(monkeypatch): + captured_kwargs = {} + + def fake_create_study(**kwargs): + captured_kwargs.update(kwargs) + + class _DummyStudy: + pass + + return _DummyStudy() + + monkeypatch.setattr(optuna, "create_study", fake_create_study) + + settings = { + "study": None, + "study_kwargs": {"study_name": "custom-study", "direction": "maximize"}, + "direction": "maximize", + "sampler": None, + } + + _create_study(settings, "ml_l") + + assert captured_kwargs["study_name"] == "custom-study" + + +@pytest.mark.ci +def test_doubleml_optuna_partial_tuning_single_learner(): + np.random.seed(3143) + dml_data = make_plr_CCDDHNR2018(n_obs=100, dim_x=5) + + ml_l = DecisionTreeRegressor(random_state=20, max_depth=5, min_samples_leaf=4) + ml_m = DecisionTreeRegressor(random_state=21, max_depth=5, min_samples_leaf=4) + + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2, score="partialling out") + + optuna_params = {"ml_l": _small_tree_params} + + tune_res = dml_plr.tune_ml_models( + ml_param_space=optuna_params, + optuna_settings=_basic_optuna_settings(), + return_tune_res=True, + ) + + res_ml_m = tune_res[0]["ml_m"] + res_ml_l = tune_res[0]["ml_l"] + + assert res_ml_m.tuned is False + assert res_ml_m.best_estimator.get_params() == ml_m.get_params() # assert default params kept + assert res_ml_l.tuned is True + _assert_tree_params(res_ml_l.best_params) # assert tuned params valid + + assert isinstance(tune_res[0], dict) + assert set(tune_res[0].keys()) == {"ml_l", "ml_m"} + + +@pytest.mark.ci +def test_doubleml_optuna_sets_params_for_all_folds(): + np.random.seed(3153) + dml_data = make_plr_CCDDHNR2018(n_obs=100, dim_x=5) + + ml_l = DecisionTreeRegressor(random_state=101, max_depth=5, min_samples_leaf=4) + ml_m = DecisionTreeRegressor(random_state=202, max_depth=5, min_samples_leaf=4) + + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=3, n_rep=2) + + optuna_params = {"ml_l": _small_tree_params, "ml_m": _small_tree_params} + + dml_plr.tune_ml_models(ml_param_space=optuna_params, optuna_settings=_basic_optuna_settings()) + + l_params = dml_plr.get_params("ml_l") + m_params = dml_plr.get_params("ml_m") + + assert set(l_params.keys()) == {"d"} + assert set(m_params.keys()) == {"d"} + + expected_l = dict(l_params["d"][0][0]) + expected_m = dict(m_params["d"][0][0]) + + assert len(l_params["d"]) == dml_plr.n_rep + assert len(m_params["d"]) == dml_plr.n_rep + + for rep_idx in range(dml_plr.n_rep): + assert len(l_params["d"][rep_idx]) == dml_plr.n_folds + assert len(m_params["d"][rep_idx]) == dml_plr.n_folds + for fold_idx in range(dml_plr.n_folds): + l_fold_params = l_params["d"][rep_idx][fold_idx] + m_fold_params = m_params["d"][rep_idx][fold_idx] + assert l_fold_params is not None + assert m_fold_params is not None + assert l_fold_params == expected_l + assert m_fold_params == expected_m + + +@pytest.mark.ci +def test_doubleml_optuna_fit_uses_tuned_params(): + np.random.seed(3154) + dml_data = make_plr_CCDDHNR2018(n_obs=100, dim_x=5) + + ml_l = DecisionTreeRegressor(random_state=303, max_depth=5, min_samples_leaf=4) + ml_m = DecisionTreeRegressor(random_state=404, max_depth=5, min_samples_leaf=4) + + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2, n_rep=1) + + optuna_params = 
{"ml_l": _small_tree_params, "ml_m": _small_tree_params} + + dml_plr.tune_ml_models(ml_param_space=optuna_params, optuna_settings=_basic_optuna_settings()) + + expected_l = dict(dml_plr.get_params("ml_l")["d"][0][0]) + expected_m = dict(dml_plr.get_params("ml_m")["d"][0][0]) + + dml_plr.fit(store_predictions=False, store_models=True) + + for rep_idx in range(dml_plr.n_rep): + for fold_idx in range(dml_plr.n_folds): + ml_l_model = dml_plr.models["ml_l"]["d"][rep_idx][fold_idx] + ml_m_model = dml_plr.models["ml_m"]["d"][rep_idx][fold_idx] + assert ml_l_model is not None + assert ml_m_model is not None + for key, value in expected_l.items(): + assert ml_l_model.get_params()[key] == value + for key, value in expected_m.items(): + assert ml_m_model.get_params()[key] == value + + +@pytest.mark.ci +def test_dml_optuna_result_str_representation(): + def custom_scorer(**args): + return 0.0 + + primary_result = DMLOptunaResult( + learner_name="ml_l", + params_name="ml_l", + best_estimator=LinearRegression(), + best_params={"alpha": 1, "depth": 3}, + best_score=0.123, + scoring_method="neg_mean_squared_error", + study=None, + tuned=True, + ) + + primary_str = str(primary_result) + assert primary_str.startswith("================== DMLOptunaResult") + assert "Learner name: ml_l" in primary_str + assert "Best score: 0.123" in primary_str + assert "Scoring method: neg_mean_squared_error" in primary_str + assert "'alpha': 1" in primary_str + assert "'depth': 3" in primary_str + + empty_params_result = DMLOptunaResult( + learner_name="ml_m", + params_name="ml_m", + best_estimator=LinearRegression(), + best_params={}, + best_score=-0.5, + scoring_method=custom_scorer, + study=None, + tuned=False, + ) + + empty_str = str(empty_params_result) + assert "Learner name: ml_m" in empty_str + assert "Scoring method: custom_scorer" in empty_str + assert "No best parameters available." 
in empty_str + + +@pytest.mark.ci +def test_doubleml_optuna_scoring_method_variants(): + np.random.seed(3156) + dml_data = make_plr_CCDDHNR2018(n_obs=120, dim_x=5) + + optuna_params = {"ml_l": _small_tree_params, "ml_m": _small_tree_params} + + ml_l_string = DecisionTreeRegressor(random_state=501) + ml_m_default = DecisionTreeRegressor(random_state=502) + dml_plr_string = dml.DoubleMLPLR(dml_data, ml_l_string, ml_m_default, n_folds=2) + + scoring_methods_string = {"ml_l": "neg_mean_squared_error"} + + tune_res_string = dml_plr_string.tune_ml_models( + ml_param_space=optuna_params, + scoring_methods=scoring_methods_string, + optuna_settings=_basic_optuna_settings({"n_trials": 2}), + return_tune_res=True, + ) + + assert tune_res_string[0]["ml_l"].scoring_method == "neg_mean_squared_error" + assert tune_res_string[0]["ml_m"].scoring_method == "neg_root_mean_squared_error" + + def neg_mae_scorer(estimator, x, y): + preds = estimator.predict(x) + return -np.mean(np.abs(y - preds)) + + ml_l_callable = DecisionTreeRegressor(random_state=601) + ml_m_callable = DecisionTreeRegressor(random_state=602) + dml_plr_callable = dml.DoubleMLPLR(dml_data, ml_l_callable, ml_m_callable, n_folds=2) + + scoring_methods_callable = {"ml_l": neg_mae_scorer, "ml_m": neg_mae_scorer} + + tune_res_callable = dml_plr_callable.tune_ml_models( + ml_param_space=optuna_params, + scoring_methods=scoring_methods_callable, + optuna_settings=_basic_optuna_settings({"n_trials": 2}), + return_tune_res=True, + ) + + assert tune_res_callable[0]["ml_l"].scoring_method is neg_mae_scorer + assert tune_res_callable[0]["ml_m"].scoring_method is neg_mae_scorer + + +@pytest.mark.ci +def test_doubleml_optuna_invalid_settings_key_raises(): + np.random.seed(3155) + dml_data = make_irm_data(n_obs=100, dim_x=5) + + ml_g = DecisionTreeRegressor(random_state=111, max_depth=5, min_samples_leaf=4) + ml_m = DecisionTreeClassifier(random_state=222, max_depth=5, min_samples_leaf=4) + + dml_irm = dml.DoubleMLIRM(dml_data, ml_g, ml_m, n_folds=2) + + optuna_params = {"ml_g0": _small_tree_params, "ml_g1": _small_tree_params, "ml_m": _small_tree_params} + invalid_settings = _basic_optuna_settings({"ml_l": {"n_trials": 2}}) + + with pytest.raises(ValueError, match="ml_l"): + dml_irm.tune_ml_models(ml_param_space=optuna_params, optuna_settings=invalid_settings) + + +@pytest.mark.ci +def test_optuna_settings_hierarchy_overrides(): + np.random.seed(3160) + dml_data = make_irm_data(n_obs=80, dim_x=5) + + ml_g = DecisionTreeRegressor(random_state=123) + ml_m = DecisionTreeClassifier(random_state=456) + dml_irm = dml.DoubleMLIRM(dml_data, ml_g, ml_m, n_folds=2, n_rep=1) + + optuna_params = {"ml_g": _small_tree_params, "ml_g1": _small_tree_params, "ml_m": _small_tree_params} + scoring_methods = { + "ml_g0": "neg_mean_squared_error", + "ml_g1": "neg_mean_squared_error", + "ml_m": "roc_auc", + } + + optuna_settings = { + "n_trials": 4, + "direction": "maximize", + "show_progress_bar": False, + "ml_g": {"n_trials": 2}, + "ml_g1": {"n_trials": 3}, + } + + tune_res = dml_irm.tune_ml_models( + ml_param_space=optuna_params, + optuna_settings=optuna_settings, + scoring_methods=scoring_methods, + cv=3, + return_tune_res=True, + ) + + result_map = tune_res[0] + + def _completed_trials(study): + return sum(trial.state == optuna.trial.TrialState.COMPLETE for trial in study.trials) + + assert _completed_trials(result_map["ml_g0"].study) == 2 + assert _completed_trials(result_map["ml_g1"].study) == 3 + assert _completed_trials(result_map["ml_m"].study) == 4 + + 
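The settings hierarchy exercised in the test directly above can be summarised with a short usage sketch. This is illustrative only and not part of the diff: the class, dataset helper, and argument names are taken from the tests in this changeset, while the seed and search bounds are arbitrary. Top-level optuna_settings entries act as defaults, a learner-level sub-dict such as "ml_g" refines them, and a params-level sub-dict such as "ml_g1" takes the highest precedence.

    import numpy as np
    from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

    import doubleml as dml
    from doubleml.irm.datasets import make_irm_data


    def tree_space(trial):
        # callable parameter space: receives an optuna trial, returns a parameter dict
        return {
            "max_depth": trial.suggest_int("max_depth", 1, 20),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
        }


    np.random.seed(3160)
    dml_data = make_irm_data(n_obs=80, dim_x=5)
    dml_irm = dml.DoubleMLIRM(dml_data, DecisionTreeRegressor(), DecisionTreeClassifier(), n_folds=2)

    optuna_settings = {
        "n_trials": 4,             # default budget, used by ml_m
        "show_progress_bar": False,
        "ml_g": {"n_trials": 2},   # learner-level override, picked up by ml_g0
        "ml_g1": {"n_trials": 3},  # params-level override, beats the "ml_g" entry
    }

    tune_res = dml_irm.tune_ml_models(
        ml_param_space={"ml_g": tree_space, "ml_g1": tree_space, "ml_m": tree_space},
        optuna_settings=optuna_settings,
        cv=3,
        return_tune_res=True,
    )
    dml_irm.fit()

Under these settings the assertions in test_optuna_settings_hierarchy_overrides expect 2 completed trials for ml_g0 (inherited from the "ml_g" entry), 3 for ml_g1, and 4 for ml_m.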
+@pytest.mark.ci +def test_optuna_logging_integration(): + """Test that logging integration works correctly with Optuna.""" + import logging + + np.random.seed(3154) + dml_data = make_plr_CCDDHNR2018(n_obs=100, dim_x=5) + + ml_l = DecisionTreeRegressor(random_state=303, max_depth=5, min_samples_leaf=4) + ml_m = DecisionTreeRegressor(random_state=404, max_depth=5, min_samples_leaf=4) + + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2, n_rep=1) + + optuna_params = {"ml_l": _small_tree_params, "ml_m": _small_tree_params} + + # Capture log messages + logger = logging.getLogger("doubleml.utils._tune_optuna") + original_level = logger.level + + # Create a custom handler to capture log records + log_records = [] + + class ListHandler(logging.Handler): + def emit(self, record): + log_records.append(record) + + handler = ListHandler() + handler.setLevel(logging.INFO) + logger.addHandler(handler) + logger.setLevel(logging.INFO) + + try: + # Tune with specific settings that should trigger log messages + optuna_settings = { + "n_trials": 2, + "sampler": optuna.samplers.TPESampler(seed=42), + "show_progress_bar": False, + } + + dml_plr.tune_ml_models(ml_param_space=optuna_params, optuna_settings=optuna_settings) + + # Check that we got log messages + log_messages = [record.getMessage() for record in log_records] + + # Should have messages about direction and sampler for each learner + direction_messages = [msg for msg in log_messages if "direction set to" in msg] + sampler_messages = [msg for msg in log_messages if "sampler" in msg.lower()] + scoring_messages = [msg for msg in log_messages if "scoring method" in msg.lower()] + + # We should have at least one message about direction + assert len(direction_messages) > 0, "Expected log messages about optimization direction" + + # We should have messages about the sampler + assert len(sampler_messages) > 0, "Expected log messages about sampler" + + # We should have messages about scoring + assert len(scoring_messages) > 0, "Expected log messages about scoring method" + + # Verify that the tuning actually worked + tuned_l = dml_plr.get_params("ml_l")["d"][0][0] + tuned_m = dml_plr.get_params("ml_m")["d"][0][0] + assert tuned_l is not None + assert tuned_m is not None + + finally: + # Clean up + logger.removeHandler(handler) + logger.setLevel(original_level) + + +@pytest.mark.ci +def test_optuna_logging_verbosity_sync(): + """Test that DoubleML logger level syncs with Optuna logger level.""" + import logging + + np.random.seed(3155) + dml_data = make_plr_CCDDHNR2018(n_obs=100, dim_x=5) + + ml_l = DecisionTreeRegressor(random_state=111) + ml_m = DecisionTreeRegressor(random_state=222) + + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2) + + optuna_params = {"ml_l": _small_tree_params, "ml_m": _small_tree_params} + + # Set DoubleML logger to DEBUG + logger = logging.getLogger("doubleml.utils._tune_optuna") + original_level = logger.level + logger.setLevel(logging.DEBUG) + + try: + # Tune without explicit verbosity setting + optuna_settings = { + "n_trials": 1, + "show_progress_bar": False, + } + + dml_plr.tune_ml_models(ml_param_space=optuna_params, optuna_settings=optuna_settings) + + # The test passes if no exception is raised + # The actual sync happens internally in _dml_tune_optuna + assert True + + finally: + logger.setLevel(original_level) + + +@pytest.mark.ci +def test_optuna_logging_explicit_verbosity(): + """Test that explicit verbosity setting in optuna_settings takes precedence.""" + np.random.seed(3156) + dml_data = 
make_plr_CCDDHNR2018(n_obs=100, dim_x=5) + + ml_l = DecisionTreeRegressor(random_state=333) + ml_m = DecisionTreeRegressor(random_state=444) + + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2) + + optuna_params = {"ml_l": _small_tree_params, "ml_m": _small_tree_params} + + # Explicitly set Optuna verbosity + optuna_settings = { + "n_trials": 1, + "verbosity": optuna.logging.WARNING, + "show_progress_bar": False, + } + + # This should not raise an error + dml_plr.tune_ml_models(ml_param_space=optuna_params, optuna_settings=optuna_settings) + + # Verify tuning worked + tuned_l = dml_plr.get_params("ml_l")["d"][0][0] + assert tuned_l is not None + + +@pytest.mark.ci +def test_doubleml_optuna_respects_provided_study_instances(): + np.random.seed(3157) + dml_data = make_plr_CCDDHNR2018(n_obs=80, dim_x=4) + + ml_l = DecisionTreeRegressor(random_state=555, max_depth=3, min_samples_leaf=3) + ml_m = DecisionTreeRegressor(random_state=556, max_depth=3, min_samples_leaf=3) + + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2) + + study_l = optuna.create_study(direction="maximize") + study_m = optuna.create_study(direction="maximize") + + optuna_params = {"ml_l": _small_tree_params, "ml_m": _small_tree_params} + optuna_settings = { + "n_trials": 1, + "show_progress_bar": False, + "ml_l": {"study": study_l}, + "ml_m": {"study": study_m}, + } + + tune_res = dml_plr.tune_ml_models( + ml_param_space=optuna_params, + optuna_settings=optuna_settings, + return_tune_res=True, + ) + + assert tune_res[0]["ml_l"].study is study_l + assert tune_res[0]["ml_m"].study is study_m diff --git a/doubleml/tests/test_dml_tune_optuna_exceptions.py b/doubleml/tests/test_dml_tune_optuna_exceptions.py new file mode 100644 index 00000000..d7e7c0fe --- /dev/null +++ b/doubleml/tests/test_dml_tune_optuna_exceptions.py @@ -0,0 +1,314 @@ +import re + +import numpy as np +import pytest +from sklearn.base import BaseEstimator, RegressorMixin +from sklearn.linear_model import Lasso + +from doubleml import DoubleMLData, DoubleMLPLR +from doubleml.plm.datasets import make_plr_CCDDHNR2018 +from doubleml.utils._tune_optuna import ( + _check_tuning_inputs, + _create_objective, + _default_optuna_settings, + _dml_tune_optuna, + _get_optuna_settings, + _resolve_optuna_scoring, + resolve_optuna_cv, +) + +np.random.seed(42) +data = make_plr_CCDDHNR2018(n_obs=100, dim_x=5, return_type="DataFrame") +dml_data = DoubleMLData(data, "y", "d") +ml_l = Lasso() +ml_m = Lasso() +dml_plr = DoubleMLPLR(dml_data, ml_l, ml_m) + + +def ml_l_params(trial): + return {"alpha": trial.suggest_float("alpha", 0.01, 0.1)} + + +def ml_m_params(trial): + return {"alpha": trial.suggest_float("alpha", 0.01, 0.1)} + + +valid_param_space = {"ml_l": ml_l_params, "ml_m": ml_m_params} + + +@pytest.mark.ci +@pytest.mark.parametrize( + "ml_param_space, msg", + [ + (None, "ml_param_space must be a non-empty dictionary."), + ( + {"ml_l": ml_l_params, "invalid_key": ml_m_params}, + r"Invalid ml_param_space keys for DoubleMLPLR: invalid_key. Valid keys are: ml_l, ml_m.", + ), + ( + {"ml_l": ml_l_params, "ml_m": "invalid"}, + "Parameter space for 'ml_m' must be a callable function that takes a trial and returns a dict. 
Got str.", + ), + ], +) +def test_tune_ml_models_invalid_param_space(ml_param_space, msg): + with pytest.raises(ValueError if "keys" in msg or "non-empty" in msg else TypeError, match=msg): + dml_plr.tune_ml_models(ml_param_space) + + +@pytest.mark.ci +@pytest.mark.parametrize( + "scoring_methods, exc, msg", + [ + ("invalid", TypeError, "scoring_methods must be provided as a dictionary keyed by learner name."), + ( + {"ml_l": "accuracy", "invalid_learner": "accuracy"}, + ValueError, + r"Invalid scoring_methods keys for DoubleMLPLR: invalid_learner. Valid keys are: ml_l, ml_m.", + ), + ( + {"ml_l": 123}, + TypeError, + r"scoring_method must be None, a string, a callable, accepted by scikit-learn. Got int for learner 'ml_l'.", + ), + ], +) +def test_tune_ml_models_invalid_scoring_methods(scoring_methods, exc, msg): + with pytest.raises(exc, match=re.escape(msg)): + dml_plr.tune_ml_models(valid_param_space, scoring_methods=scoring_methods) + + +@pytest.mark.ci +@pytest.mark.parametrize( + "cv, msg", + [ + ("invalid", "cv must not be provided as a string. Pass an integer or a cross-validation splitter."), + (1, "The number of folds used for tuning must be at least two. 1 was passed."), + ], +) +def test_tune_ml_models_invalid_cv(cv, msg): + with pytest.raises(ValueError if "folds" in msg else TypeError, match=msg): + dml_plr.tune_ml_models(valid_param_space, cv=cv) + + +@pytest.mark.ci +@pytest.mark.parametrize( + "set_as_params, msg", + [ + ("invalid", "set_as_params must be True or False. Got invalid."), + (None, "set_as_params must be True or False. Got None."), + ], +) +def test_tune_ml_models_invalid_set_as_params(set_as_params, msg): + with pytest.raises(TypeError, match=msg): + dml_plr.tune_ml_models(valid_param_space, set_as_params=set_as_params) + + +@pytest.mark.ci +@pytest.mark.parametrize( + "return_tune_res, msg", + [ + ("invalid", "return_tune_res must be True or False. Got invalid."), + (None, "return_tune_res must be True or False. Got None."), + ], +) +def test_tune_ml_models_invalid_return_tune_res(return_tune_res, msg): + with pytest.raises(TypeError, match=msg): + dml_plr.tune_ml_models(valid_param_space, return_tune_res=return_tune_res) + + +@pytest.mark.ci +@pytest.mark.parametrize( + "optuna_settings, msg", + [ + ("invalid", "optuna_settings must be a dict or None. Got ."), + ( + {"invalid_key": "value"}, + r"Invalid optuna_settings keys for DoubleMLPLR: invalid_key. Valid learner-specific keys are: ml_l, ml_m.", + ), + ({"ml_l": "invalid"}, "Optuna settings for 'ml_l' must be a dict."), + ], +) +def test_tune_ml_models_invalid_optuna_settings(optuna_settings, msg): + with pytest.raises(TypeError if "dict" in msg else ValueError, match=msg): + dml_plr.tune_ml_models(valid_param_space, optuna_settings=optuna_settings) + + +# add test for giving non iterable cv object +@pytest.mark.ci +def test_tune_ml_models_non_iterable_cv(): + class NonIterableCV: + pass + + non_iterable_cv = NonIterableCV() + msg = re.escape( + "cv must be an integer >= 2, a scikit-learn cross-validation splitter, " + "or an iterable of (train_indices, test_indices) pairs." 
+ ) + with pytest.raises(TypeError, match=msg): + dml_plr.tune_ml_models(valid_param_space, cv=non_iterable_cv) + + +@pytest.mark.ci +def test_resolve_optuna_cv_invalid_iterable_pairs(): + invalid_cv = [(np.array([0, 1]),)] + msg = re.escape("cv iterable must yield (train_indices, test_indices) pairs.") + with pytest.raises(TypeError, match=msg): + resolve_optuna_cv(invalid_cv) + + +@pytest.mark.ci +def test_resolve_optuna_scoring_unknown_estimator_type(): + class GenericEstimator(BaseEstimator): + def fit(self, x, y): + return self + + def set_params(self, **params): + return self + + msg = ( + "No scoring method provided and estimator type could not be inferred. " + "Please provide a scoring_method for learner 'ml_l'." + ) + with pytest.raises(ValueError, match=re.escape(msg)): + _resolve_optuna_scoring(None, GenericEstimator(), "ml_l") + + +@pytest.mark.ci +def test_check_tuning_inputs_mismatched_dimensions(): + x = np.zeros((3, 2)) + y = np.zeros(5) + with pytest.raises( + ValueError, + match=re.escape("Features and target must contain the same number of observations for learner 'ml_l'."), + ): + _check_tuning_inputs(y, x, Lasso(), lambda trial: {}, "neg_mean_squared_error", 2, "ml_l") + + +@pytest.mark.ci +def test_check_tuning_inputs_empty_target(): + x = np.zeros((0, 2)) + y = np.zeros(0) + with pytest.raises( + ValueError, + match=re.escape("Empty target passed to Optuna tuner for learner 'ml_l'."), + ): + _check_tuning_inputs(y, x, Lasso(), lambda trial: {}, "neg_mean_squared_error", 2, "ml_l") + + +@pytest.mark.ci +def test_check_tuning_inputs_invalid_learner_interface(): + class BadLearner: + def set_params(self, **kwargs): + return self + + x = np.zeros((5, 2)) + y = np.zeros(5) + with pytest.raises( + TypeError, + match=re.escape("Learner 'ml_l' must implement fit and set_params to be tuned with Optuna."), + ): + _check_tuning_inputs(y, x, BadLearner(), lambda trial: {}, "neg_mean_squared_error", 2, "ml_l") + + +@pytest.mark.ci +def test_check_tuning_inputs_non_callable_param_grid(): + x = np.zeros((5, 2)) + y = np.zeros(5) + msg = "param_grid must be a callable function that takes a trial and returns a dict. " "Got str for learner 'ml_l'." 
+ with pytest.raises(TypeError, match=re.escape(msg)): + _check_tuning_inputs(y, x, Lasso(), "not-callable", "neg_mean_squared_error", 2, "ml_l") + + +@pytest.mark.ci +def test_get_optuna_settings_requires_dict(): + with pytest.raises(TypeError, match="optuna_settings must be a dict or None."): + _get_optuna_settings("invalid", "ml_l") + + +@pytest.mark.ci +def test_get_optuna_settings_returns_default_copy_for_none(): + resolved_a = _get_optuna_settings(None, "ml_l") + resolved_b = _get_optuna_settings(None, "ml_l") + # Ensure defaults are preserved + for key, value in _default_optuna_settings().items(): + assert resolved_a[key] == value + # Ensure copies are independent + resolved_a["n_trials"] = 5 + assert resolved_b["n_trials"] == _default_optuna_settings()["n_trials"] + + +@pytest.mark.ci +def test_get_optuna_settings_validates_study_kwargs_type(): + with pytest.raises(TypeError, match="study_kwargs must be a dict."): + _get_optuna_settings({"study_kwargs": "invalid"}, "ml_l") + + +@pytest.mark.ci +def test_get_optuna_settings_validates_optimize_kwargs_type(): + with pytest.raises(TypeError, match="optimize_kwargs must be a dict."): + _get_optuna_settings({"optimize_kwargs": "invalid"}, "ml_l") + + +@pytest.mark.ci +def test_get_optuna_settings_validates_callbacks_type(): + with pytest.raises(TypeError, match="callbacks must be a sequence of callables or None."): + _get_optuna_settings({"callbacks": "invalid"}, "ml_l") + + +@pytest.mark.ci +def test_create_objective_requires_dict_params(): + x = np.asarray(dml_data.x) + y = np.asarray(dml_data.y) + + def bad_param_func(trial): + return ["not-a-dict"] + + objective = _create_objective( + bad_param_func, + Lasso(), + x, + y, + resolve_optuna_cv(2), + "neg_mean_squared_error", + ) + msg = ( + "param function must return a dict. Got list. 
Example: def params(trial): " + "return {'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1)}" + ) + with pytest.raises(TypeError, match=re.escape(msg)): + objective(None) + + +@pytest.mark.ci +def test_dml_tune_optuna_raises_when_no_trials_complete(): + class FailingRegressor(BaseEstimator, RegressorMixin): + def fit(self, x, y): + raise ValueError("fail") + + def predict(self, x): + return np.zeros(x.shape[0]) + + x = np.asarray(dml_data.x) + y = np.asarray(dml_data.y) + optuna_settings = { + "n_trials": 1, + "catch": (ValueError,), + "study_kwargs": {}, + "optimize_kwargs": {}, + } + with pytest.raises( + RuntimeError, + match="Optuna optimization failed to produce any complete trials.", + ): + _dml_tune_optuna( + y, + x, + FailingRegressor(), + lambda trial: {}, + "neg_mean_squared_error", + 2, + optuna_settings, + "ml_l", + "ml_l", + ) diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py index 4fca5318..76b0ff12 100644 --- a/doubleml/tests/test_exceptions.py +++ b/doubleml/tests/test_exceptions.py @@ -1153,6 +1153,7 @@ def test_doubleml_sensitivity_inputs(): _ = dml_irm._set_sensitivity_elements(sensitivity_elements=sensitivity_elements, i_rep=0, i_treat=0) +@pytest.mark.ci def test_doubleml_sensitivity_reestimation_warning(): dml_irm = DoubleMLIRM( dml_data_irm, Lasso(), LogisticRegression(), ps_processor_config=PSProcessorConfig(clipping_threshold=0.1) @@ -1166,6 +1167,7 @@ def test_doubleml_sensitivity_reestimation_warning(): dml_irm._validate_sensitivity_elements() +@pytest.mark.ci def test_doubleml_sensitivity_summary(): dml_irm = DoubleMLIRM( dml_data_irm, Lasso(), LogisticRegression(), ps_processor_config=PSProcessorConfig(clipping_threshold=0.1) diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py index f562f98d..e8440650 100644 --- a/doubleml/tests/test_framework_exceptions.py +++ b/doubleml/tests/test_framework_exceptions.py @@ -167,6 +167,7 @@ def test_input_exceptions(): framework_names.treatment_names = ["test", "test2", "test3"] +@pytest.mark.ci def test_operation_exceptions(): # addition msg = "Unsupported operand type: " diff --git a/doubleml/tests/test_nonlinear_score_mixin.py b/doubleml/tests/test_nonlinear_score_mixin.py index d68785aa..ebc4f192 100644 --- a/doubleml/tests/test_nonlinear_score_mixin.py +++ b/doubleml/tests/test_nonlinear_score_mixin.py @@ -162,6 +162,9 @@ def _nuisance_tuning( ): pass + def _nuisance_tuning_optuna(self, optuna_params, scoring_methods, cv, optuna_settings): + pass + def _sensitivity_element_est(self, preds): pass diff --git a/doubleml/tests/test_optuna_multi_wrappers.py b/doubleml/tests/test_optuna_multi_wrappers.py new file mode 100644 index 00000000..d484b4ef --- /dev/null +++ b/doubleml/tests/test_optuna_multi_wrappers.py @@ -0,0 +1,205 @@ +from unittest.mock import MagicMock + +import numpy as np +import pandas as pd +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +import doubleml as dml +from doubleml.data import DoubleMLPanelData +from doubleml.did.datasets import make_did_CS2021 +from doubleml.irm.datasets import make_irm_data, make_irm_data_discrete_treatments +from doubleml.tests._utils_tune_optuna import _basic_optuna_settings, _small_tree_params + + +def _build_apos_object(): + np.random.seed(3141) + data = make_irm_data_discrete_treatments(n_obs=40, n_levels=3, random_state=42) + x = data["x"] + y = data["y"] + d = data["d"] + columns = ["y", "d"] + [f"x{i + 1}" for i in range(x.shape[1])] + 
df = pd.DataFrame(np.column_stack((y, d, x)), columns=columns) + dml_data = dml.DoubleMLData(df, "y", "d") + + ml_g = LinearRegression() + ml_m = LogisticRegression(max_iter=200, multi_class="auto") + + return dml.DoubleMLAPOS( + dml_data, + ml_g=ml_g, + ml_m=ml_m, + treatment_levels=[0, 1, 2], + n_folds=2, + n_rep=1, + ) + + +def _build_qte_object(): + np.random.seed(3141) + dml_data = make_irm_data(n_obs=80, dim_x=5) + ml = LogisticRegression(max_iter=200, multi_class="auto") + + return dml.DoubleMLQTE( + dml_data, + ml_g=ml, + ml_m=ml, + quantiles=[0.25, 0.75], + n_folds=2, + n_rep=1, + ) + + +def _build_did_multi_object(): + np.random.seed(3141) + df = make_did_CS2021(n_obs=40, n_periods=4, time_type="datetime") + x_cols = [col for col in df.columns if col.startswith("Z")] + dml_panel = DoubleMLPanelData(df, y_col="y", d_cols="d", t_col="t", id_col="id", x_cols=x_cols) + + ml_g = LinearRegression() + ml_m = LogisticRegression(max_iter=200, multi_class="auto") + + return dml.did.DoubleMLDIDMulti( + obj_dml_data=dml_panel, + ml_g=ml_g, + ml_m=ml_m, + control_group="never_treated", + n_folds=2, + n_rep=1, + panel=True, + ) + + +def _collect_param_names(dml_obj): + if hasattr(dml_obj, "params_names") and dml_obj.params_names: + return dml_obj.params_names + learner_dict = getattr(dml_obj, "_learner", None) + if isinstance(learner_dict, dict) and learner_dict: + return list(learner_dict.keys()) + return ["ml_g"] + + +def _make_tune_kwargs(dml_obj, return_tune_res=True): + optuna_params = {name: _small_tree_params for name in _collect_param_names(dml_obj)} + return { + "ml_param_space": optuna_params, + "cv": 3, + "set_as_params": False, + "return_tune_res": return_tune_res, + "optuna_settings": _basic_optuna_settings({"n_trials": 2}), + "scoring_methods": None, + } + + +@pytest.fixture +def apos_obj(): + return _build_apos_object() + + +@pytest.fixture +def qte_obj(): + return _build_qte_object() + + +@pytest.fixture +def did_multi_obj(): + return _build_did_multi_object() + + +@pytest.mark.ci +def test_doubleml_apos_tune_ml_models_collects_results(apos_obj): + dml_obj = apos_obj + mocks = [] + expected_payload = [] + + for idx in range(dml_obj.n_treatment_levels): + mock_model = MagicMock() + payload = {"params": f"level-{idx}"} + mock_model.tune_ml_models.return_value = [payload] + mocks.append(mock_model) + expected_payload.append(payload) + + dml_obj._modellist = mocks + + tune_kwargs = _make_tune_kwargs(dml_obj) + + res = dml_obj.tune_ml_models(**tune_kwargs) + assert res == expected_payload + for mock in mocks: + mock.tune_ml_models.assert_called_once_with(**tune_kwargs) + + for mock in mocks: + mock.reset_mock() + tune_kwargs_nores = _make_tune_kwargs(dml_obj, return_tune_res=False) + + assert dml_obj.tune_ml_models(**tune_kwargs_nores) is dml_obj + for mock in mocks: + mock.tune_ml_models.assert_called_once_with(**tune_kwargs_nores) + + +@pytest.mark.ci +def test_doubleml_qte_tune_ml_models_returns_quantile_results(qte_obj): + dml_obj = qte_obj + modellist_0 = [] + modellist_1 = [] + expected_payload = [] + + for idx in range(dml_obj.n_quantiles): + mock_0 = MagicMock() + mock_1 = MagicMock() + payload_0 = {"params": f"quantile-{idx}-treatment-0"} + payload_1 = {"params": f"quantile-{idx}-treatment-1"} + mock_0.tune_ml_models.return_value = [payload_0] + mock_1.tune_ml_models.return_value = [payload_1] + modellist_0.append(mock_0) + modellist_1.append(mock_1) + expected_payload.append({"treatment_0": payload_0, "treatment_1": payload_1}) + + dml_obj._modellist_0 = modellist_0 + 
dml_obj._modellist_1 = modellist_1 + + tune_kwargs = _make_tune_kwargs(dml_obj) + + res = dml_obj.tune_ml_models(**tune_kwargs) + assert res == expected_payload + for mock in modellist_0 + modellist_1: + mock.tune_ml_models.assert_called_once_with(**tune_kwargs) + + for mock in modellist_0 + modellist_1: + mock.reset_mock() + tune_kwargs_nores = _make_tune_kwargs(dml_obj, return_tune_res=False) + + assert dml_obj.tune_ml_models(**tune_kwargs_nores) is dml_obj + for mock in modellist_0 + modellist_1: + mock.tune_ml_models.assert_called_once_with(**tune_kwargs_nores) + + +@pytest.mark.ci +def test_doubleml_did_multi_tune_ml_models_handles_all_group_time_models(did_multi_obj): + dml_obj = did_multi_obj + mocks = [] + expected_payload = [] + + for idx in range(len(dml_obj.modellist)): + mock_model = MagicMock() + payload = {"params": f"gt-{idx}"} + mock_model.tune_ml_models.return_value = [payload] + mocks.append(mock_model) + expected_payload.append(payload) + + dml_obj._modellist = mocks + + tune_kwargs = _make_tune_kwargs(dml_obj) + + res = dml_obj.tune_ml_models(**tune_kwargs) + assert res == expected_payload + for mock in mocks: + mock.tune_ml_models.assert_called_once_with(**tune_kwargs) + + for mock in mocks: + mock.reset_mock() + tune_kwargs_nores = _make_tune_kwargs(dml_obj, return_tune_res=False) + + assert dml_obj.tune_ml_models(**tune_kwargs_nores) is dml_obj + for mock in mocks: + mock.tune_ml_models.assert_called_once_with(**tune_kwargs_nores) diff --git a/doubleml/tests/test_optuna_settings_validation.py b/doubleml/tests/test_optuna_settings_validation.py new file mode 100644 index 00000000..8414ff06 --- /dev/null +++ b/doubleml/tests/test_optuna_settings_validation.py @@ -0,0 +1,143 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +import doubleml as dml +from doubleml.did.datasets import make_did_SZ2020 +from doubleml.irm.datasets import make_irm_data +from doubleml.plm.datasets import make_pliv_CHS2015, make_plr_CCDDHNR2018 + + +def _constant_params(_trial): + return {} + + +@pytest.mark.ci +def test_optuna_settings_invalid_key_for_irm_raises(): + np.random.seed(2024) + dml_data = make_irm_data(n_obs=40, dim_x=2) + + ml_g = DecisionTreeRegressor(random_state=11) + ml_m = DecisionTreeClassifier(random_state=22) + dml_irm = dml.DoubleMLIRM(dml_data, ml_g, ml_m, n_folds=2, n_rep=1) + + optuna_params = {"ml_g0": _constant_params, "ml_g1": _constant_params, "ml_m": _constant_params} + invalid_settings = {"ml_l": {"n_trials": 5}} + + with pytest.raises(ValueError, match="ml_l"): + dml_irm.tune_ml_models(ml_param_space=optuna_params, optuna_settings=invalid_settings) + + +@pytest.mark.ci +def test_optuna_settings_invalid_key_for_plr_raises(): + np.random.seed(2025) + dml_data = make_plr_CCDDHNR2018(n_obs=80, dim_x=4) + + ml_l = DecisionTreeRegressor(random_state=33) + ml_m = DecisionTreeRegressor(random_state=44) + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2, n_rep=1) + + optuna_params = {"ml_l": _constant_params, "ml_m": _constant_params} + invalid_settings = {"ml_g0": {"n_trials": 5}} + + with pytest.raises(ValueError, match="ml_g0"): + dml_plr.tune_ml_models(ml_param_space=optuna_params, optuna_settings=invalid_settings) + + +@pytest.mark.ci +def test_optuna_settings_invalid_key_for_pliv_raises(): + np.random.seed(2026) + dml_data = make_pliv_CHS2015(n_obs=80, dim_x=4, dim_z=2) + + ml_l = DecisionTreeRegressor(random_state=55) + ml_m = DecisionTreeRegressor(random_state=66) + ml_r = 
DecisionTreeRegressor(random_state=77) + dml_pliv = dml.DoubleMLPLIV(dml_data, ml_l, ml_m, ml_r, n_folds=2, n_rep=1) + + optuna_params = { + "ml_l": _constant_params, + "ml_m_Z1": _constant_params, + "ml_m_Z2": _constant_params, + "ml_r": _constant_params, + } + + invalid_settings = {"ml_g": {"n_trials": 5}} + + with pytest.raises(ValueError, match="ml_g"): + dml_pliv.tune_ml_models(ml_param_space=optuna_params, optuna_settings=invalid_settings) + + +@pytest.mark.ci +def test_optuna_settings_invalid_key_for_did_raises(): + np.random.seed(2027) + dml_data = make_did_SZ2020(n_obs=120, dgp_type=1, return_type="DoubleMLDIDData") + + ml_g = DecisionTreeRegressor(random_state=88) + ml_m = DecisionTreeClassifier(random_state=99) + dml_did = dml.DoubleMLDID(dml_data, ml_g, ml_m, score="observational", n_folds=2, n_rep=1) + + optuna_params = {"ml_g0": _constant_params, "ml_g1": _constant_params, "ml_m": _constant_params} + invalid_settings = {"ml_l": {"n_trials": 5}} + + with pytest.raises(ValueError, match="ml_l"): + dml_did.tune_ml_models(ml_param_space=optuna_params, optuna_settings=invalid_settings) + + +@pytest.mark.ci +def test_optuna_params_invalid_key_for_irm_raises(): + np.random.seed(2028) + dml_data = make_irm_data(n_obs=40, dim_x=2) + + ml_g = DecisionTreeRegressor(random_state=99) + ml_m = DecisionTreeClassifier(random_state=101) + dml_irm = dml.DoubleMLIRM(dml_data, ml_g, ml_m, n_folds=2, n_rep=1) + + optuna_params = {"ml_g0": _constant_params, "ml_g1": _constant_params, "ml_m": _constant_params, "ml_l": _constant_params} + + with pytest.raises(ValueError, match="ml_l"): + dml_irm.tune_ml_models(ml_param_space=optuna_params) + + +@pytest.mark.ci +def test_optuna_params_invalid_key_for_plr_raises(): + np.random.seed(2029) + dml_data = make_plr_CCDDHNR2018(n_obs=80, dim_x=4) + + ml_l = DecisionTreeRegressor(random_state=111) + ml_m = DecisionTreeRegressor(random_state=222) + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2, n_rep=1) + + optuna_params = {"ml_l": _constant_params, "ml_m": _constant_params, "ml_g0": _constant_params} + + with pytest.raises(ValueError, match="ml_g0"): + dml_plr.tune_ml_models(ml_param_space=optuna_params) + + +@pytest.mark.ci +def test_optuna_params_invalid_key_for_pliv_raises(): + np.random.seed(2030) + dml_data = make_pliv_CHS2015(n_obs=80, dim_x=4, dim_z=2) + + ml_l = DecisionTreeRegressor(random_state=333) + ml_m = DecisionTreeRegressor(random_state=444) + ml_r = DecisionTreeRegressor(random_state=555) + dml_pliv = dml.DoubleMLPLIV(dml_data, ml_l, ml_m, ml_r, n_folds=2, n_rep=1) + + optuna_params = {"ml_l": _constant_params, "ml_m": _constant_params, "ml_r": _constant_params, "ml_g": _constant_params} + + with pytest.raises(ValueError, match="ml_g"): + dml_pliv.tune_ml_models(ml_param_space=optuna_params) + + +@pytest.mark.ci +def test_optuna_params_invalid_key_for_did_raises(): + np.random.seed(2031) + dml_data = make_did_SZ2020(n_obs=100, dgp_type=1, return_type="DoubleMLDIDData") + + ml_g = DecisionTreeRegressor(random_state=666) + dml_did = dml.DoubleMLDID(dml_data, ml_g, score="experimental", n_folds=2, n_rep=1) + + optuna_params = {"ml_g0": _constant_params, "ml_g1": _constant_params, "ml_l": _constant_params} + + with pytest.raises(ValueError, match="ml_l"): + dml_did.tune_ml_models(ml_param_space=optuna_params) diff --git a/doubleml/tests/test_optuna_tune_multiple_treatments.py b/doubleml/tests/test_optuna_tune_multiple_treatments.py new file mode 100644 index 00000000..70328747 --- /dev/null +++ 
b/doubleml/tests/test_optuna_tune_multiple_treatments.py @@ -0,0 +1,61 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeRegressor + +import doubleml as dml +from doubleml.plm.datasets import make_plr_CCDDHNR2018 +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +def test_doubleml_plr_optuna_multiple_treatments(sampler_name, optuna_sampler): + np.random.seed(3141) + alpha = 0.5 + data = make_plr_CCDDHNR2018(n_obs=500, dim_x=5, alpha=alpha, return_type="DataFrame") + treats = ["d", "X1"] + dml_data = dml.DoubleMLData( + data, y_col="y", d_cols=treats, x_cols=[col for col in data.columns if col not in ["y", "d", "X1"]] + ) + + ml_l = DecisionTreeRegressor(random_state=123) + ml_m = DecisionTreeRegressor(random_state=456) + + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2, score="partialling out") + dml_plr.fit() + untuned_score = dml_plr.evaluate_learners() + + optuna_params = {"ml_l": _small_tree_params, "ml_m": _small_tree_params} + + tune_res = dml_plr.tune_ml_models( + ml_param_space=optuna_params, + optuna_settings=_basic_optuna_settings({"sampler": optuna_sampler}), + return_tune_res=True, + ) + + dml_plr.fit() + tuned_score = dml_plr.evaluate_learners() + + for i, _ in enumerate(treats): + tuned_params_l = tune_res[i]["ml_l"].best_params + tuned_params_m = tune_res[i]["ml_m"].best_params + + _assert_tree_params(tuned_params_l) + _assert_tree_params(tuned_params_m) + + # ensure results contain optuna objects and best params + assert isinstance(tune_res[i], dict) + assert set(tune_res[i].keys()) == {"ml_l", "ml_m"} + assert hasattr(tune_res[i]["ml_l"], "best_params") + assert tune_res[i]["ml_l"].best_params["max_depth"] == tuned_params_l["max_depth"] + assert hasattr(tune_res[i]["ml_m"], "best_params") + assert tune_res[i]["ml_m"].best_params["max_depth"] == tuned_params_m["max_depth"] + + # ensure tuning improved RMSE + assert tuned_score["ml_l"].squeeze()[i] < untuned_score["ml_l"].squeeze()[i] + assert tuned_score["ml_m"].squeeze()[i] < untuned_score["ml_m"].squeeze()[i] diff --git a/doubleml/utils/__init__.py b/doubleml/utils/__init__.py index 4f6269dd..868429da 100644 --- a/doubleml/utils/__init__.py +++ b/doubleml/utils/__init__.py @@ -2,6 +2,7 @@ The :mod:`doubleml.utils` module includes various utilities. """ +from ._tune_optuna import DMLOptunaResult from .blp import DoubleMLBLP from .dummy_learners import DMLDummyClassifier, DMLDummyRegressor from .gain_statistics import gain_statistics @@ -13,6 +14,7 @@ __all__ = [ "DMLDummyRegressor", "DMLDummyClassifier", + "DMLOptunaResult", "DoubleMLResampling", "DoubleMLClusterResampling", "DoubleMLBLP", diff --git a/doubleml/utils/_tune_optuna.py b/doubleml/utils/_tune_optuna.py new file mode 100644 index 00000000..36d8f7e7 --- /dev/null +++ b/doubleml/utils/_tune_optuna.py @@ -0,0 +1,708 @@ +""" +Optuna-based hyperparameter tuning utilities for DoubleML. + +This module provides Optuna-specific functionality for hyperparameter optimization, +decoupled from sklearn-based grid/randomized search. + +Logging +------- +This module uses Python's logging module. The logger is synchronized with Optuna's logging +system. You can control the verbosity by: +1. Setting the logging level for 'doubleml.utils._tune_optuna' +2. 
Passing 'verbosity' in optuna_settings (takes precedence) + +Example: + >>> import logging + >>> logging.basicConfig(level=logging.INFO) + >>> # Now you'll see tuning progress and information +""" + +import logging +from collections.abc import Iterable +from copy import deepcopy +from dataclasses import dataclass +from pprint import pformat +from typing import Callable, Union + +import numpy as np +import optuna +from sklearn.base import clone, is_classifier, is_regressor +from sklearn.model_selection import BaseCrossValidator, KFold, cross_val_score + +logger = logging.getLogger(__name__) + +_OPTUNA_KFOLD_RANDOM_STATE = 42 + +_OPTUNA_DEFAULT_SETTINGS = { + "n_trials": 100, + "timeout": None, + "direction": "maximize", + "study_kwargs": {}, + "optimize_kwargs": {}, + "sampler": None, + "callbacks": None, + "catch": (), + "show_progress_bar": False, + "gc_after_trial": False, + "study": None, + "n_jobs_optuna": None, + "verbosity": None, +} + + +@dataclass +class DMLOptunaResult: + """ + Container for Optuna search results. + + This dataclass holds the results of Optuna-based hyperparameter tuning, + including the best estimator, parameters, score, and the complete study history. + + Parameters + ---------- + learner_name : str + Name of the learner passed (e.g., 'ml_g'). + + params_name : str + Name of the nuisance parameter being tuned (e.g., 'ml_g0'). + + best_estimator : object + The estimator instance with the best found hyperparameters set and fitted on the + full dataset used during tuning. + + best_params : dict + The best hyperparameters found during tuning. + + best_score : float + The best average cross-validation score achieved during tuning. + + scoring_method : str or callable + The scoring method used during tuning. + + study : optuna.study.Study + The Optuna study object containing the tuning history. + + tuned : bool + Indicates whether tuning was performed (True) or skipped (False). + """ + + learner_name: str + params_name: str + best_estimator: object + best_params: dict + best_score: float + scoring_method: Union[str, Callable] + study: optuna.study.Study + tuned: bool + + def __str__(self): + core_summary = self._core_summary_str() + params_summary = self._best_params_str() + res = ( + "================== DMLOptunaResult ==================\n" + + core_summary + + "\n------------------ Best parameters ------------------\n" + + params_summary + ) + return res + + def _core_summary_str(self): + scoring_repr = ( + self.scoring_method.__name__ + if callable(self.scoring_method) and hasattr(self.scoring_method, "__name__") + else str(self.scoring_method) + ) + summary = ( + f"Learner name: {self.learner_name}\n" + f"Params name: {self.params_name}\n" + f"Tuned: {self.tuned}\n" + f"Best score: {self.best_score}\n" + f"Scoring method: {scoring_repr}\n" + ) + return summary + + def _best_params_str(self): + if not self.best_params: + return "No best parameters available.\n" + formatted = pformat(self.best_params, sort_dicts=True, compact=True) + return f"{formatted}\n" + + +OPTUNA_GLOBAL_SETTING_KEYS = frozenset(_OPTUNA_DEFAULT_SETTINGS.keys()) + +TUNE_ML_MODELS_DOC = """ + Hyperparameter-tuning for DoubleML models using Optuna. + + The hyperparameter-tuning is performed using Optuna's Bayesian optimization. + Unlike grid/randomized search, Optuna tuning is performed once on the whole dataset + using cross-validation, and the same optimal hyperparameters are used for all folds. 
+
+    Parameters
+    ----------
+    ml_param_space : dict
+        A dict with a parameter grid function for each nuisance model
+        (see attribute ``params_names``) or for each learner (see attribute ``learner_names``).
+        Mixed specifications are allowed, i.e., some nuisance models can share the same learner.
+        For mixed specifications, learner-specific settings will be overwritten by nuisance model-specific settings.
+
+        Each parameter grid must be specified as a callable function that takes an Optuna trial
+        and returns a dictionary of hyperparameters.
+
+        For PLR models, keys should be: ``'ml_l'``, ``'ml_m'`` (and optionally ``'ml_g'`` for IV-type score).
+        For IRM models, keys should be: ``'ml_g0'``, ``'ml_g1'`` (or just ``'ml_g'`` for both), ``'ml_m'``.
+
+        Example:
+
+        .. code-block:: python
+
+            def ml_l_params(trial):
+                return {
+                    'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
+                    'n_estimators': trial.suggest_int('n_estimators', 100, 500, step=50),
+                    'num_leaves': trial.suggest_int('num_leaves', 20, 256),
+                    'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
+                }
+
+            ml_param_space = {'ml_l': ml_l_params, 'ml_m': ml_m_params}
+
+        Note: Optuna tuning is performed globally (not fold-specific) to ensure consistent
+        hyperparameters across all folds.
+
+    scoring_methods : None or dict
+        The scoring method used to evaluate the predictions. The scoring method must be set per
+        nuisance model via a dict (see attribute ``params_names`` for the keys).
+        If None, the estimator's score method is used.
+        Default is ``None``.
+
+    cv : int, cross-validation splitter, or iterable of (train_indices, test_indices)
+        Cross-validation strategy used for Optuna-based tuning. If an integer is provided, a shuffled
+        :class:`sklearn.model_selection.KFold` with the specified number of splits and ``random_state=42`` is used.
+        Custom splitters must implement ``split`` (and ideally ``get_n_splits``), or be an iterable yielding
+        ``(train_indices, test_indices)`` pairs. Default is ``5``.
+
+    set_as_params : bool
+        Indicates whether the hyperparameters should be set in order to be used when :meth:`fit` is called.
+        Default is ``True``.
+
+    return_tune_res : bool
+        Indicates whether detailed tuning results should be returned.
+        Default is ``False``.
+
+    optuna_settings : None or dict
+        Optional configuration passed to the Optuna tuner. Supports global settings
+        as well as learner-specific overrides (using the keys from ``ml_param_space``).
+        The dictionary can contain entries corresponding to Optuna's study and optimize
+        configuration such as:
+
+        - ``n_trials`` (int): Number of optimization trials (default: 100)
+        - ``timeout`` (float): Time limit in seconds for the study (default: None)
+        - ``direction`` (str): Optimization direction, 'maximize' or 'minimize'.
+          For sklearn scorers, use 'maximize' for negative metrics like 'neg_mean_squared_error'
+          (since -0.1 > -0.2 means better performance). Can be set globally or per learner.
+ (default: 'maximize') + - ``sampler`` (optuna.samplers.BaseSampler): Optuna sampler instance (default: None, uses TPE) + - ``callbacks`` (list): List of callback functions (default: None) + - ``show_progress_bar`` (bool): Show progress bar during optimization (default: False) + - ``n_jobs_optuna`` (int): Number of parallel trials (default: None) + - ``verbosity`` (int): Optuna logging verbosity level (default: None) + - ``study`` (optuna.study.Study): Pre-created study instance (default: None) + - ``study_kwargs`` (dict): Additional kwargs for study creation (default: {}) + - ``optimize_kwargs`` (dict): Additional kwargs for study.optimize() (default: {}) + + To set direction per learner (similar to ``scoring_methods``): + + .. code-block:: python + + optuna_settings = { + 'n_trials': 50, + 'direction': 'maximize', # Global default + 'ml_g0': {'direction': 'maximize'}, # Per-learner override + 'ml_m': {'n_trials': 100, 'direction': 'maximize'} + } + + Defaults to ``None``. + + Returns + ------- + self : object + Returned if ``return_tune_res`` is ``False``. + + tune_res: list + A list containing detailed tuning results and the proposed hyperparameters. + Returned if ``return_tune_res`` is ``True``. + + Examples + -------- + >>> import numpy as np + >>> from doubleml import DoubleMLData, DoubleMLPLR + >>> from doubleml.plm.datasets import make_plr_CCDDHNR2018 + >>> from lightgbm import LGBMRegressor + >>> import optuna + >>> # Generate data + >>> np.random.seed(42) + >>> data = make_plr_CCDDHNR2018(n_obs=500, dim_x=20, return_type='DataFrame') + >>> dml_data = DoubleMLData(data, 'y', 'd') + >>> # Initialize model + >>> dml_plr = DoubleMLPLR( + ... dml_data, + ... LGBMRegressor(n_estimators=50, verbose=-1, random_state=42), + ... LGBMRegressor(n_estimators=50, verbose=-1, random_state=42) + ... ) + >>> # Define parameter grid functions + >>> def ml_l_params(trial): + ... return { + ... 'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True), + ... } + >>> def ml_m_params(trial): + ... return { + ... 'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True), + ... } + >>> ml_param_space = {'ml_l': ml_l_params, 'ml_m': ml_m_params} + >>> # Tune with TPE sampler + >>> optuna_settings = { + ... 'n_trials': 5, + ... 'sampler': optuna.samplers.TPESampler(seed=42), + ... } + >>> tune_res = dml_plr.tune_ml_models(ml_param_space, optuna_settings=optuna_settings, return_tune_res=True) + >>> print(tune_res[0]['ml_l'].best_params) # doctest: +SKIP + {'learning_rate': 0.03907122389107094} + >>> # Fit and get results + >>> dml_plr.fit().summary # doctest: +SKIP + coef std err t P>|t| 2.5 % 97.5 % + d 0.57436 0.045206 12.705519 5.510257e-37 0.485759 0.662961 + >>> # Example with scoring methods and directions + >>> scoring_methods = { + ... 'ml_l': 'neg_mean_squared_error', # Negative metric + ... 'ml_m': 'neg_mean_squared_error' + ... } + >>> optuna_settings = { + ... 'n_trials': 50, + ... 'direction': 'maximize', # Maximize negative MSE (minimize MSE) + ... 'sampler': optuna.samplers.TPESampler(seed=42), + ... } + >>> tune_res = dml_plr.tune_ml_models(ml_param_space, scoring_methods=scoring_methods, + ... 
optuna_settings=optuna_settings, return_tune_res=True) + >>> print(tune_res[0]['ml_l'].best_params) # doctest: +SKIP + {'learning_rate': 0.04300012336462904} + >>> dml_plr.fit().summary # doctest: +SKIP + coef std err t P>|t| 2.5 % 97.5 % + d 0.574796 0.045062 12.755721 2.896820e-37 0.486476 0.663115 + """ + + +def _default_optuna_settings(): + return deepcopy(_OPTUNA_DEFAULT_SETTINGS) + + +def _resolve_optuna_scoring(scoring_method, learner, params_name): + """Resolve the scoring argument for an Optuna-tuned learner. + + Parameters + ---------- + scoring_method : str, callable or None + Scoring argument supplied by the caller. ``None`` triggers automatic + fallback selection. + learner : estimator + Estimator instance that will be tuned. + params_name : str + Identifier used for logging and error messages. + + Returns + ------- + tuple + A pair consisting of the scoring argument to pass to + :func:`sklearn.model_selection.cross_val_score` (``None`` means use the + estimator's default ``score``) and a human-readable message describing + the decision for logging purposes. + """ + + if scoring_method is not None: + message = f"Using provided scoring method: {scoring_method} for learner '{params_name}'" + return scoring_method, message + + if is_regressor(learner): + message = "No scoring method provided, using 'neg_root_mean_squared_error' (RMSE) " f"for learner '{params_name}'." + return "neg_root_mean_squared_error", message + + if is_classifier(learner): + message = f"No scoring method provided, using 'neg_log_loss' " f"for learner '{params_name}'." + return "neg_log_loss", message + + raise ValueError( + f"No scoring method provided and estimator type could not be inferred. Please provide a scoring_method for learner " + f"'{params_name}'." + ) + + +def resolve_optuna_cv(cv): + """Normalize the ``cv`` argument for Optuna-based tuning.""" + + if cv is None: + cv = 5 + + if isinstance(cv, int): + if cv < 2: + raise ValueError(f"The number of folds used for tuning must be at least two. {cv} was passed.") + return KFold(n_splits=cv, shuffle=True, random_state=_OPTUNA_KFOLD_RANDOM_STATE) + + if isinstance(cv, BaseCrossValidator): + return cv + + if isinstance(cv, str): + raise TypeError("cv must not be provided as a string. Pass an integer or a cross-validation splitter.") + + split_attr = getattr(cv, "split", None) + if callable(split_attr): + return cv + + if isinstance(cv, Iterable): + cv_list = list(cv) + if not cv_list: + raise ValueError("cv iterable must not be empty.") + for split in cv_list: + if not isinstance(split, (tuple, list)) or len(split) != 2: + raise TypeError("cv iterable must yield (train_indices, test_indices) pairs.") + return cv_list + + raise TypeError( + "cv must be an integer >= 2, a scikit-learn cross-validation splitter, or an iterable of " + "(train_indices, test_indices) pairs." + ) + + +def _check_tuning_inputs( + y, + x, + learner, + param_grid_func, + scoring_method, + cv, + params_name, +): + """Validate Optuna tuning inputs and normalize the cross-validation splitter. + + Parameters + ---------- + y : np.ndarray + Target array used during tuning. + x : np.ndarray + Feature matrix used during tuning. + learner : estimator + Estimator that will be tuned. + param_grid_func : callable or None + Callback that samples hyperparameters from an Optuna trial. + scoring_method : str, callable or None + Scoring argument after applying :func:`doubleml.utils._tune_optuna._resolve_optuna_scoring`. 
+ cv : int, cross-validation splitter or iterable + Cross-validation definition provided by the caller. + params_name : str + Name of the nuisance parameter for logging purposes. + + Returns + ------- + cross-validator or iterable + Cross-validation splitter compatible with + :func:`sklearn.model_selection.cross_val_score`. + """ + + if y.shape[0] != x.shape[0]: + raise ValueError(f"Features and target must contain the same number of observations for learner '{params_name}'.") + if y.size == 0: + raise ValueError(f"Empty target passed to Optuna tuner for learner '{params_name}'.") + + if param_grid_func is not None and not callable(param_grid_func): + raise TypeError( + "param_grid must be a callable function that takes a trial and returns a dict. " + f"Got {type(param_grid_func).__name__} for learner '{params_name}'." + ) + + if scoring_method is not None and not callable(scoring_method) and not isinstance(scoring_method, str): + raise TypeError( + "scoring_method must be None, a string, a callable, accepted by scikit-learn. " + f"Got {type(scoring_method).__name__} for learner '{params_name}'." + ) + + if not hasattr(learner, "fit") or not hasattr(learner, "set_params"): + raise TypeError(f"Learner '{params_name}' must implement fit and set_params to be tuned with Optuna.") + + return resolve_optuna_cv(cv) + + +def _get_optuna_settings(optuna_settings, params_name): + """ + Get Optuna settings, considering defaults, user-provided values, and learner-specific overrides. + + Parameters + ---------- + optuna_settings : dict or None + User-provided Optuna settings. + params_name : str + Name of the nuisance params to check for specific setting, e.g. `ml_g0` or `ml_g1` for `DoubleMLIRM`. + + Returns + ------- + dict + Resolved settings dictionary. + """ + default_settings = _default_optuna_settings() + + if optuna_settings is None: + return default_settings + + if not isinstance(optuna_settings, dict): + raise TypeError("optuna_settings must be a dict or None.") + + # Base settings are the user-provided settings filtered by default keys + base_settings = {key: value for key, value in optuna_settings.items() if key in OPTUNA_GLOBAL_SETTING_KEYS} + learner_or_params_keys = set(optuna_settings.keys()) - set(base_settings.keys()) + + # Find matching learner-specific settings, handles the case to match ml_g to ml_g0, ml_g1, etc. + learner_specific_settings = {} + for k in learner_or_params_keys: + if k in params_name and params_name != k: + learner_specific_settings = optuna_settings[k] + + # set params specific settings + params_specific_settings = {} + if params_name in learner_or_params_keys: + params_specific_settings = optuna_settings[params_name] + + # Merge settings: defaults < base < learner-specific < params_specific + resolved = default_settings.copy() | base_settings | learner_specific_settings | params_specific_settings + + # Validate types + if not isinstance(resolved["study_kwargs"], dict): + raise TypeError("study_kwargs must be a dict.") + if not isinstance(resolved["optimize_kwargs"], dict): + raise TypeError("optimize_kwargs must be a dict.") + if resolved["callbacks"] is not None and not isinstance(resolved["callbacks"], (list, tuple)): + raise TypeError("callbacks must be a sequence of callables or None.") + + return resolved + + +def _create_study(settings, learner_name): + """ + Create or retrieve an Optuna :class:`optuna.study.Study` instance. + + Parameters + ---------- + settings : dict + Resolved Optuna settings containing study configuration. 
+ learner_name : str + Identifier used for logging the resolved study configuration. + + Returns + ------- + optuna.study.Study + The Optuna study object ready for optimization. + """ + + # Check if a study instance is provided directly + study_instance = settings.get("study") + if study_instance is not None: + return study_instance + + # Build study kwargs from settings + study_kwargs = settings.get("study_kwargs", {}).copy() + if "direction" not in study_kwargs: + study_kwargs["direction"] = settings.get("direction", "maximize") + logger.info(f"Optuna study direction set to '{study_kwargs['direction']}' for learner '{learner_name}'.") + if settings.get("sampler") is not None: + study_kwargs["sampler"] = settings["sampler"] + logger.info(f"Using sampler {settings['sampler'].__class__.__name__} for learner '{learner_name}'.") + study_kwargs.setdefault("study_name", f"tune_{learner_name}") + + return optuna.create_study(**study_kwargs) + + +def _create_objective(param_grid_func, learner, x, y, cv, scoring_method): + """ + Create an Optuna objective function for hyperparameter optimization. + + Parameters + ---------- + param_grid_func : callable + Function that takes an Optuna trial and returns a parameter dictionary. + Example: def params(trial): return {"learning_rate": trial.suggest_float("learning_rate", 0.01, 0.1)} + learner : estimator + The machine learning model to tune. + x : np.ndarray + Features (full dataset). + y : np.ndarray + Target variable (full dataset). + cv : cross-validation generator + KFold or similar cross-validation splitter. + scoring_method : str, callable or None + Scoring argument for cross-validation. ``None`` delegates to the + estimator's default ``score`` implementation. + + Returns + ------- + callable + Objective function for Optuna optimization. + """ + + def objective(trial): + """Objective function for Optuna optimization.""" + # Get parameters from the user-provided function + params = param_grid_func(trial) + + if not isinstance(params, dict): + raise TypeError( + f"param function must return a dict. Got {type(params).__name__}. " + f"Example: def params(trial): return {{'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1)}}" + ) + + # Clone learner and set parameters + estimator = clone(learner).set_params(**params) + + # Perform cross-validation on full dataset + scores = cross_val_score( + estimator, + x, + y, + cv=cv, + scoring=scoring_method, + error_score="raise", + ) + + # Return mean test score + return np.nanmean(scores) + + return objective + + +def _dml_tune_optuna( + y, + x, + learner, + param_grid_func, + scoring_method, + cv, + optuna_settings, + learner_name, + params_name, +): + """ + Tune hyperparameters using Optuna on the whole dataset with cross-validation. + + Unlike the grid/randomized search which tunes separately for each fold, this function + tunes once on the full dataset and returns a single tuning result per learner. + + Parameters + ---------- + y : np.ndarray + Target variable (full dataset). + x : np.ndarray + Features (full dataset). + learner : estimator + The machine learning model to tune. + param_grid_func : callable + Function that takes an Optuna trial and returns a parameter dictionary. + Example: def params(trial): return {"learning_rate": trial.suggest_float("learning_rate", 0.01, 0.1)} + scoring_method : str, callable or None + Scoring argument passed to cross-validation. ``None`` triggers an + automatic fallback chosen by :func:`_resolve_optuna_scoring`. 
cv : int, cross-validation splitter, or iterable of (train_indices, test_indices)
+        Cross-validation strategy used during tuning. If an integer is provided, a shuffled
+        :class:`sklearn.model_selection.KFold` with the specified number of splits and ``random_state=42`` is used.
+    optuna_settings : dict or None
+        Optuna-specific settings.
+    learner_name : str
+        Name of the learner for logging and identification.
+    params_name : str or None
+        Name of the nuisance parameter for settings selection.
+
+    Returns
+    -------
+    DMLOptunaResult
+        A tuning result containing the optuna.Study object and further information.
+    """
+
+    scoring_method, scoring_message = _resolve_optuna_scoring(scoring_method, learner, params_name)
+    if scoring_message:
+        logger.info(scoring_message)
+
+    cv_splitter = _check_tuning_inputs(
+        y,
+        x,
+        learner,
+        param_grid_func,
+        scoring_method,
+        cv,
+        params_name=params_name,
+    )
+
+    if param_grid_func is None:
+        estimator = clone(learner)
+        estimator.fit(x, y)
+        best_params = estimator.get_params(deep=True)
+        return DMLOptunaResult(
+            learner_name=learner_name,
+            params_name=params_name,
+            best_estimator=estimator,
+            best_params=best_params,
+            best_score=np.nan,
+            scoring_method=scoring_method,
+            study=None,
+            tuned=False,
+        )
+
+    settings = _get_optuna_settings(optuna_settings, params_name)
+    # Set Optuna logging verbosity if specified
+    verbosity = settings.get("verbosity")
+    if verbosity is not None:
+        optuna.logging.set_verbosity(verbosity)
+
+    # Create the study
+    study = _create_study(settings, params_name)
+
+    # Optionally set metric names (commented out as there is a warning in Optuna about this)
+    # study.set_metric_names([f"{scoring_method}_{params_name}"])
+
+    # Create the objective function
+    objective = _create_objective(param_grid_func, learner, x, y, cv_splitter, scoring_method)
+
+    # Build optimize kwargs
+    optimize_kwargs = {
+        "n_trials": settings["n_trials"],
+        "timeout": settings["timeout"],
+        "callbacks": settings["callbacks"],
+        "catch": settings["catch"],
+        "show_progress_bar": settings["show_progress_bar"],
+        "gc_after_trial": settings["gc_after_trial"],
+        "n_jobs": settings["n_jobs_optuna"],
+    }
+    optimize_kwargs.update(settings.get("optimize_kwargs", {}))
+
+    # Filter out None values, but keep boolean flags
+    final_optimize_kwargs = {
+        k: v for k, v in optimize_kwargs.items() if v is not None or k in ["show_progress_bar", "gc_after_trial"]
+    }
+
+    # Run optimization once on the full dataset
+    study.optimize(objective, **final_optimize_kwargs)
+
+    # Validate optimization results
+    if not study.trials or all(t.state != optuna.trial.TrialState.COMPLETE for t in study.trials):
+        raise RuntimeError("Optuna optimization failed to produce any complete trials.")
+
+    # Extract best parameters and score
+    # Since each learner is tuned independently, use all parameters from the study
+    best_params = dict(study.best_trial.params)
+    best_score = study.best_value
+
+    # Fit the best estimator on the full dataset once
+    best_estimator = clone(learner).set_params(**best_params)
+    best_estimator.fit(x, y)
+
+    return DMLOptunaResult(
+        learner_name=learner_name,
+        params_name=params_name,
+        best_estimator=best_estimator,
+        best_params=best_params,
+        best_score=best_score,
+        scoring_method=scoring_method,
+        study=study,
+        tuned=True,
+    )
diff --git a/doubleml/utils/tests/test_quadratic_inequality.py b/doubleml/utils/tests/test_quadratic_inequality.py
index 68faff0f..d59219af 100644
--- a/doubleml/utils/tests/test_quadratic_inequality.py
+++ 
b/doubleml/utils/tests/test_quadratic_inequality.py @@ -27,6 +27,7 @@ ], ) def test_solve_quadratic_inequation(a, b, c, expected): + np.random.seed(42) result = _solve_quadratic_inequality(a, b, c) assert len(result) == len(expected), f"Expected {len(expected)} intervals but got {len(result)}" diff --git a/pyproject.toml b/pyproject.toml index 6aa06ab5..eb80c712 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ dependencies = [ "scipy>=1.7.0", "scikit-learn>=1.6.0", "statsmodels>=0.14.0", + "optuna>=4.6.0", "matplotlib>=3.9.0", "seaborn>=0.13", "plotly>=5.0.0"