diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml index 2c77cac5..6cab94cb 100644 --- a/.github/workflows/pytest.yml +++ b/.github/workflows/pytest.yml @@ -58,6 +58,7 @@ jobs: matrix.config.os != 'ubuntu-latest' || matrix.config.python-version != '3.12' run: | + pytest --doctest-modules --ignore-glob="doubleml/**/tests/*" --ignore-glob="doubleml/tests/*" pytest -m ci pytest -m ci_rdd diff --git a/doubleml/data/tests/test_dml_data.py b/doubleml/data/tests/test_dml_data.py index 542bbb61..a745fb12 100644 --- a/doubleml/data/tests/test_dml_data.py +++ b/doubleml/data/tests/test_dml_data.py @@ -569,6 +569,7 @@ def test_dml_data_w_missings(generate_data_irm_w_missings): assert dml_data.force_all_x_finite == "allow-nan" +@pytest.mark.ci def test_dml_data_w_missing_d(generate_data1): data = generate_data1 np.random.seed(3141) diff --git a/doubleml/did/datasets/dgp_did_CS2021.py b/doubleml/did/datasets/dgp_did_CS2021.py index 50336cdb..1756d085 100644 --- a/doubleml/did/datasets/dgp_did_CS2021.py +++ b/doubleml/did/datasets/dgp_did_CS2021.py @@ -105,11 +105,35 @@ def make_did_CS2021(n_obs=1000, dgp_type=1, include_never_treated=True, time_typ 6. Treatment assignment: - For non-experimental settings (DGP 1-4), the probability of being in treatment group :math:`g` is: + For non-experimental settings (DGP 1-4), the probability of being in treatment group :math:`g` is computed as follows: - .. math:: + - Compute group-specific logits for each observation: + + .. math:: + + \\text{logit}_{i,g} = f_{ps,g}(W_{ps}) + + The logits are clipped to the range [-2.5, 2.5] for numerical stability. + + - Convert logits to uncapped probabilities via softmax: + + .. math:: + + p^{\\text{uncapped}}_{i,g} = \\frac{\\exp(\\text{logit}_{i,g})}{\\sum_{g'} \\exp(\\text{logit}_{i,g'})} + + - Clip uncapped probabilities to the range [0.05, 0.95]: + + .. math:: + + p^{\\text{clipped}}_{i,g} = \\min(\\max(p^{\\text{uncapped}}_{i,g}, 0.05), 0.95) + + - Renormalize clipped probabilities so they sum to 1 for each observation: + + .. math:: + + p_{i,g} = \\frac{p^{\\text{clipped}}_{i,g}}{\\sum_{g'} p^{\\text{clipped}}_{i,g'}} - P(G_i = g) = \\frac{\\exp(f_{ps,g}(W_{ps}))}{\\sum_{g'} \\exp(f_{ps,g'}(W_{ps}))} + - Assign each observation to a treatment group by sampling from the categorical distribution defined by :math:`p_{i,g}`. For experimental settings (DGP 5-6), each treatment group (including never-treated) has equal probability: @@ -159,7 +183,7 @@ def make_did_CS2021(n_obs=1000, dgp_type=1, include_never_treated=True, time_typ `dim_x` (int, default=4): Dimension of feature vectors. - `xi` (float, default=0.9): + `xi` (float, default=0.75): Scale parameter for the propensity score function.
`n_periods` (int, default=5): @@ -188,7 +212,7 @@ def make_did_CS2021(n_obs=1000, dgp_type=1, include_never_treated=True, time_typ c = kwargs.get("c", 0.0) dim_x = kwargs.get("dim_x", 4) - xi = kwargs.get("xi", 0.9) + xi = kwargs.get("xi", 0.75) n_periods = kwargs.get("n_periods", 5) anticipation_periods = kwargs.get("anticipation_periods", 0) n_pre_treat_periods = kwargs.get("n_pre_treat_periods", 2) @@ -228,8 +252,11 @@ def make_did_CS2021(n_obs=1000, dgp_type=1, include_never_treated=True, time_typ p = np.ones(n_treatment_groups) / n_treatment_groups d_index = np.random.choice(n_treatment_groups, size=n_obs, p=p) else: - unnormalized_p = np.exp(_f_ps_groups(features_ps, xi, n_groups=n_treatment_groups)) - p = unnormalized_p / unnormalized_p.sum(1, keepdims=True) + logits = np.clip(_f_ps_groups(features_ps, xi, n_groups=n_treatment_groups), a_min=-2.5, a_max=2.5) + unnormalized_p = np.exp(logits) + p_uncapped = unnormalized_p / unnormalized_p.sum(1, keepdims=True) + p_clipped = np.clip(p_uncapped, a_min=0.05, a_max=0.95) + p = p_clipped / p_clipped.sum(1, keepdims=True) d_index = np.array([np.random.choice(n_treatment_groups, p=p_row) for p_row in p]) # fixed effects (shape (n_obs, n_time_periods)) diff --git a/doubleml/did/datasets/dgp_did_cs_CS2021.py b/doubleml/did/datasets/dgp_did_cs_CS2021.py index 08021270..2fb044eb 100644 --- a/doubleml/did/datasets/dgp_did_cs_CS2021.py +++ b/doubleml/did/datasets/dgp_did_cs_CS2021.py @@ -85,11 +85,35 @@ def make_did_cs_CS2021(n_obs=1000, dgp_type=1, include_never_treated=True, lambd 6. Treatment assignment: - For non-experimental settings (DGP 1-4), the probability of being in treatment group :math:`g` is: + For non-experimental settings (DGP 1-4), the probability of being in treatment group :math:`g` is computed as follows: - .. math:: + - Compute group-specific logits for each observation: + + .. math:: + + \\text{logit}_{i,g} = f_{ps,g}(W_{ps}) + + The logits are clipped to the range [-2.5, 2.5] for numerical stability. + + - Convert logits to uncapped probabilities via softmax: + + .. math:: + + p^{\\text{uncapped}}_{i,g} = \\frac{\\exp(\\text{logit}_{i,g})}{\\sum_{g'} \\exp(\\text{logit}_{i,g'})} + + - Clip uncapped probabilities to the range [0.05, 0.95]: + + .. math:: + + p^{\\text{clipped}}_{i,g} = \\min(\\max(p^{\\text{uncapped}}_{i,g}, 0.05), 0.95) + + - Renormalize clipped probabilities so they sum to 1 for each observation: + + .. math:: + + p_{i,g} = \\frac{p^{\\text{clipped}}_{i,g}}{\\sum_{g'} p^{\\text{clipped}}_{i,g'}} - P(G_i = g) = \\frac{\\exp(f_{ps,g}(W_{ps}))}{\\sum_{g'} \\exp(f_{ps,g'}(W_{ps}))} + - Assign each observation to a treatment group by sampling from the categorical distribution defined by :math:`p_{i,g}`. For experimental settings (DGP 5-6), each treatment group (including never-treated) has equal probability: @@ -148,7 +172,7 @@ def make_did_cs_CS2021(n_obs=1000, dgp_type=1, include_never_treated=True, lambd `dim_x` (int, default=4): Dimension of feature vectors. - `xi` (float, default=0.9): + `xi` (float, default=0.75): Scale parameter for the propensity score function.
`n_periods` (int, default=5): diff --git a/doubleml/did/did.py b/doubleml/did/did.py index 5e86d52e..378aa644 100644 --- a/doubleml/did/did.py +++ b/doubleml/did/did.py @@ -9,6 +9,7 @@ from doubleml.double_ml_score_mixins import LinearScoreMixin from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls +from doubleml.utils._tune_optuna import _dml_tune_optuna # TODO: Remove DoubleMLDIDData with version 0.12.0 @@ -427,6 +428,83 @@ def _nuisance_tuning( return res + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + """ + Optuna-based hyperparameter tuning for DID nuisance models. + + Performs tuning once on the whole dataset using cross-validation, + returning the same optimal parameters for all folds. + """ + + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + + if scoring_methods is None: + if self.score == "observational": + scoring_methods = {"ml_g0": None, "ml_g1": None, "ml_m": None} + else: + scoring_methods = {"ml_g0": None, "ml_g1": None} + + # Separate data by treatment status for conditional mean tuning + mask_d0 = d == 0 + mask_d1 = d == 1 + + x_d0 = x[mask_d0, :] + y_d0 = y[mask_d0] + g0_tune_res = _dml_tune_optuna( + y_d0, + x_d0, + self._learner["ml_g"], + optuna_params["ml_g0"], + scoring_methods["ml_g0"], + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g0", + ) + + x_d1 = x[mask_d1, :] + y_d1 = y[mask_d1] + g1_tune_res = _dml_tune_optuna( + y_d1, + x_d1, + self._learner["ml_g"], + optuna_params["ml_g1"], + scoring_methods["ml_g1"], + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g1", + ) + + # Tune propensity score on full dataset for observational score + m_tune_res = None + if self.score == "observational": + m_tune_res = _dml_tune_optuna( + d, + x, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + + if self.score == "observational": + results = {"ml_g0": g0_tune_res, "ml_g1": g1_tune_res, "ml_m": m_tune_res} + else: + results = {"ml_g0": g0_tune_res, "ml_g1": g1_tune_res} + + return results + def sensitivity_benchmark(self, benchmarking_set, fit_args=None): """ Computes a benchmark for a given set of features. 
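Usage sketch (illustrative only, not part of the patch): the `_nuisance_tuning_optuna` hook added above is driven by the public `tune_ml_models` method introduced in `doubleml/double_ml.py` further down in this diff. Parameter spaces are callables that take an Optuna trial and return a parameter dict, keyed either per learner (`ml_g`, `ml_m`) or per nuisance component (`ml_g0`, `ml_g1`, `ml_m`). The full set of accepted `optuna_settings` keys is defined in `doubleml/utils/_tune_optuna.py`, which is not shown in this diff, so only the `"sampler"` key exercised by the new tests is assumed here.

```python
# Illustrative sketch of the new Optuna tuning API, mirroring the pattern of the tests in this PR.
import numpy as np
import optuna
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

import doubleml as dml
from doubleml.irm.datasets import make_irm_data

np.random.seed(3141)
dml_data = make_irm_data(n_obs=500, dim_x=5)
dml_irm = dml.DoubleMLIRM(dml_data, DecisionTreeRegressor(), DecisionTreeClassifier(), n_folds=2)


def tree_params(trial):
    # search space shared by all nuisance learners in this example
    return {
        "max_depth": trial.suggest_int("max_depth", 2, 6),
        "min_samples_leaf": trial.suggest_int("min_samples_leaf", 5, 50),
    }


tune_res = dml_irm.tune_ml_models(
    # learner-level keys are expanded to ml_g0/ml_g1/ml_m internally
    ml_param_space={"ml_g": tree_params, "ml_m": tree_params},
    # "sampler" is the settings key used by the tests; other keys live in _tune_optuna.py (not shown here)
    optuna_settings={"sampler": optuna.samplers.TPESampler(seed=42)},
    return_tune_res=True,
)
dml_irm.fit()  # refits with the tuned parameters (set_as_params=True by default)
print(tune_res[0]["ml_m"].best_params)
```

With `set_as_params=True` (the default), the best parameters are written back via `set_ml_nuisance_params`, so the subsequent `fit()` uses the tuned learners.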
diff --git a/doubleml/did/did_binary.py b/doubleml/did/did_binary.py index d92ebf19..a84304ab 100644 --- a/doubleml/did/did_binary.py +++ b/doubleml/did/did_binary.py @@ -23,6 +23,7 @@ _check_score, ) from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls +from doubleml.utils._tune_optuna import _dml_tune_optuna from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor @@ -666,6 +667,79 @@ def _nuisance_tuning( return res + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + + x, y = check_X_y(self._x_data_subset, self._y_data_subset, ensure_all_finite=False) + x, d = check_X_y(x, self._g_data_subset, ensure_all_finite=False) + + if scoring_methods is None: + if self.score == "observational": + scoring_methods = {"ml_g0": None, "ml_g1": None, "ml_m": None} + else: + scoring_methods = {"ml_g0": None, "ml_g1": None} + + mask_d0 = d == 0 + mask_d1 = d == 1 + + x_d0 = x[mask_d0, :] + y_d0 = y[mask_d0] + g0_param_grid = optuna_params["ml_g0"] + g0_scoring = scoring_methods["ml_g0"] + g0_tune_res = _dml_tune_optuna( + y_d0, + x_d0, + self._learner["ml_g"], + g0_param_grid, + g0_scoring, + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g0", + ) + + x_d1 = x[mask_d1, :] + y_d1 = y[mask_d1] + g1_param_grid = optuna_params["ml_g1"] + g1_scoring = scoring_methods["ml_g1"] + g1_tune_res = _dml_tune_optuna( + y_d1, + x_d1, + self._learner["ml_g"], + g1_param_grid, + g1_scoring, + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g1", + ) + + m_tune_res = None + if self.score == "observational": + m_tune_res = _dml_tune_optuna( + d, + x, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + + if self.score == "observational": + results = {"ml_g0": g0_tune_res, "ml_g1": g1_tune_res, "ml_m": m_tune_res} + else: + results = {"ml_g0": g0_tune_res, "ml_g1": g1_tune_res} + + return results + def _sensitivity_element_est(self, preds): y = self._y_data_subset d = self._g_data_subset diff --git a/doubleml/did/did_cs.py b/doubleml/did/did_cs.py index f0454b2b..609b313c 100644 --- a/doubleml/did/did_cs.py +++ b/doubleml/did/did_cs.py @@ -9,6 +9,7 @@ from doubleml.double_ml_score_mixins import LinearScoreMixin from doubleml.utils._checks import _check_finite_predictions, _check_is_propensity, _check_score from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d +from doubleml.utils._tune_optuna import _dml_tune_optuna # TODO: Remove DoubleMLDIDData with version 0.12.0 @@ -660,6 +661,82 @@ def _nuisance_tuning( return res + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + x, t = check_X_y(x, self._dml_data.t, ensure_all_finite=False) + + if scoring_methods is None: + if self.score == "observational": + scoring_methods = { + "ml_g_d0_t0": None, + "ml_g_d0_t1": None, + "ml_g_d1_t0": None, + "ml_g_d1_t1": None, + "ml_m": None, + } + else: + scoring_methods = { + "ml_g_d0_t0": None, + "ml_g_d0_t1": None, + "ml_g_d1_t0": None, + "ml_g_d1_t1": None, + } + + masks = { + "d0_t0": (d == 0) & (t == 0), + "d0_t1": (d == 0) & (t == 1), + "d1_t0": (d == 1) & (t == 0), + "d1_t1": (d == 1) & (t == 1), + } + + g_tune_results = {} + for key, mask in masks.items(): 
+ x_subset = x[mask, :] + y_subset = y[mask] + params_key = f"ml_g_{key}" + param_grid = optuna_params[params_key] + scoring = scoring_methods[params_key] + g_tune_results[key] = _dml_tune_optuna( + y_subset, + x_subset, + self._learner["ml_g"], + param_grid, + scoring, + cv, + optuna_settings, + learner_name="ml_g", + params_name=params_key, + ) + + m_tune_res = None + if self.score == "observational": + m_tune_res = _dml_tune_optuna( + d, + x, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + + results = {f"ml_g_{key}": res_obj for key, res_obj in g_tune_results.items()} + + if self.score == "observational": + results["ml_m"] = m_tune_res + + return results + def sensitivity_benchmark(self, benchmarking_set, fit_args=None): """ Computes a benchmark for a given set of features. diff --git a/doubleml/did/did_cs_binary.py b/doubleml/did/did_cs_binary.py index c547ff40..a2af841b 100644 --- a/doubleml/did/did_cs_binary.py +++ b/doubleml/did/did_cs_binary.py @@ -23,6 +23,7 @@ _check_score, ) from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d +from doubleml.utils._tune_optuna import _dml_tune_optuna from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor @@ -765,6 +766,82 @@ def _nuisance_tuning( return res + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + + x, y = check_X_y(self._x_data_subset, self._y_data_subset, ensure_all_finite=False) + _, d = check_X_y(x, self._g_data_subset, ensure_all_finite=False) + _, t = check_X_y(x, self._t_data_subset, ensure_all_finite=False) + + if scoring_methods is None: + if self.score == "observational": + scoring_methods = { + "ml_g_d0_t0": None, + "ml_g_d0_t1": None, + "ml_g_d1_t0": None, + "ml_g_d1_t1": None, + "ml_m": None, + } + else: + scoring_methods = { + "ml_g_d0_t0": None, + "ml_g_d0_t1": None, + "ml_g_d1_t0": None, + "ml_g_d1_t1": None, + } + + masks = { + "d0_t0": (d == 0) & (t == 0), + "d0_t1": (d == 0) & (t == 1), + "d1_t0": (d == 1) & (t == 0), + "d1_t1": (d == 1) & (t == 1), + } + + g_tune_results = {} + for key, mask in masks.items(): + x_subset = x[mask, :] + y_subset = y[mask] + params_key = f"ml_g_{key}" + param_grid = optuna_params[params_key] + scoring = scoring_methods[params_key] + g_tune_results[key] = _dml_tune_optuna( + y_subset, + x_subset, + self._learner["ml_g"], + param_grid, + scoring, + cv, + optuna_settings, + learner_name="ml_g", + params_name=params_key, + ) + + m_tune_res = None + if self.score == "observational": + m_tune_res = _dml_tune_optuna( + d, + x, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + + results = {f"ml_g_{key}": res_obj for key, res_obj in g_tune_results.items()} + + if self.score == "observational": + results["ml_m"] = m_tune_res + + return results + def _sensitivity_element_est(self, preds): y = self._y_data_subset d = self._g_data_subset diff --git a/doubleml/did/did_multi.py b/doubleml/did/did_multi.py index ba00ea13..54c242d1 100644 --- a/doubleml/did/did_multi.py +++ b/doubleml/did/did_multi.py @@ -36,6 +36,7 @@ from doubleml.double_ml_framework import concat from doubleml.utils._checks import _check_bool, _check_score from doubleml.utils._descriptive import generate_summary +from doubleml.utils._tune_optuna import TUNE_ML_MODELS_DOC from 
doubleml.utils.gain_statistics import gain_statistics from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor @@ -734,6 +735,37 @@ def p_adjust(self, method="romano-wolf"): return p_val + def tune_ml_models( + self, + ml_param_space, + scoring_methods=None, + cv=5, + set_as_params=True, + return_tune_res=False, + optuna_settings=None, + ): + """Hyperparameter-tuning for DoubleML models using Optuna.""" + + tuning_kwargs = { + "ml_param_space": ml_param_space, + "scoring_methods": scoring_methods, + "cv": cv, + "set_as_params": set_as_params, + "return_tune_res": return_tune_res, + "optuna_settings": optuna_settings, + } + + tune_res = [] if return_tune_res else None + + for model in self.modellist: + res = model.tune_ml_models(**tuning_kwargs) + if return_tune_res: + tune_res.append(res[0]) + + return tune_res if return_tune_res else self + + tune_ml_models.__doc__ = TUNE_ML_MODELS_DOC + def bootstrap(self, method="normal", n_rep_boot=500): """ Multiplier bootstrap for DoubleML models. diff --git a/doubleml/did/tests/test_did_binary_control_groups.py b/doubleml/did/tests/test_did_binary_control_groups.py index 627cf50a..ca493435 100644 --- a/doubleml/did/tests/test_did_binary_control_groups.py +++ b/doubleml/did/tests/test_did_binary_control_groups.py @@ -1,3 +1,4 @@ +import pytest from sklearn.linear_model import LinearRegression, LogisticRegression import doubleml as dml @@ -17,6 +18,7 @@ } +@pytest.mark.ci def test_control_groups_different(): dml_did_never_treated = dml.did.DoubleMLDIDBinary(control_group="never_treated", **args) dml_did_not_yet_treated = dml.did.DoubleMLDIDBinary(control_group="not_yet_treated", **args) diff --git a/doubleml/did/tests/test_did_binary_tune_ml_models.py b/doubleml/did/tests/test_did_binary_tune_ml_models.py new file mode 100644 index 00000000..e69b4ae6 --- /dev/null +++ b/doubleml/did/tests/test_did_binary_tune_ml_models.py @@ -0,0 +1,72 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +from doubleml.data import DoubleMLPanelData +from doubleml.did import DoubleMLDIDBinary +from doubleml.did.datasets import make_did_CS2021 +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _build_param_space, + _select_binary_periods, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +@pytest.mark.parametrize("score", ["observational", "experimental"]) +def test_doubleml_did_binary_optuna_tune(sampler_name, optuna_sampler, score): + np.random.seed(3152) + df_panel = make_did_CS2021( + n_obs=500, + dgp_type=4, + include_never_treated=True, + time_type="float", + n_periods=4, + n_pre_treat_periods=2, + ) + panel_data = DoubleMLPanelData( + df_panel, + y_col="y", + d_cols="d", + id_col="id", + t_col="t", + x_cols=["Z1", "Z2", "Z3", "Z4"], + ) + + g_value, t_value_pre, t_value_eval = _select_binary_periods(panel_data) + + ml_g = DecisionTreeRegressor(random_state=321, max_depth=1) # underfit + ml_m = DecisionTreeClassifier(random_state=654) + dml_did_binary = DoubleMLDIDBinary( + obj_dml_data=panel_data, + g_value=g_value, + t_value_pre=t_value_pre, + t_value_eval=t_value_eval, + ml_g=ml_g, + ml_m=ml_m, + score=score, + n_folds=5, + ) + dml_did_binary.fit() + untuned_score = dml_did_binary.evaluate_learners() + + optuna_params = _build_param_space(dml_did_binary, _small_tree_params) + + optuna_settings 
= _basic_optuna_settings({"sampler": optuna_sampler}) + tune_res = dml_did_binary.tune_ml_models( + ml_param_space=optuna_params, optuna_settings=optuna_settings, return_tune_res=True + ) + + dml_did_binary.fit() + tuned_score = dml_did_binary.evaluate_learners() + + for learner_name in dml_did_binary.params_names: + tuned_params = tune_res[0][learner_name].best_params + _assert_tree_params(tuned_params) + + # ensure tuning improved RMSE + assert tuned_score[learner_name] < untuned_score[learner_name] diff --git a/doubleml/did/tests/test_did_cs_binary_control_groups.py b/doubleml/did/tests/test_did_cs_binary_control_groups.py index ea4f2933..5c3202af 100644 --- a/doubleml/did/tests/test_did_cs_binary_control_groups.py +++ b/doubleml/did/tests/test_did_cs_binary_control_groups.py @@ -1,3 +1,4 @@ +import pytest from sklearn.linear_model import LinearRegression, LogisticRegression import doubleml as dml @@ -17,6 +18,7 @@ } +@pytest.mark.ci def test_control_groups_different(): dml_did_never_treated = dml.did.DoubleMLDIDCSBinary(control_group="never_treated", **args) dml_did_not_yet_treated = dml.did.DoubleMLDIDCSBinary(control_group="not_yet_treated", **args) diff --git a/doubleml/did/tests/test_did_cs_binary_tune_ml_models.py b/doubleml/did/tests/test_did_cs_binary_tune_ml_models.py new file mode 100644 index 00000000..06398135 --- /dev/null +++ b/doubleml/did/tests/test_did_cs_binary_tune_ml_models.py @@ -0,0 +1,70 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +from doubleml.data import DoubleMLPanelData +from doubleml.did import DoubleMLDIDCSBinary +from doubleml.did.datasets import make_did_cs_CS2021 +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _build_param_space, + _select_binary_periods, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +@pytest.mark.parametrize("score", ["observational", "experimental"]) +def test_doubleml_did_cs_binary_optuna_tune(sampler_name, optuna_sampler, score): + np.random.seed(3153) + df_panel = make_did_cs_CS2021( + n_obs=500, + dgp_type=4, + include_never_treated=True, + time_type="float", + ) + panel_data = DoubleMLPanelData( + df_panel, + y_col="y", + d_cols="d", + id_col="id", + t_col="t", + x_cols=["Z1", "Z2", "Z3", "Z4"], + ) + g_value, t_value_pre, t_value_eval = _select_binary_periods(panel_data) + + ml_g = DecisionTreeRegressor(random_state=321, max_depth=1) # underfit + ml_m = DecisionTreeClassifier(random_state=654) + + dml_did_cs_binary = DoubleMLDIDCSBinary( + obj_dml_data=panel_data, + g_value=g_value, + t_value_pre=t_value_pre, + t_value_eval=t_value_eval, + ml_g=ml_g, + ml_m=ml_m, + score=score, + n_folds=5, + ) + dml_did_cs_binary.fit() + untuned_score = dml_did_cs_binary.evaluate_learners() + + optuna_params = _build_param_space(dml_did_cs_binary, _small_tree_params) + + optuna_settings = _basic_optuna_settings({"sampler": optuna_sampler}) + tune_res = dml_did_cs_binary.tune_ml_models( + ml_param_space=optuna_params, optuna_settings=optuna_settings, return_tune_res=True + ) + + dml_did_cs_binary.fit() + tuned_score = dml_did_cs_binary.evaluate_learners() + + for learner_name in dml_did_cs_binary.params_names: + tuned_params = tune_res[0][learner_name].best_params + _assert_tree_params(tuned_params) + + # ensure tuning improved RMSE or LogLoss + assert
tuned_score[learner_name] < untuned_score[learner_name] diff --git a/doubleml/did/tests/test_did_cs_tune_ml_models.py b/doubleml/did/tests/test_did_cs_tune_ml_models.py new file mode 100644 index 00000000..0a44d412 --- /dev/null +++ b/doubleml/did/tests/test_did_cs_tune_ml_models.py @@ -0,0 +1,50 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +import doubleml as dml +from doubleml.did.datasets import make_did_SZ2020 +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _build_param_space, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +@pytest.mark.parametrize("score", ["observational", "experimental"]) +def test_doubleml_did_cs_optuna_tune(sampler_name, optuna_sampler, score): + np.random.seed(3151) + dml_data = make_did_SZ2020( + n_obs=500, + dgp_type=4, + cross_sectional_data=True, + return_type="DoubleMLDIDData", + ) + + ml_g = DecisionTreeRegressor(random_state=321, max_depth=1) # underfit + if score == "observational": + ml_m = DecisionTreeClassifier(random_state=654) + dml_did_cs = dml.DoubleMLDIDCS(dml_data, ml_g, ml_m, score=score, n_folds=5) + else: + dml_did_cs = dml.DoubleMLDIDCS(dml_data, ml_g, score=score, n_folds=5) + dml_did_cs.fit() + untuned_score = dml_did_cs.evaluate_learners() + + optuna_params = _build_param_space(dml_did_cs, _small_tree_params) + + optuna_settings = _basic_optuna_settings({"sampler": optuna_sampler}) + tune_res = dml_did_cs.tune_ml_models(ml_param_space=optuna_params, optuna_settings=optuna_settings, return_tune_res=True) + + dml_did_cs.fit() + tuned_score = dml_did_cs.evaluate_learners() + + for learner_name in dml_did_cs.params_names: + tuned_params = tune_res[0][learner_name].best_params + _assert_tree_params(tuned_params) + + # ensure tuning improved RMSE + assert tuned_score[learner_name] < untuned_score[learner_name] diff --git a/doubleml/did/tests/test_did_tune_ml_models.py b/doubleml/did/tests/test_did_tune_ml_models.py new file mode 100644 index 00000000..b0ba76cb --- /dev/null +++ b/doubleml/did/tests/test_did_tune_ml_models.py @@ -0,0 +1,47 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +import doubleml as dml +from doubleml.did.datasets import make_did_SZ2020 +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _build_param_space, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +@pytest.mark.parametrize("score", ["observational", "experimental"]) +def test_doubleml_did_optuna_tune(sampler_name, optuna_sampler, score): + """Test DID with ml_g0, ml_g1 (and ml_m for observational score) nuisance models.""" + + np.random.seed(3150) + dml_data = make_did_SZ2020(n_obs=500, dgp_type=4, return_type="DoubleMLDIDData") + + ml_g = DecisionTreeRegressor(random_state=321, max_depth=1) # underfit + if score == "observational": + ml_m = DecisionTreeClassifier(random_state=654) + dml_did = dml.DoubleMLDID(dml_data, ml_g, ml_m, score=score, n_folds=5) + else: + dml_did = dml.DoubleMLDID(dml_data, ml_g, score=score, n_folds=5) + dml_did.fit() + untuned_score = dml_did.evaluate_learners() + + optuna_params = _build_param_space(dml_did, _small_tree_params) + + optuna_settings = 
_basic_optuna_settings({"sampler": optuna_sampler}) + tune_res = dml_did.tune_ml_models(ml_param_space=optuna_params, optuna_settings=optuna_settings, return_tune_res=True) + + dml_did.fit() + tuned_score = dml_did.evaluate_learners() + + for learner_name in dml_did.params_names: + tuned_params = tune_res[0][learner_name].best_params + _assert_tree_params(tuned_params) + + # ensure tuning improved RMSE + assert tuned_score[learner_name] < untuned_score[learner_name] diff --git a/doubleml/double_ml.py b/doubleml/double_ml.py index 6293731a..eb119615 100644 --- a/doubleml/double_ml.py +++ b/doubleml/double_ml.py @@ -14,6 +14,7 @@ from doubleml.utils._checks import _check_external_predictions from doubleml.utils._estimation import _aggregate_coefs_and_ses, _rmse, _set_external_predictions, _var_est from doubleml.utils._sensitivity import _compute_sensitivity_bias +from doubleml.utils._tune_optuna import OPTUNA_GLOBAL_SETTING_KEYS, TUNE_ML_MODELS_DOC, resolve_optuna_cv from doubleml.utils.gain_statistics import gain_statistics _implemented_data_backends = ["DoubleMLData", "DoubleMLClusterData", "DoubleMLDIDData", "DoubleMLSSMData", "DoubleMLRDDData"] @@ -765,6 +766,11 @@ def tune( """ Hyperparameter-tuning for DoubleML models. + .. deprecated:: 0.13.0 + The ``tune`` method using grid/randomized search is maintained for backward compatibility. + For more efficient hyperparameter optimization, use :meth:`tune_ml_models` with Optuna, + which provides Bayesian optimization and better performance. + The hyperparameter-tuning is performed using either an exhaustive search over specified parameter values implemented in :class:`sklearn.model_selection.GridSearchCV` or via a randomized search implemented in :class:`sklearn.model_selection.RandomizedSearchCV`. @@ -818,6 +824,16 @@ def tune( A list containing detailed tuning results and the proposed hyperparameters. Returned if ``return_tune_res`` is ``True``. """ + # Deprecation warning for the tune method + warnings.warn( + "The 'tune' method using grid search or randomized search is maintained for backward compatibility. " + "It will be removed in future versions. " + "For more advanced hyperparameter optimization, consider using 'tune_ml_models' with Optuna, " + "which offers Bayesian optimization and is generally more efficient. " + "See the documentation for 'tune_ml_models' for usage examples.", + FutureWarning, + stacklevel=2, + ) if (not isinstance(param_grids, dict)) | (not all(k in param_grids for k in self.learner_names)): raise ValueError( @@ -934,6 +950,164 @@ def tune( else: return self + def tune_ml_models( + self, + ml_param_space, + scoring_methods=None, + cv=5, + set_as_params=True, + return_tune_res=False, + optuna_settings=None, + ): + """Hyperparameter-tuning for DoubleML models using Optuna.""" + + # Validation + + expanded_param_space = self._validate_optuna_param_space(ml_param_space) + scoring_methods = self._resolve_scoring_methods(scoring_methods) + cv_splitter = resolve_optuna_cv(cv) + self._validate_optuna_setting_keys(optuna_settings) + + if not isinstance(set_as_params, bool): + raise TypeError(f"set_as_params must be True or False. Got {str(set_as_params)}.") + + if not isinstance(return_tune_res, bool): + raise TypeError(f"return_tune_res must be True or False. 
Got {str(return_tune_res)}.") + + # Optuna tuning is always global (not fold-specific) + tuning_res = [None] * self._dml_data.n_treat + + for i_d in range(self._dml_data.n_treat): + self._i_treat = i_d + # this step could be skipped for the single treatment variable case + if self._dml_data.n_treat > 1: + self._dml_data.set_x_d(self._dml_data.d_cols[i_d]) + + # tune hyperparameters (globally, not fold-specific) + res = self._nuisance_tuning_optuna( + expanded_param_space, + scoring_methods, + cv_splitter, + optuna_settings, + ) + + tuning_res[i_d] = res + if set_as_params: + for nuisance_model, tuned_result in res.items(): + if tuned_result is None: + params_to_set = None + else: + params_to_set = tuned_result.best_params + + self.set_ml_nuisance_params(nuisance_model, self._dml_data.d_cols[i_d], params_to_set) + + return tuning_res if return_tune_res else self + + tune_ml_models.__doc__ = TUNE_ML_MODELS_DOC + + def _resolve_scoring_methods(self, scoring_methods): + """Resolve scoring methods to ensure all learners have an entry.""" + + if scoring_methods is None: + return None + + if not isinstance(scoring_methods, dict): + raise TypeError("scoring_methods must be provided as a dictionary keyed by learner name.") + + invalid_scoring_keys = [key for key in scoring_methods if key not in self.params_names] + if invalid_scoring_keys: + raise ValueError( + "Invalid scoring_methods keys for " + + self.__class__.__name__ + + ": " + + ", ".join(sorted(invalid_scoring_keys)) + + ". Valid keys are: " + + ", ".join(self.params_names) + + "." + ) + + resolved = dict(scoring_methods) + for learner_name in self.params_names: + resolved.setdefault(learner_name, None) + + return resolved + + def _validate_optuna_setting_keys(self, optuna_settings): + """Validate learner-level keys provided in optuna_settings.""" + + if optuna_settings is not None and not isinstance(optuna_settings, dict): + raise TypeError(f"optuna_settings must be a dict or None. Got {str(type(optuna_settings))}.") + + if not optuna_settings: + return + + allowed_learner_keys = set(self.params_names) | set(self.learner_names) + invalid_keys = [ + key for key in optuna_settings if key not in OPTUNA_GLOBAL_SETTING_KEYS and key not in allowed_learner_keys + ] + + if invalid_keys: + if allowed_learner_keys: + valid_keys_msg = ", ".join(sorted(allowed_learner_keys)) + else: + valid_keys_msg = "" + raise ValueError( + "Invalid optuna_settings keys for " + + self.__class__.__name__ + + ": " + + ", ".join(sorted(invalid_keys)) + + ". Valid learner-specific keys are: " + + valid_keys_msg + + "." + ) + + for key in allowed_learner_keys: + if key in optuna_settings and not isinstance(optuna_settings[key], dict): + raise TypeError(f"Optuna settings for '{key}' must be a dict.") + + def _validate_optuna_param_space(self, ml_param_space): + """Validate learner keys provided in the Optuna parameter space dictionary.""" + + if not isinstance(ml_param_space, dict) or not ml_param_space: + raise ValueError("ml_param_space must be a non-empty dictionary.") + + allowed_param_keys = set(self.params_names) | set(self.learner_names) + invalid_keys = [key for key in ml_param_space if key not in allowed_param_keys] + + if invalid_keys: + valid_keys_msg = ", ".join(sorted(allowed_param_keys)) if allowed_param_keys else "" + raise ValueError( + "Invalid ml_param_space keys for " + + self.__class__.__name__ + + ": " + + ", ".join(sorted(invalid_keys)) + + ". Valid keys are: " + + valid_keys_msg + + "." 
+ ) + final_param_space = {k: None for k in self.params_names} + + # Validate that all parameter spaces are callables + for learner_name, param_fn in ml_param_space.items(): + if param_fn is None: + continue + if not callable(param_fn): + raise TypeError( + f"Parameter space for '{learner_name}' must be a callable function that takes a trial " + f"and returns a dict. Got {type(param_fn).__name__}. " + f"Example: def ml_params(trial): return {{'lr': trial.suggest_float('lr', 0.01, 0.1)}}" + ) + + # Set Hyperparameter spaces for learners (global / learner_name level) + for learner_name in [ln for ln in self.learner_names if ln in ml_param_space.keys()]: + for param_key in [pk for pk in self.params_names if learner_name in pk]: + final_param_space[param_key] = ml_param_space[learner_name] + # Override if param_name specific space is provided + for param_key in [pk for pk in self.params_names if pk in ml_param_space.keys()]: + final_param_space[param_key] = ml_param_space[param_key] + + return final_param_space + def set_ml_nuisance_params(self, learner, treat_var, params): """ Set hyperparameters for the nuisance models of DoubleML models. @@ -1011,10 +1185,32 @@ def _nuisance_est(self, smpls, n_jobs_cv, return_models, external_predictions): @abstractmethod def _nuisance_tuning( - self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search + self, + smpls, + param_grids, + scoring_methods, + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, ): pass + @abstractmethod + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + """ + Optuna-based hyperparameter tuning hook. + + Subclasses should override this method to provide Optuna tuning support. 
+ """ + raise NotImplementedError(f"Optuna tuning not implemented for {self.__class__.__name__}.") + @staticmethod def _check_learner(learner, learner_name, regressor, classifier): err_msg_prefix = f"Invalid learner provided for {learner_name}: " @@ -1246,8 +1442,8 @@ def evaluate_learners(self, learners=None, metric=_rmse): >>> from doubleml.irm.datasets import make_irm_data >>> from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier >>> np.random.seed(3141) - >>> ml_g = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) - >>> ml_m = RandomForestClassifier(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2) + >>> ml_g = RandomForestRegressor(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2, random_state=42) + >>> ml_m = RandomForestClassifier(n_estimators=100, max_features=20, max_depth=5, min_samples_leaf=2, random_state=42) >>> data = make_irm_data(theta=0.5, n_obs=500, dim_x=20, return_type='DataFrame') >>> obj_dml_data = dml.DoubleMLData(data, 'y', 'd') >>> dml_irm_obj = dml.DoubleMLIRM(obj_dml_data, ml_g, ml_m) diff --git a/doubleml/irm/apo.py b/doubleml/irm/apo.py index 7441d112..9248c88d 100644 --- a/doubleml/irm/apo.py +++ b/doubleml/irm/apo.py @@ -15,6 +15,7 @@ ) from doubleml.utils._estimation import _cond_targets, _dml_cv_predict, _dml_tune, _get_cond_smpls from doubleml.utils._propensity_score import _propensity_score_adjustment +from doubleml.utils._tune_optuna import _dml_tune_optuna from doubleml.utils.blp import DoubleMLBLP from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor @@ -457,6 +458,75 @@ def _nuisance_tuning( return res + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + dx = np.column_stack((d, x)) + treated_indicator = self.treated.astype(bool) + + if scoring_methods is None: + scoring_methods = {"ml_g_d_lvl0": None, "ml_g_d_lvl1": None, "ml_m": None} + + mask_lvl1 = treated_indicator + mask_lvl0 = np.logical_not(mask_lvl1) + + dx_lvl0 = dx[mask_lvl0, :] + y_lvl0 = y[mask_lvl0] + g_lvl0_param_grid = optuna_params["ml_g_d_lvl0"] + g_lvl0_scoring = scoring_methods["ml_g_d_lvl0"] + g_d_lvl0_tune_res = _dml_tune_optuna( + y_lvl0, + dx_lvl0, + self._learner["ml_g"], + g_lvl0_param_grid, + g_lvl0_scoring, + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g_d_lvl0", + ) + + x_lvl1 = x[mask_lvl1, :] + y_lvl1 = y[mask_lvl1] + g_lvl1_param_grid = optuna_params["ml_g_d_lvl1"] + g_lvl1_scoring = scoring_methods["ml_g_d_lvl1"] + g_d_lvl1_tune_res = _dml_tune_optuna( + y_lvl1, + x_lvl1, + self._learner["ml_g"], + g_lvl1_param_grid, + g_lvl1_scoring, + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g_d_lvl1", + ) + + m_tune_res = _dml_tune_optuna( + treated_indicator.astype(float), + x, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + + return { + "ml_g_d_lvl0": g_d_lvl0_tune_res, + "ml_g_d_lvl1": g_d_lvl1_tune_res, + "ml_m": m_tune_res, + } + def _check_data(self, obj_dml_data): if len(obj_dml_data.d_cols) > 1: raise ValueError( diff --git a/doubleml/irm/apos.py b/doubleml/irm/apos.py index 23e7085e..848c87a6 100644 --- a/doubleml/irm/apos.py +++ b/doubleml/irm/apos.py @@ -16,12 +16,75 @@ from 
doubleml.utils._checks import _check_score, _check_weights from doubleml.utils._descriptive import generate_summary from doubleml.utils._sensitivity import _compute_sensitivity_bias +from doubleml.utils._tune_optuna import TUNE_ML_MODELS_DOC from doubleml.utils.gain_statistics import gain_statistics from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor class DoubleMLAPOS(SampleSplittingMixin): - """Double machine learning for interactive regression models with multiple discrete treatments.""" + """Double machine learning for interactive regression models with multiple discrete + treatments. + + Parameters + ---------- + obj_dml_data : :class:`DoubleMLData` object + The :class:`DoubleMLData` object providing the data and specifying the variables for the causal model. + + ml_g : estimator implementing ``fit()`` and ``predict()`` + A machine learner implementing ``fit()`` and ``predict()`` methods (e.g. + :py:class:`sklearn.ensemble.RandomForestRegressor`) for the nuisance function :math:`g_0(D, X) = E[Y | X, D]`. + For a binary outcome variable :math:`Y` (with values 0 and 1), a classifier implementing ``fit()`` and + ``predict_proba()`` can also be specified. If :py:func:`sklearn.base.is_classifier` returns ``True``, + ``predict_proba()`` is used otherwise ``predict()``. + + ml_m : classifier implementing ``fit()`` and ``predict_proba()`` + A machine learner implementing ``fit()`` and ``predict_proba()`` methods (e.g. + :py:class:`sklearn.ensemble.RandomForestClassifier`) for the nuisance function :math:`m_0(X) = E[D | X]`. + + treatment_levels : iterable of int or float + The treatment levels for which average potential outcomes are evaluated. Each element must be present in the + treatment variable ``d`` of ``obj_dml_data``. + + n_folds : int + Number of folds. + Default is ``5``. + + n_rep : int + Number of repetitions for the sample splitting. + Default is ``1``. + + score : str + A str (``'APO'``) specifying the score function. + Default is ``'APO'``. + + weights : array, dict or None + A numpy array of weights for each individual observation. If ``None``, then the ``'APO'`` score + is applied (corresponds to weights equal to 1). + An array has to be of shape ``(n,)``, where ``n`` is the number of observations. + A dictionary can be used to specify weights which depend on the treatment variable. + In this case, the dictionary has to contain two keys ``weights`` and ``weights_bar``, where the values + have to be arrays of shape ``(n,)`` and ``(n, n_rep)``. + Default is ``None``. + + normalize_ipw : bool + Indicates whether the inverse probability weights are normalized. + Default is ``False``. + + trimming_rule : str, optional, deprecated + (DEPRECATED) A str (``'truncate'`` is the only choice) specifying the trimming approach. + Use ``ps_processor_config`` instead. Will be removed in a future version. + + trimming_threshold : float, optional, deprecated + (DEPRECATED) The threshold used for trimming. + Use ``ps_processor_config`` instead. Will be removed in a future version. + + ps_processor_config : PSProcessorConfig, optional + Configuration for propensity score processing (clipping, calibration, etc.). + + draw_sample_splitting : bool + Indicates whether the sample splitting should be drawn during initialization of the object. + Default is ``True``. 
+ """ def __init__( self, @@ -863,3 +926,32 @@ def _initialize_models(self): modellist[i_level] = model return modellist + + def tune_ml_models( + self, + ml_param_space, + scoring_methods=None, + cv=5, + set_as_params=True, + return_tune_res=False, + optuna_settings=None, + ): + """Hyperparameter-tuning for DoubleML models using Optuna.""" + + tuning_kwargs = { + "ml_param_space": ml_param_space, + "scoring_methods": scoring_methods, + "cv": cv, + "set_as_params": set_as_params, + "return_tune_res": return_tune_res, + "optuna_settings": optuna_settings, + } + + tune_res = [] if return_tune_res else None + for model in self._modellist: + res = model.tune_ml_models(**tuning_kwargs) + if return_tune_res: + tune_res.append(res[0]) + return tune_res if return_tune_res else self + + tune_ml_models.__doc__ = TUNE_ML_MODELS_DOC diff --git a/doubleml/irm/cvar.py b/doubleml/irm/cvar.py index 6c698293..cd32756e 100644 --- a/doubleml/irm/cvar.py +++ b/doubleml/irm/cvar.py @@ -25,6 +25,7 @@ _solve_ipw_score, ) from doubleml.utils._propensity_score import _normalize_ipw +from doubleml.utils._tune_optuna import _dml_tune_optuna from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor @@ -413,6 +414,54 @@ def _nuisance_tuning( return res + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + + if scoring_methods is None: + scoring_methods = {"ml_g": None, "ml_m": None} + + mask_treat = d == self.treatment + + quantile_approx = np.quantile(y[mask_treat], self.quantile) + g_target_1 = np.full_like(y, quantile_approx, dtype=float) + g_target_2 = (y - self.quantile * quantile_approx) / (1 - self.quantile) + g_target_approx = np.maximum(g_target_1, g_target_2) + + x_treat = x[mask_treat, :] + target_treat = g_target_approx[mask_treat] + g_tune_res = _dml_tune_optuna( + target_treat, + x_treat, + self._learner["ml_g"], + optuna_params["ml_g"], + scoring_methods["ml_g"], + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g", + ) + + m_tune_res = _dml_tune_optuna( + d, + x, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + return {"ml_g": g_tune_res, "ml_m": m_tune_res} + def _check_data(self, obj_dml_data): if not isinstance(obj_dml_data, DoubleMLData): raise TypeError( diff --git a/doubleml/irm/iivm.py b/doubleml/irm/iivm.py index 50513c0f..4f0c59e9 100644 --- a/doubleml/irm/iivm.py +++ b/doubleml/irm/iivm.py @@ -17,6 +17,7 @@ ) from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls, _solve_quadratic_inequality from doubleml.utils._propensity_score import _normalize_ipw +from doubleml.utils._tune_optuna import _dml_tune_optuna from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor @@ -593,6 +594,112 @@ def _nuisance_tuning( return res + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + """ + Optuna-based hyperparameter tuning for IIVM nuisance models. + + Performs tuning once on the whole dataset using cross-validation, + returning the same optimal parameters for all folds. 
+ """ + + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, z = check_X_y(x, np.ravel(self._dml_data.z), ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + + if scoring_methods is None: + scoring_methods = {"ml_g0": None, "ml_g1": None, "ml_m": None, "ml_r0": None, "ml_r1": None} + + # Separate data by instrument status for conditional mean tuning + mask_z0 = z == 0 + mask_z1 = z == 1 + + x_z0 = x[mask_z0, :] + y_z0 = y[mask_z0] + g0_tune_res = _dml_tune_optuna( + y_z0, + x_z0, + self._learner["ml_g"], + optuna_params["ml_g0"], + scoring_methods["ml_g0"], + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g0", + ) + + x_z1 = x[mask_z1, :] + y_z1 = y[mask_z1] + g1_tune_res = _dml_tune_optuna( + y_z1, + x_z1, + self._learner["ml_g"], + optuna_params["ml_g1"], + scoring_methods["ml_g1"], + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g1", + ) + + # Tune propensity score on full dataset + m_tune_res = _dml_tune_optuna( + z, + x, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + + r0_tune_res = None + r1_tune_res = None + if self.subgroups["always_takers"]: + d_z0 = d[mask_z0] + r0_tune_res = _dml_tune_optuna( + d_z0, + x_z0, + self._learner["ml_r"], + optuna_params["ml_r0"], + scoring_methods["ml_r0"], + cv, + optuna_settings, + learner_name="ml_r", + params_name="ml_r0", + ) + + if self.subgroups["never_takers"]: + d_z1 = d[mask_z1] + r1_tune_res = _dml_tune_optuna( + d_z1, + x_z1, + self._learner["ml_r"], + optuna_params["ml_r1"], + scoring_methods["ml_r1"], + cv, + optuna_settings, + learner_name="ml_r", + params_name="ml_r1", + ) + + results = { + "ml_g0": g0_tune_res, + "ml_g1": g1_tune_res, + "ml_m": m_tune_res, + "ml_r0": r0_tune_res, + "ml_r1": r1_tune_res, + } + + return results + def _sensitivity_element_est(self, preds): pass diff --git a/doubleml/irm/irm.py b/doubleml/irm/irm.py index f5abdbd9..16db426b 100644 --- a/doubleml/irm/irm.py +++ b/doubleml/irm/irm.py @@ -18,6 +18,7 @@ ) from doubleml.utils._estimation import _cond_targets, _dml_cv_predict, _dml_tune, _get_cond_smpls from doubleml.utils._propensity_score import _propensity_score_adjustment +from doubleml.utils._tune_optuna import _dml_tune_optuna from doubleml.utils.blp import DoubleMLBLP from doubleml.utils.policytree import DoubleMLPolicyTree from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor @@ -494,6 +495,72 @@ def _nuisance_tuning( return res + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + """ + Optuna-based hyperparameter tuning for IRM nuisance models. + + Performs tuning once on the whole dataset using cross-validation, + returning the same optimal parameters for all folds. 
+ """ + + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + + if scoring_methods is None: + scoring_methods = {"ml_g0": None, "ml_g1": None, "ml_m": None} + + # Separate data by treatment status for conditional mean tuning + mask_d0 = d == 0 + mask_d1 = d == 1 + + x_d0 = x[mask_d0, :] + y_d0 = y[mask_d0] + g0_tune_res = _dml_tune_optuna( + y_d0, + x_d0, + self._learner["ml_g"], + optuna_params["ml_g0"], + scoring_methods["ml_g0"], + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g0", + ) + + x_d1 = x[mask_d1, :] + y_d1 = y[mask_d1] + g1_tune_res = _dml_tune_optuna( + y_d1, + x_d1, + self._learner["ml_g"], + optuna_params["ml_g1"], + scoring_methods["ml_g1"], + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g1", + ) + + # Tune propensity score on full dataset + m_tune_res = _dml_tune_optuna( + d, + x, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + return {"ml_g0": g0_tune_res, "ml_g1": g1_tune_res, "ml_m": m_tune_res} + def cate(self, basis, is_gate=False, **kwargs): """ Calculate conditional average treatment effects (CATE) for a given basis. diff --git a/doubleml/irm/lpq.py b/doubleml/irm/lpq.py index 5dd8ff37..4301dfda 100644 --- a/doubleml/irm/lpq.py +++ b/doubleml/irm/lpq.py @@ -21,6 +21,7 @@ _solve_ipw_score, ) from doubleml.utils._propensity_score import _normalize_ipw +from doubleml.utils._tune_optuna import _dml_tune_optuna from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor @@ -694,6 +695,105 @@ def _nuisance_tuning( return res + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + x, z = check_X_y(x, np.ravel(self._dml_data.z), ensure_all_finite=False) + + if scoring_methods is None: + scoring_methods = { + "ml_m_z": None, + "ml_m_d_z0": None, + "ml_m_d_z1": None, + "ml_g_du_z0": None, + "ml_g_du_z1": None, + } + + approx_quant = np.quantile(y[d == self.treatment], self.quantile) + du = (d == self.treatment) * (y <= approx_quant) + + m_z_tune_res = _dml_tune_optuna( + z, + x, + self._learner["ml_m_z"], + optuna_params["ml_m_z"], + scoring_methods["ml_m_z"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m_z", + ) + + mask_z0 = z == 0 + mask_z1 = z == 1 + + x_z0 = x[mask_z0, :] + d_z0 = d[mask_z0] + du_z0 = du[mask_z0] + m_d_z0_tune_res = _dml_tune_optuna( + d_z0, + x_z0, + self._learner["ml_m_d_z0"], + optuna_params["ml_m_d_z0"], + scoring_methods["ml_m_d_z0"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m_d_z0", + ) + g_du_z0_tune_res = _dml_tune_optuna( + du_z0, + x_z0, + self._learner["ml_g_du_z0"], + optuna_params["ml_g_du_z0"], + scoring_methods["ml_g_du_z0"], + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g_du_z0", + ) + + x_z1 = x[mask_z1, :] + d_z1 = d[mask_z1] + du_z1 = du[mask_z1] + m_d_z1_tune_res = _dml_tune_optuna( + d_z1, + x_z1, + self._learner["ml_m_d_z1"], + optuna_params["ml_m_d_z1"], + scoring_methods["ml_m_d_z1"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m_d_z1", + ) + g_du_z1_tune_res = _dml_tune_optuna( + du_z1, + x_z1, + self._learner["ml_g_du_z1"], + optuna_params["ml_g_du_z1"], + 
scoring_methods["ml_g_du_z1"], + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g_du_z1", + ) + + return { + "ml_m_z": m_z_tune_res, + "ml_m_d_z0": m_d_z0_tune_res, + "ml_m_d_z1": m_d_z1_tune_res, + "ml_g_du_z0": g_du_z0_tune_res, + "ml_g_du_z1": g_du_z1_tune_res, + } + def _check_data(self, obj_dml_data): if not isinstance(obj_dml_data, DoubleMLData): raise TypeError( diff --git a/doubleml/irm/pq.py b/doubleml/irm/pq.py index 901c07b7..73e9e3d4 100644 --- a/doubleml/irm/pq.py +++ b/doubleml/irm/pq.py @@ -26,6 +26,7 @@ _solve_ipw_score, ) from doubleml.utils._propensity_score import _normalize_ipw +from doubleml.utils._tune_optuna import _dml_tune_optuna from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor @@ -480,6 +481,50 @@ def _nuisance_tuning( return res + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + + if scoring_methods is None: + scoring_methods = {"ml_g": None, "ml_m": None} + + mask_treat = d == self.treatment + approx_goal = y <= np.quantile(y[mask_treat], self.quantile) + + x_treat = x[mask_treat, :] + goal_treat = approx_goal[mask_treat] + g_tune_res = _dml_tune_optuna( + goal_treat, + x_treat, + self._learner["ml_g"], + optuna_params["ml_g"], + scoring_methods["ml_g"], + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g", + ) + + m_tune_res = _dml_tune_optuna( + d, + x, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + return {"ml_g": g_tune_res, "ml_m": m_tune_res} + def _check_data(self, obj_dml_data): if not isinstance(obj_dml_data, DoubleMLData): raise TypeError( diff --git a/doubleml/irm/qte.py b/doubleml/irm/qte.py index c3325e08..c572001e 100644 --- a/doubleml/irm/qte.py +++ b/doubleml/irm/qte.py @@ -15,6 +15,7 @@ from doubleml.utils._checks import _check_score, _check_zero_one_treatment from doubleml.utils._descriptive import generate_summary from doubleml.utils._estimation import _default_kde +from doubleml.utils._tune_optuna import TUNE_ML_MODELS_DOC from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor @@ -536,6 +537,42 @@ def p_adjust(self, method="romano-wolf"): return p_val + def tune_ml_models( + self, + ml_param_space, + scoring_methods=None, + cv=5, + set_as_params=True, + return_tune_res=False, + optuna_settings=None, + ): + """Hyperparameter-tuning for DoubleML models using Optuna.""" + + tuning_kwargs = { + "ml_param_space": ml_param_space, + "scoring_methods": scoring_methods, + "cv": cv, + "set_as_params": set_as_params, + "return_tune_res": return_tune_res, + "optuna_settings": optuna_settings, + } + + tune_res = [] if return_tune_res else None + + for i_quant in range(self.n_quantiles): + model_0 = self.modellist_0[i_quant] + model_1 = self.modellist_1[i_quant] + + res_0 = model_0.tune_ml_models(**tuning_kwargs) + res_1 = model_1.tune_ml_models(**tuning_kwargs) + + if return_tune_res: + tune_res.append({"treatment_0": res_0[0], "treatment_1": res_1[0]}) + + return tune_res if return_tune_res else self + + tune_ml_models.__doc__ = TUNE_ML_MODELS_DOC + def _fit_quantile(self, i_quant, n_jobs_cv=None, store_predictions=True, store_models=False): model_0 = self.modellist_0[i_quant] model_1 = self.modellist_1[i_quant] diff --git 
a/doubleml/irm/ssm.py b/doubleml/irm/ssm.py index bc6cd739..9c1fd6ed 100644 --- a/doubleml/irm/ssm.py +++ b/doubleml/irm/ssm.py @@ -12,6 +12,7 @@ from doubleml.double_ml_score_mixins import LinearScoreMixin from doubleml.utils._checks import _check_finite_predictions, _check_score from doubleml.utils._estimation import _dml_cv_predict, _dml_tune, _get_cond_smpls_2d, _predict_zero_one_propensity +from doubleml.utils._tune_optuna import _dml_tune_optuna from doubleml.utils.propensity_score_processing import PSProcessorConfig, init_ps_processor @@ -572,5 +573,99 @@ def filter_by_ds(inner_train1_inds, d, s): return {"params": params, "tune_res": tune_res} + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + x, s = check_X_y(x, self._dml_data.s, ensure_all_finite=False) + + if scoring_methods is None: + scoring_methods = { + "ml_g_d0": None, + "ml_g_d1": None, + "ml_pi": None, + "ml_m": None, + } + + def get_param_and_scoring(key): + return optuna_params[key], scoring_methods[key] + + if self._score == "nonignorable": + raise NotImplementedError("Optuna tuning for nonignorable score is not implemented yet. ") + else: + mask_d0_s1 = np.logical_and(d == 0, s == 1) + mask_d1_s1 = np.logical_and(d == 1, s == 1) + + g_d0_param, g_d0_scoring = get_param_and_scoring("ml_g_d0") + g_d1_param, g_d1_scoring = get_param_and_scoring("ml_g_d1") + + x_d0 = x[mask_d0_s1, :] + y_d0 = y[mask_d0_s1] + g_d0_tune_res = _dml_tune_optuna( + y_d0, + x_d0, + self._learner["ml_g"], + g_d0_param, + g_d0_scoring, + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g_d0", + ) + + x_d1 = x[mask_d1_s1, :] + y_d1 = y[mask_d1_s1] + g_d1_tune_res = _dml_tune_optuna( + y_d1, + x_d1, + self._learner["ml_g"], + g_d1_param, + g_d1_scoring, + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g_d1", + ) + + x_d_feat = np.column_stack((x, d)) + pi_tune_res = _dml_tune_optuna( + s, + x_d_feat, + self._learner["ml_pi"], + optuna_params["ml_pi"], + scoring_methods["ml_pi"], + cv, + optuna_settings, + learner_name="ml_pi", + params_name="ml_pi", + ) + + m_tune_res = _dml_tune_optuna( + d, + x, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + + results = { + "ml_g_d0": g_d0_tune_res, + "ml_g_d1": g_d1_tune_res, + "ml_pi": pi_tune_res, + "ml_m": m_tune_res, + } + + return results + def _sensitivity_element_est(self, preds): pass diff --git a/doubleml/irm/tests/test_apo_tune_ml_models.py b/doubleml/irm/tests/test_apo_tune_ml_models.py new file mode 100644 index 00000000..469cdcd9 --- /dev/null +++ b/doubleml/irm/tests/test_apo_tune_ml_models.py @@ -0,0 +1,43 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +import doubleml as dml +from doubleml.irm.datasets import make_irm_data +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _build_param_space, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +def test_doubleml_apo_optuna_tune(sampler_name, optuna_sampler): + np.random.seed(3146) + theta = 0.5 + dml_data = make_irm_data(n_obs=500, dim_x=6, theta=theta) + + ml_g = 
DecisionTreeRegressor(random_state=321) + ml_m = DecisionTreeClassifier(random_state=654) + + dml_apo = dml.DoubleMLAPO(dml_data, ml_g=ml_g, ml_m=ml_m, n_folds=2, treatment_level=1) + dml_apo.fit() + untuned_score = dml_apo.evaluate_learners() + + optuna_params = _build_param_space(dml_apo, _small_tree_params) + + optuna_settings = _basic_optuna_settings({"sampler": optuna_sampler}) + tune_res = dml_apo.tune_ml_models(ml_param_space=optuna_params, optuna_settings=optuna_settings, return_tune_res=True) + + dml_apo.fit() + tuned_score = dml_apo.evaluate_learners() + + for learner_name in dml_apo.params_names: + tuned_params = tune_res[0][learner_name].best_params + _assert_tree_params(tuned_params) + + # ensure tuning improved RMSE + assert tuned_score[learner_name] < untuned_score[learner_name] diff --git a/doubleml/irm/tests/test_cvar_tune_ml_models.py b/doubleml/irm/tests/test_cvar_tune_ml_models.py new file mode 100644 index 00000000..76dc89da --- /dev/null +++ b/doubleml/irm/tests/test_cvar_tune_ml_models.py @@ -0,0 +1,44 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +import doubleml as dml +from doubleml.irm.datasets import make_irm_data +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +def test_doubleml_cvar_optuna_tune(sampler_name, optuna_sampler): + np.random.seed(3145) + dml_data = make_irm_data(n_obs=200, dim_x=5) + + ml_g = DecisionTreeRegressor(random_state=321) + ml_m = DecisionTreeClassifier(random_state=654) + + dml_cvar = dml.DoubleMLCVAR(dml_data, ml_g=ml_g, ml_m=ml_m, n_folds=2) + dml_cvar.fit() + untuned_score = dml_cvar.evaluate_learners() + + optuna_params = {"ml_g": _small_tree_params, "ml_m": _small_tree_params} + + optuna_settings = _basic_optuna_settings({"sampler": optuna_sampler}) + tune_res = dml_cvar.tune_ml_models(ml_param_space=optuna_params, optuna_settings=optuna_settings, return_tune_res=True) + + dml_cvar.fit() + tuned_score = dml_cvar.evaluate_learners() + + tuned_params_g = tune_res[0]["ml_g"].best_params + tuned_params_m = tune_res[0]["ml_m"].best_params + + _assert_tree_params(tuned_params_g) + _assert_tree_params(tuned_params_m) + + # ensure tuning improved RMSE + assert tuned_score["ml_g"] < untuned_score["ml_g"] + assert tuned_score["ml_m"] < untuned_score["ml_m"] diff --git a/doubleml/irm/tests/test_datasets.py b/doubleml/irm/tests/test_datasets.py index 79bf6794..9286e33a 100644 --- a/doubleml/irm/tests/test_datasets.py +++ b/doubleml/irm/tests/test_datasets.py @@ -131,6 +131,7 @@ def n_levels(request): return request.param +@pytest.mark.ci def test_make_data_discrete_treatments(n_levels): np.random.seed(3141) n = 100 diff --git a/doubleml/irm/tests/test_iivm_tune_ml_models.py b/doubleml/irm/tests/test_iivm_tune_ml_models.py new file mode 100644 index 00000000..40dc1b51 --- /dev/null +++ b/doubleml/irm/tests/test_iivm_tune_ml_models.py @@ -0,0 +1,62 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +import doubleml as dml +from doubleml.irm.datasets import make_iivm_data +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", 
_SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +def test_doubleml_iivm_optuna_tune(sampler_name, optuna_sampler): + """Test IIVM with ml_g0, ml_g1, ml_m, ml_r0, ml_r1 nuisance models.""" + + np.random.seed(3143) + dml_data = make_iivm_data(n_obs=1000, dim_x=5) + + ml_g = DecisionTreeRegressor(random_state=321) + ml_m = DecisionTreeClassifier(random_state=420) + ml_r = DecisionTreeClassifier(random_state=789) + + dml_iivm = dml.DoubleMLIIVM(dml_data, ml_g, ml_m, ml_r, n_folds=2, subgroups={"always_takers": True, "never_takers": True}) + dml_iivm.fit() + untuned_score = dml_iivm.evaluate_learners() + + optuna_params = { + "ml_g0": _small_tree_params, + "ml_g1": _small_tree_params, + "ml_m": _small_tree_params, + "ml_r0": _small_tree_params, + "ml_r1": _small_tree_params, + } + + optuna_settings = _basic_optuna_settings({"sampler": optuna_sampler}) + tune_res = dml_iivm.tune_ml_models(ml_param_space=optuna_params, optuna_settings=optuna_settings, return_tune_res=True) + + dml_iivm.fit() + tuned_score = dml_iivm.evaluate_learners() + + tuned_params_g0 = tune_res[0]["ml_g0"].best_params + tuned_params_g1 = tune_res[0]["ml_g1"].best_params + tuned_params_m = tune_res[0]["ml_m"].best_params + tuned_params_r0 = tune_res[0]["ml_r0"].best_params + tuned_params_r1 = tune_res[0]["ml_r1"].best_params + + _assert_tree_params(tuned_params_g0) + _assert_tree_params(tuned_params_g1) + _assert_tree_params(tuned_params_m) + _assert_tree_params(tuned_params_r0) + _assert_tree_params(tuned_params_r1) + + # ensure tuning improved RMSE + assert tuned_score["ml_g0"] < untuned_score["ml_g0"] + assert tuned_score["ml_g1"] < untuned_score["ml_g1"] + assert tuned_score["ml_m"] < untuned_score["ml_m"] + assert tuned_score["ml_r0"] < untuned_score["ml_r0"] + assert tuned_score["ml_r1"] < untuned_score["ml_r1"] diff --git a/doubleml/irm/tests/test_irm_tune_ml_models.py b/doubleml/irm/tests/test_irm_tune_ml_models.py new file mode 100644 index 00000000..54242a02 --- /dev/null +++ b/doubleml/irm/tests/test_irm_tune_ml_models.py @@ -0,0 +1,48 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +import doubleml as dml +from doubleml.irm.datasets import make_irm_data +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +def test_doubleml_irm_optuna_tune(sampler_name, optuna_sampler): + np.random.seed(3142) + dml_data = make_irm_data(n_obs=500, dim_x=5) + + ml_g = DecisionTreeRegressor(random_state=321) + ml_m = DecisionTreeClassifier(random_state=654) + + dml_irm = dml.DoubleMLIRM(dml_data, ml_g, ml_m, n_folds=2) + dml_irm.fit() + untuned_score = dml_irm.evaluate_learners() + + optuna_params = {"ml_g0": _small_tree_params, "ml_g1": _small_tree_params, "ml_m": _small_tree_params} + + optuna_settings = _basic_optuna_settings({"sampler": optuna_sampler}) + + tune_res = dml_irm.tune_ml_models(ml_param_space=optuna_params, optuna_settings=optuna_settings, return_tune_res=True) + + dml_irm.fit() + tuned_score = dml_irm.evaluate_learners() + + tuned_params_g0 = tune_res[0]["ml_g0"].best_params + tuned_params_g1 = tune_res[0]["ml_g1"].best_params + tuned_params_m = tune_res[0]["ml_m"].best_params + + _assert_tree_params(tuned_params_g0) + _assert_tree_params(tuned_params_g1) + _assert_tree_params(tuned_params_m) + + # ensure tuning improved 
RMSE + assert tuned_score["ml_g0"] < untuned_score["ml_g0"] + assert tuned_score["ml_g1"] < untuned_score["ml_g1"] + assert tuned_score["ml_m"] < untuned_score["ml_m"] diff --git a/doubleml/irm/tests/test_irm_with_missings.py b/doubleml/irm/tests/test_irm_with_missings.py index 838ea98a..5c2f7131 100644 --- a/doubleml/irm/tests/test_irm_with_missings.py +++ b/doubleml/irm/tests/test_irm_with_missings.py @@ -150,6 +150,7 @@ def test_dml_irm_w_missing_boot(dml_irm_w_missing_fixture): ) +@pytest.mark.ci def test_irm_exception_with_missings(generate_data_irm_w_missings, learner_sklearn): # collect data (x, y, d) = generate_data_irm_w_missings diff --git a/doubleml/irm/tests/test_lpq_tune_ml_models.py b/doubleml/irm/tests/test_lpq_tune_ml_models.py new file mode 100644 index 00000000..6db0aebc --- /dev/null +++ b/doubleml/irm/tests/test_lpq_tune_ml_models.py @@ -0,0 +1,45 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier + +import doubleml as dml +from doubleml.irm.datasets import make_iivm_data +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _build_param_space, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +def test_doubleml_lpq_optuna_tune(sampler_name, optuna_sampler): + np.random.seed(3148) + dml_data = make_iivm_data(n_obs=500, dim_x=10) + + ml_g = DecisionTreeClassifier(random_state=321) + ml_m = DecisionTreeClassifier(random_state=654) + + dml_lpq = dml.DoubleMLLPQ(dml_data, ml_g=ml_g, ml_m=ml_m, n_folds=2) + dml_lpq.fit() + untuned_score = dml_lpq.nuisance_loss + + optuna_params = _build_param_space(dml_lpq, _small_tree_params) + + optuna_settings = _basic_optuna_settings({"sampler": optuna_sampler}) + optuna_settings["n_trials"] = 10 + tune_res = dml_lpq.tune_ml_models( + ml_param_space=optuna_params, set_as_params=True, optuna_settings=optuna_settings, return_tune_res=True + ) + + dml_lpq.fit() + tuned_score = dml_lpq.nuisance_loss + + for learner_name in dml_lpq.params_names: + tuned_params = tune_res[0][learner_name].best_params + _assert_tree_params(tuned_params) + + # ensure tuning improved RMSE + assert tuned_score[learner_name] < untuned_score[learner_name] diff --git a/doubleml/irm/tests/test_pq_tune_ml_models.py b/doubleml/irm/tests/test_pq_tune_ml_models.py new file mode 100644 index 00000000..19759244 --- /dev/null +++ b/doubleml/irm/tests/test_pq_tune_ml_models.py @@ -0,0 +1,42 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier + +import doubleml as dml +from doubleml.irm.datasets import make_irm_data +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _build_param_space, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +def test_doubleml_pq_optuna_tune(sampler_name, optuna_sampler): + np.random.seed(3147) + dml_data = make_irm_data(n_obs=500, dim_x=10) + + ml_g = DecisionTreeClassifier(random_state=321) + ml_m = DecisionTreeClassifier(random_state=654) + + dml_pq = dml.DoubleMLPQ(dml_data, ml_g=ml_g, ml_m=ml_m, n_folds=2) + dml_pq.fit() + untuned_score = dml_pq.evaluate_learners() + + optuna_params = _build_param_space(dml_pq, _small_tree_params) + + optuna_settings = _basic_optuna_settings({"sampler": optuna_sampler}) + tune_res = 
dml_pq.tune_ml_models(ml_param_space=optuna_params, optuna_settings=optuna_settings, return_tune_res=True) + + dml_pq.fit() + tuned_score = dml_pq.evaluate_learners() + + for learner_name in dml_pq.params_names: + tuned_params = tune_res[0][learner_name].best_params + _assert_tree_params(tuned_params) + + # ensure tuning improved RMSE + assert tuned_score[learner_name] < untuned_score[learner_name] diff --git a/doubleml/irm/tests/test_ssm_tune_ml_models.py b/doubleml/irm/tests/test_ssm_tune_ml_models.py new file mode 100644 index 00000000..6418ae0f --- /dev/null +++ b/doubleml/irm/tests/test_ssm_tune_ml_models.py @@ -0,0 +1,64 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +import doubleml as dml +from doubleml.irm.datasets import make_ssm_data +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _build_param_space, + _small_tree_params, +) + + +# test NotImplementedError for nonignorable score +@pytest.mark.ci +def test_doubleml_ssm_optuna_tune_not_implemented(): + np.random.seed(3149) + dml_data = make_ssm_data(n_obs=500, dim_x=10, mar=False) + + ml_g = DecisionTreeRegressor(random_state=321) + ml_pi = DecisionTreeClassifier(random_state=654) + ml_m = DecisionTreeClassifier(random_state=987) + + dml_ssm = dml.DoubleMLSSM(dml_data, ml_g=ml_g, ml_pi=ml_pi, ml_m=ml_m, n_folds=2, score="nonignorable") + + optuna_params = _build_param_space(dml_ssm, _small_tree_params) + optuna_settings = _basic_optuna_settings({"sampler": "TPESampler"}) + + with pytest.raises(NotImplementedError, match="Optuna tuning for nonignorable score is not implemented yet."): + dml_ssm.tune_ml_models(ml_param_space=optuna_params, optuna_settings=optuna_settings) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +def test_doubleml_ssm_optuna_tune(sampler_name, optuna_sampler): + np.random.seed(3149) + dml_data = make_ssm_data(n_obs=500, dim_x=10, mar=True) + + ml_g = DecisionTreeRegressor(random_state=321) + ml_pi = DecisionTreeClassifier(random_state=654) + ml_m = DecisionTreeClassifier(random_state=987) + + dml_ssm = dml.DoubleMLSSM(dml_data, ml_g=ml_g, ml_pi=ml_pi, ml_m=ml_m, n_folds=2, score="missing-at-random") + dml_ssm.fit() + untuned_score = dml_ssm.evaluate_learners() + + optuna_params = _build_param_space(dml_ssm, _small_tree_params) + + optuna_settings = _basic_optuna_settings({"sampler": optuna_sampler}) + tune_res = dml_ssm.tune_ml_models( + ml_param_space=optuna_params, optuna_settings=optuna_settings, set_as_params=True, return_tune_res=True + ) + + dml_ssm.fit() + tuned_score = dml_ssm.evaluate_learners() + + for learner_name in dml_ssm.params_names: + tuned_params = tune_res[0][learner_name].best_params + _assert_tree_params(tuned_params) + + # ensure tuning improved RMSE + assert tuned_score[learner_name] < untuned_score[learner_name] diff --git a/doubleml/plm/lplr.py b/doubleml/plm/lplr.py index 1c674fef..ff578448 100644 --- a/doubleml/plm/lplr.py +++ b/doubleml/plm/lplr.py @@ -4,6 +4,7 @@ import numpy as np import scipy from sklearn.base import clone +from sklearn.model_selection import cross_val_predict from sklearn.utils import check_X_y from sklearn.utils.multiclass import type_of_target @@ -15,6 +16,7 @@ _dml_tune, _double_dml_cv_predict, ) +from doubleml.utils._tune_optuna import _dml_tune_optuna class DoubleMLLPLR(NonLinearScoreMixin, DoubleML): @@ -423,8 +425,8 @@ def 
_sensitivity_element_est(self, preds): def _nuisance_tuning( self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search ): - x, y = check_X_y(self._dml_data.x, self._dml_data.y, force_all_finite=False) - x, d = check_X_y(x, self._dml_data.d, force_all_finite=False) + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) x_d_concat = np.hstack((d.reshape(-1, 1), x)) if scoring_methods is None: @@ -536,3 +538,102 @@ def _compute_score_deriv(self, psi_elements, coef, inds=None): deriv = -psi_elements["d"] * expit * (1 - expit) * psi_elements["d_tilde"] return deriv + + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + """ + Optuna-based hyperparameter tuning for LPLR nuisance models. + + Performs tuning once on the whole dataset using cross-validation, + returning the same optimal parameters for all folds. + """ + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + x_d_concat = np.hstack((d.reshape(-1, 1), x)) + + if scoring_methods is None: + scoring_methods = {"ml_m": None, "ml_M": None, "ml_a": None, "ml_t": None} + + M_tune_res = _dml_tune_optuna( + y, + x_d_concat, + self._learner["ml_M"], + optuna_params["ml_M"], + scoring_methods["ml_M"], + cv, + optuna_settings, + learner_name="ml_M", + params_name="ml_M", + ) + + if self.score == "nuisance_space": + mask_y0 = y == 0 + outcome_ml_m = d[mask_y0] + features_ml_m = x[mask_y0, :] + elif self.score == "instrument": + outcome_ml_m = d + features_ml_m = x + + m_tune_res = _dml_tune_optuna( + outcome_ml_m, + features_ml_m, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + + a_tune_res = _dml_tune_optuna( + d, + x, + self._learner["ml_a"], + optuna_params["ml_a"], + scoring_methods["ml_a"], + cv, + optuna_settings, + learner_name="ml_a", + params_name="ml_a", + ) + + # Create targets for tuning ml_t + # Unlike for inference in _nuisance_est, we do not use the double cross-fitting here and use a single model for + # predicting M_hat + # This presents a small risk of bias in the targets, but enables tuning without tune_on_folds=True + + M_hat = cross_val_predict( + estimator=clone(M_tune_res.best_estimator), + X=x_d_concat, + y=y, + cv=cv, + method="predict_proba", + )[:, 1] + M_hat = np.clip(M_hat, 1e-8, 1 - 1e-8) + W_hat = scipy.special.logit(M_hat) + + t_tune_res = _dml_tune_optuna( + W_hat, + x, + self._learner["ml_t"], + optuna_params["ml_t"], + scoring_methods["ml_t"], + cv, + optuna_settings, + learner_name="ml_t", + params_name="ml_t", + ) + + results = { + "ml_M": M_tune_res, + "ml_m": m_tune_res, + "ml_a": a_tune_res, + "ml_t": t_tune_res, + } + return results diff --git a/doubleml/plm/pliv.py b/doubleml/plm/pliv.py index e8fd0ed6..b7a384f0 100644 --- a/doubleml/plm/pliv.py +++ b/doubleml/plm/pliv.py @@ -3,14 +3,15 @@ import numpy as np from sklearn.dummy import DummyRegressor from sklearn.linear_model import LinearRegression -from sklearn.model_selection import GridSearchCV, KFold, RandomizedSearchCV +from sklearn.model_selection import GridSearchCV, KFold, RandomizedSearchCV, cross_val_predict from sklearn.utils import check_X_y -from ..data.base_data import DoubleMLData -from ..double_ml import DoubleML -from ..double_ml_score_mixins import 
LinearScoreMixin -from ..utils._checks import _check_finite_predictions -from ..utils._estimation import _dml_cv_predict, _dml_tune +from doubleml.data.base_data import DoubleMLData +from doubleml.double_ml import DoubleML +from doubleml.double_ml_score_mixins import LinearScoreMixin +from doubleml.utils._checks import _check_finite_predictions +from doubleml.utils._estimation import _dml_cv_predict, _dml_tune +from doubleml.utils._tune_optuna import _dml_tune_optuna class DoubleMLPLIV(LinearScoreMixin, DoubleML): @@ -229,24 +230,79 @@ def _nuisance_est(self, smpls, n_jobs_cv, external_predictions, return_models=Fa return psi_elements, preds def _nuisance_tuning( - self, smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search + self, + smpls, + param_grids, + scoring_methods, + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, ): if self.partialX & (not self.partialZ): res = self._nuisance_tuning_partial_x( - smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search + smpls, + param_grids, + scoring_methods, + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, ) elif (not self.partialX) & self.partialZ: res = self._nuisance_tuning_partial_z( - smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search + smpls, + param_grids, + scoring_methods, + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, ) else: assert self.partialX & self.partialZ res = self._nuisance_tuning_partial_xz( - smpls, param_grids, scoring_methods, n_folds_tune, n_jobs_cv, search_mode, n_iter_randomized_search + smpls, + param_grids, + scoring_methods, + n_folds_tune, + n_jobs_cv, + search_mode, + n_iter_randomized_search, ) return res + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + if self.partialX & (not self.partialZ): + return self._nuisance_tuning_optuna_partial_x( + optuna_params, + scoring_methods, + cv, + optuna_settings, + ) + elif (not self.partialX) & self.partialZ: + return self._nuisance_tuning_optuna_partial_z( + optuna_params, + scoring_methods, + cv, + optuna_settings, + ) + else: + assert self.partialX & self.partialZ + return self._nuisance_tuning_optuna_partial_xz( + optuna_params, + scoring_methods, + cv, + optuna_settings, + ) + def _nuisance_est_partial_x(self, smpls, n_jobs_cv, external_predictions, return_models=False): x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) @@ -638,7 +694,7 @@ def _nuisance_tuning_partial_z( scoring_methods = {"ml_r": None} train_inds = [train_index for (train_index, _) in smpls] - m_tune_res = _dml_tune( + r_tune_res = _dml_tune( d, xz, train_inds, @@ -651,11 +707,11 @@ def _nuisance_tuning_partial_z( n_iter_randomized_search, ) - m_best_params = [xx.best_params_ for xx in m_tune_res] + r_best_params = [xx.best_params_ for xx in r_tune_res] - params = {"ml_r": m_best_params} + params = {"ml_r": r_best_params} - tune_res = {"r_tune": m_tune_res} + tune_res = {"r_tune": r_tune_res} res = {"params": params, "tune_res": tune_res} @@ -733,5 +789,188 @@ def _nuisance_tuning_partial_xz( return res + def _nuisance_tuning_optuna_partial_x( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, 
ensure_all_finite=False) + + if scoring_methods is None: + scoring_methods = {"ml_l": None, "ml_m": None, "ml_r": None, "ml_g": None} + + l_tune_res = _dml_tune_optuna( + y, + x, + self._learner["ml_l"], + optuna_params["ml_l"], + scoring_methods["ml_l"], + cv, + optuna_settings, + learner_name="ml_l", + params_name="ml_l", + ) + + if self._dml_data.n_instr > 1: + m_tune_res = {} + z_all = self._dml_data.z + for i_instr, instr_var in enumerate(self._dml_data.z_cols): + x_instr, this_z = check_X_y(x, z_all[:, i_instr], ensure_all_finite=False) + scoring_key = scoring_methods.get(f"ml_m_{instr_var}", scoring_methods.get("ml_m")) + m_tune_res[instr_var] = _dml_tune_optuna( + this_z, + x_instr, + self._learner["ml_m"], + optuna_params[f"ml_m_{instr_var}"], + scoring_key, + cv, + optuna_settings, + learner_name="ml_m", + params_name=f"ml_m_{instr_var}", + ) + x_m_features = x # keep reference for later when constructing params + z_vector = None + else: + x_m_features, z_vector = check_X_y(x, np.ravel(self._dml_data.z), ensure_all_finite=False) + m_tune_res = _dml_tune_optuna( + z_vector, + x_m_features, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + + r_tune_res = _dml_tune_optuna( + d, + x, + self._learner["ml_r"], + optuna_params["ml_r"], + scoring_methods["ml_r"], + cv, + optuna_settings, + learner_name="ml_r", + params_name="ml_r", + ) + + results = {"ml_l": l_tune_res, "ml_r": r_tune_res} + + if self._dml_data.n_instr > 1: + for instr_var in self._dml_data.z_cols: + results["ml_m_" + instr_var] = m_tune_res[instr_var] + else: + results["ml_m"] = m_tune_res + if "ml_g" in self._learner: + l_hat = cross_val_predict(l_tune_res.best_estimator, x, y, cv=cv, method=self._predict_method["ml_l"]) + m_hat = cross_val_predict( + m_tune_res.best_estimator, x_m_features, z_vector, cv=cv, method=self._predict_method["ml_m"] + ) + r_hat = cross_val_predict(r_tune_res.best_estimator, x, cv=cv, method=self._predict_method["ml_r"]) + psi_a = -np.multiply(d - r_hat, z_vector - m_hat) + psi_b = np.multiply(z_vector - m_hat, y - l_hat) + theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) + + g_tune_res = _dml_tune_optuna( + y - theta_initial * d, + x, + self._learner["ml_g"], + optuna_params["ml_g"], + scoring_methods["ml_g"], + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g", + ) + + results["ml_g"] = g_tune_res + + return results + + def _nuisance_tuning_optuna_partial_z( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + + xz, d = check_X_y(np.hstack((self._dml_data.x, self._dml_data.z)), self._dml_data.d, ensure_all_finite=False) + + if scoring_methods is None: + scoring_methods = {"ml_r": None} + + r_tune_res = _dml_tune_optuna( + d, + xz, + self._learner["ml_r"], + optuna_params["ml_r"], + scoring_methods["ml_r"], + cv, + optuna_settings, + learner_name="ml_r", + params_name="ml_r", + ) + return {"ml_r": r_tune_res} + + def _nuisance_tuning_optuna_partial_xz( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + xz, d = check_X_y(np.hstack((self._dml_data.x, self._dml_data.z)), self._dml_data.d, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + + if scoring_methods is None: + scoring_methods = {"ml_l": None, "ml_m": None, "ml_r": None} + + l_tune_res = _dml_tune_optuna( + y, + x, + self._learner["ml_l"], + 
optuna_params["ml_l"], + scoring_methods["ml_l"], + cv, + optuna_settings, + learner_name="ml_l", + params_name="ml_l", + ) + + m_tune_res = _dml_tune_optuna( + d, + xz, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + + m_hat = cross_val_predict(m_tune_res.best_estimator, xz, d, cv=cv, method=self._predict_method["ml_m"]) + r_tune_res = _dml_tune_optuna( + m_hat, + x, + self._learner["ml_r"], + optuna_params["ml_r"], + scoring_methods["ml_r"], + cv, + optuna_settings, + learner_name="ml_r", + params_name="ml_r", + ) + return {"ml_l": l_tune_res, "ml_m": m_tune_res, "ml_r": r_tune_res} + def _sensitivity_element_est(self, preds): pass diff --git a/doubleml/plm/plr.py b/doubleml/plm/plr.py index 9b28231f..e943d87b 100644 --- a/doubleml/plm/plr.py +++ b/doubleml/plm/plr.py @@ -3,14 +3,16 @@ import numpy as np import pandas as pd from sklearn.base import clone +from sklearn.model_selection import cross_val_predict from sklearn.utils import check_X_y -from ..data.base_data import DoubleMLData -from ..double_ml import DoubleML -from ..double_ml_score_mixins import LinearScoreMixin -from ..utils._checks import _check_binary_predictions, _check_finite_predictions, _check_is_propensity, _check_score -from ..utils._estimation import _dml_cv_predict, _dml_tune -from ..utils.blp import DoubleMLBLP +from doubleml.data.base_data import DoubleMLData +from doubleml.double_ml import DoubleML +from doubleml.double_ml_score_mixins import LinearScoreMixin +from doubleml.utils._checks import _check_binary_predictions, _check_finite_predictions, _check_is_propensity, _check_score +from doubleml.utils._estimation import _dml_cv_predict, _dml_tune +from doubleml.utils._tune_optuna import _dml_tune_optuna +from doubleml.utils.blp import DoubleMLBLP class DoubleMLPLR(LinearScoreMixin, DoubleML): @@ -372,6 +374,76 @@ def _nuisance_tuning( return res + def _nuisance_tuning_optuna( + self, + optuna_params, + scoring_methods, + cv, + optuna_settings, + ): + """ + Optuna-based hyperparameter tuning for PLR nuisance models. + + Performs tuning once on the whole dataset using cross-validation, + returning the same optimal parameters for all folds. 
+ """ + + x, y = check_X_y(self._dml_data.x, self._dml_data.y, ensure_all_finite=False) + x, d = check_X_y(x, self._dml_data.d, ensure_all_finite=False) + + if scoring_methods is None: + scoring_methods = {"ml_l": None, "ml_m": None, "ml_g": None} + + l_tune_res = _dml_tune_optuna( + y, + x, + self._learner["ml_l"], + optuna_params["ml_l"], + scoring_methods["ml_l"], + cv, + optuna_settings, + learner_name="ml_l", + params_name="ml_l", + ) + m_tune_res = _dml_tune_optuna( + d, + x, + self._learner["ml_m"], + optuna_params["ml_m"], + scoring_methods["ml_m"], + cv, + optuna_settings, + learner_name="ml_m", + params_name="ml_m", + ) + + results = {"ml_l": l_tune_res, "ml_m": m_tune_res} + + # an ML model for g is obtained for the IV-type score and callable scores + if "ml_g" in self._learner: + # construct an initial theta estimate from the tuned models using the partialling out score + # use cross-fitting for tuning ml_g + l_hat = cross_val_predict(l_tune_res.best_estimator, x, y, cv=cv, method=self._predict_method["ml_l"]) + m_hat = cross_val_predict(m_tune_res.best_estimator, x, d, cv=cv, method=self._predict_method["ml_m"]) + psi_a = -np.multiply(d - m_hat, d - m_hat) + psi_b = np.multiply(d - m_hat, y - l_hat) + theta_initial = -np.nanmean(psi_b) / np.nanmean(psi_a) + + g_tune_res = _dml_tune_optuna( + y - theta_initial * d, + x, + self._learner["ml_g"], + optuna_params["ml_g"], + scoring_methods["ml_g"], + cv, + optuna_settings, + learner_name="ml_g", + params_name="ml_g", + ) + results["ml_g"] = g_tune_res + + return results + def cate(self, basis, is_gate=False, **kwargs): """ Calculate conditional average treatment effects (CATE) for a given basis. diff --git a/doubleml/plm/tests/test_lplr_tune_ml_models.py b/doubleml/plm/tests/test_lplr_tune_ml_models.py new file mode 100644 index 00000000..2c649392 --- /dev/null +++ b/doubleml/plm/tests/test_lplr_tune_ml_models.py @@ -0,0 +1,93 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +import doubleml as dml +from doubleml.plm.datasets import make_lplr_LZZ2020 +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _small_tree_params, +) + + +@pytest.fixture(scope="module", params=["nuisance_space", "instrument"]) +def score(request): + return request.param + + +@pytest.fixture( + scope="module", + params=[DecisionTreeRegressor(random_state=567), None], +) +def ml_a(request): + return request.param + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +def test_doubleml_lplr_optuna_tune(sampler_name, optuna_sampler, score, ml_a): + np.random.seed(3141) + alpha = 0.5 + dml_data = make_lplr_LZZ2020(n_obs=200, dim_x=15, alpha=alpha) + + ml_M = DecisionTreeClassifier(random_state=123) + ml_t = DecisionTreeRegressor(random_state=234) + ml_m = DecisionTreeRegressor(random_state=456) + + dml_lplr = dml.DoubleMLLPLR( + dml_data, + ml_M=ml_M, + ml_t=ml_t, + ml_m=ml_m, + ml_a=ml_a, + n_folds=2, + n_folds_inner=2, + score=score, + ) + dml_lplr.fit() + untuned_score = dml_lplr.evaluate_learners() + + optuna_params = { + "ml_M": _small_tree_params, + "ml_m": _small_tree_params, + "ml_t": _small_tree_params, + "ml_a": _small_tree_params, + } + + tune_res = dml_lplr.tune_ml_models( + ml_param_space=optuna_params, + optuna_settings=_basic_optuna_settings({"sampler": optuna_sampler, "n_trials": 5}), + return_tune_res=True, + ) + + dml_lplr.fit() + 
tuned_score = dml_lplr.evaluate_learners() + + tuned_params_M = tune_res[0]["ml_M"].best_params + tuned_params_t = tune_res[0]["ml_t"].best_params + tuned_params_m = tune_res[0]["ml_m"].best_params + tuned_params_a = tune_res[0]["ml_a"].best_params + + _assert_tree_params(tuned_params_M) + _assert_tree_params(tuned_params_t) + _assert_tree_params(tuned_params_m) + _assert_tree_params(tuned_params_a) + + # ensure results contain optuna objects and best params + assert isinstance(tune_res[0], dict) + assert set(tune_res[0].keys()) == {"ml_M", "ml_m", "ml_t", "ml_a"} + assert hasattr(tune_res[0]["ml_M"], "best_params") + assert tune_res[0]["ml_M"].best_params["max_depth"] == tuned_params_M["max_depth"] + assert hasattr(tune_res[0]["ml_t"], "best_params") + assert tune_res[0]["ml_t"].best_params["max_depth"] == tuned_params_t["max_depth"] + assert hasattr(tune_res[0]["ml_m"], "best_params") + assert tune_res[0]["ml_m"].best_params["max_depth"] == tuned_params_m["max_depth"] + assert hasattr(tune_res[0]["ml_a"], "best_params") + assert tune_res[0]["ml_a"].best_params["max_depth"] == tuned_params_a["max_depth"] + + # ensure tuning improved RMSE # not actually possible for ml_t as the targets are not available + assert tuned_score["ml_M"] < untuned_score["ml_M"] + assert tuned_score["ml_m"] < untuned_score["ml_m"] + assert tuned_score["ml_a"] < untuned_score["ml_a"] diff --git a/doubleml/plm/tests/test_pliv_tune_ml_models.py b/doubleml/plm/tests/test_pliv_tune_ml_models.py new file mode 100644 index 00000000..3189980a --- /dev/null +++ b/doubleml/plm/tests/test_pliv_tune_ml_models.py @@ -0,0 +1,48 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeRegressor + +import doubleml as dml +from doubleml.plm.datasets import make_pliv_CHS2015 +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _build_param_space, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +def test_doubleml_pliv_optuna_tune(sampler_name, optuna_sampler): + """Test PLIV with ml_l, ml_m, ml_r nuisance models.""" + + np.random.seed(3144) + dml_data = make_pliv_CHS2015(n_obs=500, dim_x=15, dim_z=3) + + ml_l = DecisionTreeRegressor(random_state=123) + ml_m = DecisionTreeRegressor(random_state=456) + ml_r = DecisionTreeRegressor(random_state=789) + + dml_pliv = dml.DoubleMLPLIV(dml_data, ml_l, ml_m, ml_r, n_folds=2) + dml_pliv.fit() + untuned_score = dml_pliv.evaluate_learners() + + optuna_params = _build_param_space(dml_pliv, _small_tree_params) + + optuna_settings = _basic_optuna_settings({"sampler": optuna_sampler}) + optuna_settings["n_trials"] = 10 + tune_res = dml_pliv.tune_ml_models( + ml_param_space=optuna_params, set_as_params=True, optuna_settings=optuna_settings, return_tune_res=True + ) + + dml_pliv.fit() + tuned_score = dml_pliv.evaluate_learners() + + for learner_name in dml_pliv.params_names: + tuned_params = tune_res[0][learner_name].best_params + _assert_tree_params(tuned_params) + + # ensure tuning improved RMSE + assert tuned_score[learner_name] < untuned_score[learner_name] diff --git a/doubleml/plm/tests/test_plr_tune_ml_models.py b/doubleml/plm/tests/test_plr_tune_ml_models.py new file mode 100644 index 00000000..380bca8b --- /dev/null +++ b/doubleml/plm/tests/test_plr_tune_ml_models.py @@ -0,0 +1,80 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeRegressor + +import doubleml as dml +from 
doubleml.plm.datasets import make_plr_CCDDHNR2018 +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +def test_doubleml_plr_optuna_tune(sampler_name, optuna_sampler): + np.random.seed(3141) + alpha = 0.5 + dml_data = make_plr_CCDDHNR2018(n_obs=500, dim_x=5, alpha=alpha) + + ml_l = DecisionTreeRegressor(random_state=123) + ml_m = DecisionTreeRegressor(random_state=456) + + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2, score="partialling out") + dml_plr.fit() + untuned_score = dml_plr.evaluate_learners() + + optuna_params = {"ml_l": _small_tree_params, "ml_m": _small_tree_params} + + tune_res = dml_plr.tune_ml_models( + ml_param_space=optuna_params, + optuna_settings=_basic_optuna_settings({"sampler": optuna_sampler}), + return_tune_res=True, + ) + + dml_plr.fit() + tuned_score = dml_plr.evaluate_learners() + + tuned_params_l = tune_res[0]["ml_l"].best_params + tuned_params_m = tune_res[0]["ml_m"].best_params + + _assert_tree_params(tuned_params_l) + _assert_tree_params(tuned_params_m) + + # ensure results contain optuna objects and best params + assert isinstance(tune_res[0], dict) + assert set(tune_res[0].keys()) == {"ml_l", "ml_m"} + assert hasattr(tune_res[0]["ml_l"], "best_params") + assert tune_res[0]["ml_l"].best_params["max_depth"] == tuned_params_l["max_depth"] + assert hasattr(tune_res[0]["ml_m"], "best_params") + assert tune_res[0]["ml_m"].best_params["max_depth"] == tuned_params_m["max_depth"] + + # ensure tuning improved RMSE + assert tuned_score["ml_l"] < untuned_score["ml_l"] + assert tuned_score["ml_m"] < untuned_score["ml_m"] + + +@pytest.mark.ci +def test_doubleml_plr_optuna_tune_with_ml_g(): + np.random.seed(3150) + dml_data = make_plr_CCDDHNR2018(n_obs=200, dim_x=5, alpha=0.5) + + ml_l = DecisionTreeRegressor(random_state=11) + ml_m = DecisionTreeRegressor(random_state=12) + ml_g = DecisionTreeRegressor(random_state=13) + + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, ml_g, n_folds=2, score="IV-type") + + optuna_params = {"ml_l": _small_tree_params, "ml_m": _small_tree_params, "ml_g": _small_tree_params} + + tune_res = dml_plr.tune_ml_models( + ml_param_space=optuna_params, + optuna_settings=_basic_optuna_settings({"n_trials": 1}), + return_tune_res=True, + ) + + assert "ml_g" in tune_res[0] + ml_g_res = tune_res[0]["ml_g"] + assert ml_g_res.best_params is not None diff --git a/doubleml/tests/_utils_tune_optuna.py b/doubleml/tests/_utils_tune_optuna.py new file mode 100644 index 00000000..241451fd --- /dev/null +++ b/doubleml/tests/_utils_tune_optuna.py @@ -0,0 +1,51 @@ +import numpy as np +import optuna + + +def _basic_optuna_settings(additional=None): + base_settings = { + "n_trials": 10, + "sampler": optuna.samplers.TPESampler(seed=3141), + "verbosity": optuna.logging.WARNING, + "show_progress_bar": False, + } + if additional is not None: + base_settings.update(additional) + return base_settings + + +_SAMPLER_CASES = [ + ("random", optuna.samplers.RandomSampler(seed=3141)), + ("tpe", optuna.samplers.TPESampler(seed=3141)), +] + + +def _small_tree_params(trial): + return { + "max_depth": trial.suggest_int("max_depth", 1, 20), + "min_samples_split": trial.suggest_int("min_samples_split", 2, 20), + "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10), + } + + +def _assert_tree_params(param_dict, depth_range=(1, 20), 
leaf_range=(1, 10), split_range=(2, 20)): + assert set(param_dict.keys()) == {"max_depth", "min_samples_leaf", "min_samples_split"} + assert depth_range[0] <= param_dict["max_depth"] <= depth_range[1] + assert leaf_range[0] <= param_dict["min_samples_leaf"] <= leaf_range[1] + assert split_range[0] <= param_dict["min_samples_split"] <= split_range[1] + + +def _build_param_space(dml_obj, param_fn): + """Build parameter grid using the actual params_names from the DML object.""" + param_grid = {learner_name: param_fn for learner_name in dml_obj.params_names} + return param_grid + + +def _select_binary_periods(panel_data): + t_values = np.sort(panel_data.t_values) + finite_g = sorted(val for val in panel_data.g_values if np.isfinite(val)) + for candidate in finite_g: + pre_candidates = [t for t in t_values if t < candidate] + if pre_candidates: + return candidate, pre_candidates[-1], candidate + raise RuntimeError("No valid treatment group found for binary DID data.") diff --git a/doubleml/tests/test_datasets.py b/doubleml/tests/test_datasets.py index 95b6ea53..7d805346 100644 --- a/doubleml/tests/test_datasets.py +++ b/doubleml/tests/test_datasets.py @@ -7,6 +7,7 @@ msg_inv_return_type = "Invalid return_type." +@pytest.mark.ci def test_fetch_401K_return_types(): res = fetch_401K("DoubleMLData") assert isinstance(res, DoubleMLData) @@ -16,12 +17,14 @@ def test_fetch_401K_return_types(): _ = fetch_401K("matrix") +@pytest.mark.ci def test_fetch_401K_poly(): msg = "polynomial_features os not implemented yet for fetch_401K." with pytest.raises(NotImplementedError, match=msg): _ = fetch_401K(polynomial_features=True) +@pytest.mark.ci def test_fetch_bonus_return_types(): res = fetch_bonus("DoubleMLData") assert isinstance(res, DoubleMLData) @@ -31,6 +34,7 @@ def test_fetch_bonus_return_types(): _ = fetch_bonus("matrix") +@pytest.mark.ci def test_fetch_bonus_poly(): data_bonus_wo_poly = fetch_bonus(polynomial_features=False) n_x = len(data_bonus_wo_poly.x_cols) diff --git a/doubleml/tests/test_dml_tune_optuna.py b/doubleml/tests/test_dml_tune_optuna.py new file mode 100644 index 00000000..4a583c1c --- /dev/null +++ b/doubleml/tests/test_dml_tune_optuna.py @@ -0,0 +1,605 @@ +import numpy as np +import optuna +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression +from sklearn.model_selection import KFold +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +import doubleml as dml +from doubleml.irm.datasets import make_irm_data +from doubleml.plm.datasets import make_plr_CCDDHNR2018 +from doubleml.tests._utils_tune_optuna import ( + _assert_tree_params, + _basic_optuna_settings, + _small_tree_params, +) +from doubleml.utils._tune_optuna import ( + DMLOptunaResult, + _create_study, + _resolve_optuna_scoring, + resolve_optuna_cv, +) + + +@pytest.mark.ci +def test_resolve_optuna_scoring_regressor_default(): + learner = LinearRegression() + scoring, message = _resolve_optuna_scoring(None, learner, "ml_l") + assert scoring == "neg_root_mean_squared_error" + assert "neg_root_mean_squared_error" in message + + +@pytest.mark.ci +def test_resolve_optuna_scoring_classifier_default(): + learner = LogisticRegression() + scoring, message = _resolve_optuna_scoring(None, learner, "ml_m") + assert scoring == "neg_log_loss" + assert "neg_log_loss" in message + + +@pytest.mark.ci +def test_resolve_optuna_scoring_with_criterion_keeps_default(): + learner = DecisionTreeRegressor(random_state=0) + scoring, message = _resolve_optuna_scoring(None, learner, "ml_l") + assert 
scoring == "neg_root_mean_squared_error" + assert "neg_root_mean_squared_error" in message + + +@pytest.mark.ci +def test_resolve_optuna_scoring_lightgbm_regressor_default(): + pytest.importorskip("lightgbm") + from lightgbm import LGBMRegressor + + learner = LGBMRegressor() + scoring, message = _resolve_optuna_scoring(None, learner, "ml_l") + assert scoring == "neg_root_mean_squared_error" + assert "neg_root_mean_squared_error" in message + + +@pytest.mark.ci +def test_resolve_optuna_cv_sets_random_state(): + cv = resolve_optuna_cv(3) + assert isinstance(cv, KFold) + assert cv.shuffle is True + assert cv.random_state == 42 + + +@pytest.mark.ci +def test_doubleml_optuna_cv_variants(): + np.random.seed(3142) + dml_data = make_plr_CCDDHNR2018(n_obs=100, dim_x=5) + + ml_l = DecisionTreeRegressor(random_state=10, max_depth=5, min_samples_leaf=4) + ml_m = DecisionTreeRegressor(random_state=11, max_depth=5, min_samples_leaf=4) + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2, score="partialling out") + + optuna_params = {"ml_l": _small_tree_params, "ml_m": _small_tree_params} + + dml_plr.tune_ml_models( + ml_param_space=optuna_params, + cv=3, + optuna_settings=_basic_optuna_settings(), + ) + + int_l_params = dml_plr.get_params("ml_l")["d"][0][0] + int_m_params = dml_plr.get_params("ml_m")["d"][0][0] + + assert int_l_params is not None + assert int_m_params is not None + + cv_splitter = KFold(n_splits=3, shuffle=True, random_state=3142) + + dml_plr.tune_ml_models( + ml_param_space=optuna_params, + cv=cv_splitter, + optuna_settings=_basic_optuna_settings(), + ) + + split_l_params = dml_plr.get_params("ml_l")["d"][0][0] + split_m_params = dml_plr.get_params("ml_m")["d"][0][0] + + assert split_l_params is not None + assert split_m_params is not None + + class SimpleSplitter: + def __init__(self, n_splits=3): + self._kfold = KFold(n_splits=n_splits, shuffle=True, random_state=3142) + self._n_splits = n_splits + + def split(self, X, y=None, groups=None): + return self._kfold.split(X, y, groups) + + def get_n_splits(self, X=None, y=None, groups=None): + return self._n_splits + + custom_cv = SimpleSplitter(n_splits=3) + + dml_plr.tune_ml_models( + ml_param_space=optuna_params, + cv=custom_cv, + optuna_settings=_basic_optuna_settings({"n_trials": 2}), + ) + + custom_l_params = dml_plr.get_params("ml_l")["d"][0][0] + custom_m_params = dml_plr.get_params("ml_m")["d"][0][0] + + assert custom_l_params is not None + assert custom_m_params is not None + + base_iter_kfold = KFold(n_splits=3, shuffle=True, random_state=3142) + cv_iterable = list(base_iter_kfold.split(np.arange(dml_data.n_obs))) + + dml_plr.tune_ml_models( + ml_param_space=optuna_params, + cv=cv_iterable, + optuna_settings=_basic_optuna_settings({"n_trials": 2}), + ) + + iterable_l_params = dml_plr.get_params("ml_l")["d"][0][0] + iterable_m_params = dml_plr.get_params("ml_m")["d"][0][0] + + assert iterable_l_params is not None + assert iterable_m_params is not None + + explicit_cv_iterable = [ + (list(train_idx), list(test_idx)) for train_idx, test_idx in base_iter_kfold.split(np.arange(dml_data.n_obs)) + ] + + dml_plr.tune_ml_models( + ml_param_space=optuna_params, + cv=explicit_cv_iterable, + optuna_settings=_basic_optuna_settings({"n_trials": 2}), + ) + + explicit_l_params = dml_plr.get_params("ml_l")["d"][0][0] + explicit_m_params = dml_plr.get_params("ml_m")["d"][0][0] + + assert explicit_l_params is not None + assert explicit_m_params is not None + + cv = None + dml_plr.tune_ml_models( + ml_param_space=optuna_params, + cv=cv, + 
optuna_settings=_basic_optuna_settings({"n_trials": 2}), + ) + none_l_params = dml_plr.get_params("ml_l")["d"][0][0] + none_m_params = dml_plr.get_params("ml_m")["d"][0][0] + + assert none_l_params is not None + assert none_m_params is not None + + +@pytest.mark.ci +def test_create_study_respects_user_study_name(monkeypatch): + captured_kwargs = {} + + def fake_create_study(**kwargs): + captured_kwargs.update(kwargs) + + class _DummyStudy: + pass + + return _DummyStudy() + + monkeypatch.setattr(optuna, "create_study", fake_create_study) + + settings = { + "study": None, + "study_kwargs": {"study_name": "custom-study", "direction": "maximize"}, + "direction": "maximize", + "sampler": None, + } + + _create_study(settings, "ml_l") + + assert captured_kwargs["study_name"] == "custom-study" + + +@pytest.mark.ci +def test_doubleml_optuna_partial_tuning_single_learner(): + np.random.seed(3143) + dml_data = make_plr_CCDDHNR2018(n_obs=100, dim_x=5) + + ml_l = DecisionTreeRegressor(random_state=20, max_depth=5, min_samples_leaf=4) + ml_m = DecisionTreeRegressor(random_state=21, max_depth=5, min_samples_leaf=4) + + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2, score="partialling out") + + optuna_params = {"ml_l": _small_tree_params} + + tune_res = dml_plr.tune_ml_models( + ml_param_space=optuna_params, + optuna_settings=_basic_optuna_settings(), + return_tune_res=True, + ) + + res_ml_m = tune_res[0]["ml_m"] + res_ml_l = tune_res[0]["ml_l"] + + assert res_ml_m.tuned is False + assert res_ml_m.best_estimator.get_params() == ml_m.get_params() # assert default params kept + assert res_ml_l.tuned is True + _assert_tree_params(res_ml_l.best_params) # assert tuned params valid + + assert isinstance(tune_res[0], dict) + assert set(tune_res[0].keys()) == {"ml_l", "ml_m"} + + +@pytest.mark.ci +def test_doubleml_optuna_sets_params_for_all_folds(): + np.random.seed(3153) + dml_data = make_plr_CCDDHNR2018(n_obs=100, dim_x=5) + + ml_l = DecisionTreeRegressor(random_state=101, max_depth=5, min_samples_leaf=4) + ml_m = DecisionTreeRegressor(random_state=202, max_depth=5, min_samples_leaf=4) + + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=3, n_rep=2) + + optuna_params = {"ml_l": _small_tree_params, "ml_m": _small_tree_params} + + dml_plr.tune_ml_models(ml_param_space=optuna_params, optuna_settings=_basic_optuna_settings()) + + l_params = dml_plr.get_params("ml_l") + m_params = dml_plr.get_params("ml_m") + + assert set(l_params.keys()) == {"d"} + assert set(m_params.keys()) == {"d"} + + expected_l = dict(l_params["d"][0][0]) + expected_m = dict(m_params["d"][0][0]) + + assert len(l_params["d"]) == dml_plr.n_rep + assert len(m_params["d"]) == dml_plr.n_rep + + for rep_idx in range(dml_plr.n_rep): + assert len(l_params["d"][rep_idx]) == dml_plr.n_folds + assert len(m_params["d"][rep_idx]) == dml_plr.n_folds + for fold_idx in range(dml_plr.n_folds): + l_fold_params = l_params["d"][rep_idx][fold_idx] + m_fold_params = m_params["d"][rep_idx][fold_idx] + assert l_fold_params is not None + assert m_fold_params is not None + assert l_fold_params == expected_l + assert m_fold_params == expected_m + + +@pytest.mark.ci +def test_doubleml_optuna_fit_uses_tuned_params(): + np.random.seed(3154) + dml_data = make_plr_CCDDHNR2018(n_obs=100, dim_x=5) + + ml_l = DecisionTreeRegressor(random_state=303, max_depth=5, min_samples_leaf=4) + ml_m = DecisionTreeRegressor(random_state=404, max_depth=5, min_samples_leaf=4) + + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2, n_rep=1) + + optuna_params = 
{"ml_l": _small_tree_params, "ml_m": _small_tree_params} + + dml_plr.tune_ml_models(ml_param_space=optuna_params, optuna_settings=_basic_optuna_settings()) + + expected_l = dict(dml_plr.get_params("ml_l")["d"][0][0]) + expected_m = dict(dml_plr.get_params("ml_m")["d"][0][0]) + + dml_plr.fit(store_predictions=False, store_models=True) + + for rep_idx in range(dml_plr.n_rep): + for fold_idx in range(dml_plr.n_folds): + ml_l_model = dml_plr.models["ml_l"]["d"][rep_idx][fold_idx] + ml_m_model = dml_plr.models["ml_m"]["d"][rep_idx][fold_idx] + assert ml_l_model is not None + assert ml_m_model is not None + for key, value in expected_l.items(): + assert ml_l_model.get_params()[key] == value + for key, value in expected_m.items(): + assert ml_m_model.get_params()[key] == value + + +@pytest.mark.ci +def test_dml_optuna_result_str_representation(): + def custom_scorer(**args): + return 0.0 + + primary_result = DMLOptunaResult( + learner_name="ml_l", + params_name="ml_l", + best_estimator=LinearRegression(), + best_params={"alpha": 1, "depth": 3}, + best_score=0.123, + scoring_method="neg_mean_squared_error", + study=None, + tuned=True, + ) + + primary_str = str(primary_result) + assert primary_str.startswith("================== DMLOptunaResult") + assert "Learner name: ml_l" in primary_str + assert "Best score: 0.123" in primary_str + assert "Scoring method: neg_mean_squared_error" in primary_str + assert "'alpha': 1" in primary_str + assert "'depth': 3" in primary_str + + empty_params_result = DMLOptunaResult( + learner_name="ml_m", + params_name="ml_m", + best_estimator=LinearRegression(), + best_params={}, + best_score=-0.5, + scoring_method=custom_scorer, + study=None, + tuned=False, + ) + + empty_str = str(empty_params_result) + assert "Learner name: ml_m" in empty_str + assert "Scoring method: custom_scorer" in empty_str + assert "No best parameters available." 
in empty_str + + +@pytest.mark.ci +def test_doubleml_optuna_scoring_method_variants(): + np.random.seed(3156) + dml_data = make_plr_CCDDHNR2018(n_obs=120, dim_x=5) + + optuna_params = {"ml_l": _small_tree_params, "ml_m": _small_tree_params} + + ml_l_string = DecisionTreeRegressor(random_state=501) + ml_m_default = DecisionTreeRegressor(random_state=502) + dml_plr_string = dml.DoubleMLPLR(dml_data, ml_l_string, ml_m_default, n_folds=2) + + scoring_methods_string = {"ml_l": "neg_mean_squared_error"} + + tune_res_string = dml_plr_string.tune_ml_models( + ml_param_space=optuna_params, + scoring_methods=scoring_methods_string, + optuna_settings=_basic_optuna_settings({"n_trials": 2}), + return_tune_res=True, + ) + + assert tune_res_string[0]["ml_l"].scoring_method == "neg_mean_squared_error" + assert tune_res_string[0]["ml_m"].scoring_method == "neg_root_mean_squared_error" + + def neg_mae_scorer(estimator, x, y): + preds = estimator.predict(x) + return -np.mean(np.abs(y - preds)) + + ml_l_callable = DecisionTreeRegressor(random_state=601) + ml_m_callable = DecisionTreeRegressor(random_state=602) + dml_plr_callable = dml.DoubleMLPLR(dml_data, ml_l_callable, ml_m_callable, n_folds=2) + + scoring_methods_callable = {"ml_l": neg_mae_scorer, "ml_m": neg_mae_scorer} + + tune_res_callable = dml_plr_callable.tune_ml_models( + ml_param_space=optuna_params, + scoring_methods=scoring_methods_callable, + optuna_settings=_basic_optuna_settings({"n_trials": 2}), + return_tune_res=True, + ) + + assert tune_res_callable[0]["ml_l"].scoring_method is neg_mae_scorer + assert tune_res_callable[0]["ml_m"].scoring_method is neg_mae_scorer + + +@pytest.mark.ci +def test_doubleml_optuna_invalid_settings_key_raises(): + np.random.seed(3155) + dml_data = make_irm_data(n_obs=100, dim_x=5) + + ml_g = DecisionTreeRegressor(random_state=111, max_depth=5, min_samples_leaf=4) + ml_m = DecisionTreeClassifier(random_state=222, max_depth=5, min_samples_leaf=4) + + dml_irm = dml.DoubleMLIRM(dml_data, ml_g, ml_m, n_folds=2) + + optuna_params = {"ml_g0": _small_tree_params, "ml_g1": _small_tree_params, "ml_m": _small_tree_params} + invalid_settings = _basic_optuna_settings({"ml_l": {"n_trials": 2}}) + + with pytest.raises(ValueError, match="ml_l"): + dml_irm.tune_ml_models(ml_param_space=optuna_params, optuna_settings=invalid_settings) + + +@pytest.mark.ci +def test_optuna_settings_hierarchy_overrides(): + np.random.seed(3160) + dml_data = make_irm_data(n_obs=80, dim_x=5) + + ml_g = DecisionTreeRegressor(random_state=123) + ml_m = DecisionTreeClassifier(random_state=456) + dml_irm = dml.DoubleMLIRM(dml_data, ml_g, ml_m, n_folds=2, n_rep=1) + + optuna_params = {"ml_g": _small_tree_params, "ml_g1": _small_tree_params, "ml_m": _small_tree_params} + scoring_methods = { + "ml_g0": "neg_mean_squared_error", + "ml_g1": "neg_mean_squared_error", + "ml_m": "roc_auc", + } + + optuna_settings = { + "n_trials": 4, + "direction": "maximize", + "show_progress_bar": False, + "ml_g": {"n_trials": 2}, + "ml_g1": {"n_trials": 3}, + } + + tune_res = dml_irm.tune_ml_models( + ml_param_space=optuna_params, + optuna_settings=optuna_settings, + scoring_methods=scoring_methods, + cv=3, + return_tune_res=True, + ) + + result_map = tune_res[0] + + def _completed_trials(study): + return sum(trial.state == optuna.trial.TrialState.COMPLETE for trial in study.trials) + + assert _completed_trials(result_map["ml_g0"].study) == 2 + assert _completed_trials(result_map["ml_g1"].study) == 3 + assert _completed_trials(result_map["ml_m"].study) == 4 + + 
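The settings hierarchy exercised in the test directly above can be summarised with a short usage sketch. This is illustrative only and not part of the diff: the class, dataset helper, and argument names are taken from the tests in this changeset, while the seed and search bounds are arbitrary. Top-level optuna_settings entries act as defaults, a learner-level sub-dict such as "ml_g" refines them, and a params-level sub-dict such as "ml_g1" takes the highest precedence.

    import numpy as np
    from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor

    import doubleml as dml
    from doubleml.irm.datasets import make_irm_data


    def tree_space(trial):
        # callable parameter space: receives an optuna trial, returns a parameter dict
        return {
            "max_depth": trial.suggest_int("max_depth", 1, 20),
            "min_samples_leaf": trial.suggest_int("min_samples_leaf", 1, 10),
        }


    np.random.seed(3160)
    dml_data = make_irm_data(n_obs=80, dim_x=5)
    dml_irm = dml.DoubleMLIRM(dml_data, DecisionTreeRegressor(), DecisionTreeClassifier(), n_folds=2)

    optuna_settings = {
        "n_trials": 4,             # default budget, used by ml_m
        "show_progress_bar": False,
        "ml_g": {"n_trials": 2},   # learner-level override, picked up by ml_g0
        "ml_g1": {"n_trials": 3},  # params-level override, beats the "ml_g" entry
    }

    tune_res = dml_irm.tune_ml_models(
        ml_param_space={"ml_g": tree_space, "ml_g1": tree_space, "ml_m": tree_space},
        optuna_settings=optuna_settings,
        cv=3,
        return_tune_res=True,
    )
    dml_irm.fit()

Under these settings the assertions in test_optuna_settings_hierarchy_overrides expect 2 completed trials for ml_g0 (inherited from the "ml_g" entry), 3 for ml_g1, and 4 for ml_m.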
+@pytest.mark.ci +def test_optuna_logging_integration(): + """Test that logging integration works correctly with Optuna.""" + import logging + + np.random.seed(3154) + dml_data = make_plr_CCDDHNR2018(n_obs=100, dim_x=5) + + ml_l = DecisionTreeRegressor(random_state=303, max_depth=5, min_samples_leaf=4) + ml_m = DecisionTreeRegressor(random_state=404, max_depth=5, min_samples_leaf=4) + + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2, n_rep=1) + + optuna_params = {"ml_l": _small_tree_params, "ml_m": _small_tree_params} + + # Capture log messages + logger = logging.getLogger("doubleml.utils._tune_optuna") + original_level = logger.level + + # Create a custom handler to capture log records + log_records = [] + + class ListHandler(logging.Handler): + def emit(self, record): + log_records.append(record) + + handler = ListHandler() + handler.setLevel(logging.INFO) + logger.addHandler(handler) + logger.setLevel(logging.INFO) + + try: + # Tune with specific settings that should trigger log messages + optuna_settings = { + "n_trials": 2, + "sampler": optuna.samplers.TPESampler(seed=42), + "show_progress_bar": False, + } + + dml_plr.tune_ml_models(ml_param_space=optuna_params, optuna_settings=optuna_settings) + + # Check that we got log messages + log_messages = [record.getMessage() for record in log_records] + + # Should have messages about direction and sampler for each learner + direction_messages = [msg for msg in log_messages if "direction set to" in msg] + sampler_messages = [msg for msg in log_messages if "sampler" in msg.lower()] + scoring_messages = [msg for msg in log_messages if "scoring method" in msg.lower()] + + # We should have at least one message about direction + assert len(direction_messages) > 0, "Expected log messages about optimization direction" + + # We should have messages about the sampler + assert len(sampler_messages) > 0, "Expected log messages about sampler" + + # We should have messages about scoring + assert len(scoring_messages) > 0, "Expected log messages about scoring method" + + # Verify that the tuning actually worked + tuned_l = dml_plr.get_params("ml_l")["d"][0][0] + tuned_m = dml_plr.get_params("ml_m")["d"][0][0] + assert tuned_l is not None + assert tuned_m is not None + + finally: + # Clean up + logger.removeHandler(handler) + logger.setLevel(original_level) + + +@pytest.mark.ci +def test_optuna_logging_verbosity_sync(): + """Test that DoubleML logger level syncs with Optuna logger level.""" + import logging + + np.random.seed(3155) + dml_data = make_plr_CCDDHNR2018(n_obs=100, dim_x=5) + + ml_l = DecisionTreeRegressor(random_state=111) + ml_m = DecisionTreeRegressor(random_state=222) + + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2) + + optuna_params = {"ml_l": _small_tree_params, "ml_m": _small_tree_params} + + # Set DoubleML logger to DEBUG + logger = logging.getLogger("doubleml.utils._tune_optuna") + original_level = logger.level + logger.setLevel(logging.DEBUG) + + try: + # Tune without explicit verbosity setting + optuna_settings = { + "n_trials": 1, + "show_progress_bar": False, + } + + dml_plr.tune_ml_models(ml_param_space=optuna_params, optuna_settings=optuna_settings) + + # The test passes if no exception is raised + # The actual sync happens internally in _dml_tune_optuna + assert True + + finally: + logger.setLevel(original_level) + + +@pytest.mark.ci +def test_optuna_logging_explicit_verbosity(): + """Test that explicit verbosity setting in optuna_settings takes precedence.""" + np.random.seed(3156) + dml_data = 
make_plr_CCDDHNR2018(n_obs=100, dim_x=5) + + ml_l = DecisionTreeRegressor(random_state=333) + ml_m = DecisionTreeRegressor(random_state=444) + + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2) + + optuna_params = {"ml_l": _small_tree_params, "ml_m": _small_tree_params} + + # Explicitly set Optuna verbosity + optuna_settings = { + "n_trials": 1, + "verbosity": optuna.logging.WARNING, + "show_progress_bar": False, + } + + # This should not raise an error + dml_plr.tune_ml_models(ml_param_space=optuna_params, optuna_settings=optuna_settings) + + # Verify tuning worked + tuned_l = dml_plr.get_params("ml_l")["d"][0][0] + assert tuned_l is not None + + +@pytest.mark.ci +def test_doubleml_optuna_respects_provided_study_instances(): + np.random.seed(3157) + dml_data = make_plr_CCDDHNR2018(n_obs=80, dim_x=4) + + ml_l = DecisionTreeRegressor(random_state=555, max_depth=3, min_samples_leaf=3) + ml_m = DecisionTreeRegressor(random_state=556, max_depth=3, min_samples_leaf=3) + + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2) + + study_l = optuna.create_study(direction="maximize") + study_m = optuna.create_study(direction="maximize") + + optuna_params = {"ml_l": _small_tree_params, "ml_m": _small_tree_params} + optuna_settings = { + "n_trials": 1, + "show_progress_bar": False, + "ml_l": {"study": study_l}, + "ml_m": {"study": study_m}, + } + + tune_res = dml_plr.tune_ml_models( + ml_param_space=optuna_params, + optuna_settings=optuna_settings, + return_tune_res=True, + ) + + assert tune_res[0]["ml_l"].study is study_l + assert tune_res[0]["ml_m"].study is study_m diff --git a/doubleml/tests/test_dml_tune_optuna_exceptions.py b/doubleml/tests/test_dml_tune_optuna_exceptions.py new file mode 100644 index 00000000..d7e7c0fe --- /dev/null +++ b/doubleml/tests/test_dml_tune_optuna_exceptions.py @@ -0,0 +1,314 @@ +import re + +import numpy as np +import pytest +from sklearn.base import BaseEstimator, RegressorMixin +from sklearn.linear_model import Lasso + +from doubleml import DoubleMLData, DoubleMLPLR +from doubleml.plm.datasets import make_plr_CCDDHNR2018 +from doubleml.utils._tune_optuna import ( + _check_tuning_inputs, + _create_objective, + _default_optuna_settings, + _dml_tune_optuna, + _get_optuna_settings, + _resolve_optuna_scoring, + resolve_optuna_cv, +) + +np.random.seed(42) +data = make_plr_CCDDHNR2018(n_obs=100, dim_x=5, return_type="DataFrame") +dml_data = DoubleMLData(data, "y", "d") +ml_l = Lasso() +ml_m = Lasso() +dml_plr = DoubleMLPLR(dml_data, ml_l, ml_m) + + +def ml_l_params(trial): + return {"alpha": trial.suggest_float("alpha", 0.01, 0.1)} + + +def ml_m_params(trial): + return {"alpha": trial.suggest_float("alpha", 0.01, 0.1)} + + +valid_param_space = {"ml_l": ml_l_params, "ml_m": ml_m_params} + + +@pytest.mark.ci +@pytest.mark.parametrize( + "ml_param_space, msg", + [ + (None, "ml_param_space must be a non-empty dictionary."), + ( + {"ml_l": ml_l_params, "invalid_key": ml_m_params}, + r"Invalid ml_param_space keys for DoubleMLPLR: invalid_key. Valid keys are: ml_l, ml_m.", + ), + ( + {"ml_l": ml_l_params, "ml_m": "invalid"}, + "Parameter space for 'ml_m' must be a callable function that takes a trial and returns a dict. 
Got str.", + ), + ], +) +def test_tune_ml_models_invalid_param_space(ml_param_space, msg): + with pytest.raises(ValueError if "keys" in msg or "non-empty" in msg else TypeError, match=msg): + dml_plr.tune_ml_models(ml_param_space) + + +@pytest.mark.ci +@pytest.mark.parametrize( + "scoring_methods, exc, msg", + [ + ("invalid", TypeError, "scoring_methods must be provided as a dictionary keyed by learner name."), + ( + {"ml_l": "accuracy", "invalid_learner": "accuracy"}, + ValueError, + r"Invalid scoring_methods keys for DoubleMLPLR: invalid_learner. Valid keys are: ml_l, ml_m.", + ), + ( + {"ml_l": 123}, + TypeError, + r"scoring_method must be None, a string, a callable, accepted by scikit-learn. Got int for learner 'ml_l'.", + ), + ], +) +def test_tune_ml_models_invalid_scoring_methods(scoring_methods, exc, msg): + with pytest.raises(exc, match=re.escape(msg)): + dml_plr.tune_ml_models(valid_param_space, scoring_methods=scoring_methods) + + +@pytest.mark.ci +@pytest.mark.parametrize( + "cv, msg", + [ + ("invalid", "cv must not be provided as a string. Pass an integer or a cross-validation splitter."), + (1, "The number of folds used for tuning must be at least two. 1 was passed."), + ], +) +def test_tune_ml_models_invalid_cv(cv, msg): + with pytest.raises(ValueError if "folds" in msg else TypeError, match=msg): + dml_plr.tune_ml_models(valid_param_space, cv=cv) + + +@pytest.mark.ci +@pytest.mark.parametrize( + "set_as_params, msg", + [ + ("invalid", "set_as_params must be True or False. Got invalid."), + (None, "set_as_params must be True or False. Got None."), + ], +) +def test_tune_ml_models_invalid_set_as_params(set_as_params, msg): + with pytest.raises(TypeError, match=msg): + dml_plr.tune_ml_models(valid_param_space, set_as_params=set_as_params) + + +@pytest.mark.ci +@pytest.mark.parametrize( + "return_tune_res, msg", + [ + ("invalid", "return_tune_res must be True or False. Got invalid."), + (None, "return_tune_res must be True or False. Got None."), + ], +) +def test_tune_ml_models_invalid_return_tune_res(return_tune_res, msg): + with pytest.raises(TypeError, match=msg): + dml_plr.tune_ml_models(valid_param_space, return_tune_res=return_tune_res) + + +@pytest.mark.ci +@pytest.mark.parametrize( + "optuna_settings, msg", + [ + ("invalid", "optuna_settings must be a dict or None. Got ."), + ( + {"invalid_key": "value"}, + r"Invalid optuna_settings keys for DoubleMLPLR: invalid_key. Valid learner-specific keys are: ml_l, ml_m.", + ), + ({"ml_l": "invalid"}, "Optuna settings for 'ml_l' must be a dict."), + ], +) +def test_tune_ml_models_invalid_optuna_settings(optuna_settings, msg): + with pytest.raises(TypeError if "dict" in msg else ValueError, match=msg): + dml_plr.tune_ml_models(valid_param_space, optuna_settings=optuna_settings) + + +# add test for giving non iterable cv object +@pytest.mark.ci +def test_tune_ml_models_non_iterable_cv(): + class NonIterableCV: + pass + + non_iterable_cv = NonIterableCV() + msg = re.escape( + "cv must be an integer >= 2, a scikit-learn cross-validation splitter, " + "or an iterable of (train_indices, test_indices) pairs." 
+ ) + with pytest.raises(TypeError, match=msg): + dml_plr.tune_ml_models(valid_param_space, cv=non_iterable_cv) + + +@pytest.mark.ci +def test_resolve_optuna_cv_invalid_iterable_pairs(): + invalid_cv = [(np.array([0, 1]),)] + msg = re.escape("cv iterable must yield (train_indices, test_indices) pairs.") + with pytest.raises(TypeError, match=msg): + resolve_optuna_cv(invalid_cv) + + +@pytest.mark.ci +def test_resolve_optuna_scoring_unknown_estimator_type(): + class GenericEstimator(BaseEstimator): + def fit(self, x, y): + return self + + def set_params(self, **params): + return self + + msg = ( + "No scoring method provided and estimator type could not be inferred. " + "Please provide a scoring_method for learner 'ml_l'." + ) + with pytest.raises(ValueError, match=re.escape(msg)): + _resolve_optuna_scoring(None, GenericEstimator(), "ml_l") + + +@pytest.mark.ci +def test_check_tuning_inputs_mismatched_dimensions(): + x = np.zeros((3, 2)) + y = np.zeros(5) + with pytest.raises( + ValueError, + match=re.escape("Features and target must contain the same number of observations for learner 'ml_l'."), + ): + _check_tuning_inputs(y, x, Lasso(), lambda trial: {}, "neg_mean_squared_error", 2, "ml_l") + + +@pytest.mark.ci +def test_check_tuning_inputs_empty_target(): + x = np.zeros((0, 2)) + y = np.zeros(0) + with pytest.raises( + ValueError, + match=re.escape("Empty target passed to Optuna tuner for learner 'ml_l'."), + ): + _check_tuning_inputs(y, x, Lasso(), lambda trial: {}, "neg_mean_squared_error", 2, "ml_l") + + +@pytest.mark.ci +def test_check_tuning_inputs_invalid_learner_interface(): + class BadLearner: + def set_params(self, **kwargs): + return self + + x = np.zeros((5, 2)) + y = np.zeros(5) + with pytest.raises( + TypeError, + match=re.escape("Learner 'ml_l' must implement fit and set_params to be tuned with Optuna."), + ): + _check_tuning_inputs(y, x, BadLearner(), lambda trial: {}, "neg_mean_squared_error", 2, "ml_l") + + +@pytest.mark.ci +def test_check_tuning_inputs_non_callable_param_grid(): + x = np.zeros((5, 2)) + y = np.zeros(5) + msg = "param_grid must be a callable function that takes a trial and returns a dict. " "Got str for learner 'ml_l'." 
+ with pytest.raises(TypeError, match=re.escape(msg)): + _check_tuning_inputs(y, x, Lasso(), "not-callable", "neg_mean_squared_error", 2, "ml_l") + + +@pytest.mark.ci +def test_get_optuna_settings_requires_dict(): + with pytest.raises(TypeError, match="optuna_settings must be a dict or None."): + _get_optuna_settings("invalid", "ml_l") + + +@pytest.mark.ci +def test_get_optuna_settings_returns_default_copy_for_none(): + resolved_a = _get_optuna_settings(None, "ml_l") + resolved_b = _get_optuna_settings(None, "ml_l") + # Ensure defaults are preserved + for key, value in _default_optuna_settings().items(): + assert resolved_a[key] == value + # Ensure copies are independent + resolved_a["n_trials"] = 5 + assert resolved_b["n_trials"] == _default_optuna_settings()["n_trials"] + + +@pytest.mark.ci +def test_get_optuna_settings_validates_study_kwargs_type(): + with pytest.raises(TypeError, match="study_kwargs must be a dict."): + _get_optuna_settings({"study_kwargs": "invalid"}, "ml_l") + + +@pytest.mark.ci +def test_get_optuna_settings_validates_optimize_kwargs_type(): + with pytest.raises(TypeError, match="optimize_kwargs must be a dict."): + _get_optuna_settings({"optimize_kwargs": "invalid"}, "ml_l") + + +@pytest.mark.ci +def test_get_optuna_settings_validates_callbacks_type(): + with pytest.raises(TypeError, match="callbacks must be a sequence of callables or None."): + _get_optuna_settings({"callbacks": "invalid"}, "ml_l") + + +@pytest.mark.ci +def test_create_objective_requires_dict_params(): + x = np.asarray(dml_data.x) + y = np.asarray(dml_data.y) + + def bad_param_func(trial): + return ["not-a-dict"] + + objective = _create_objective( + bad_param_func, + Lasso(), + x, + y, + resolve_optuna_cv(2), + "neg_mean_squared_error", + ) + msg = ( + "param function must return a dict. Got list. 
Example: def params(trial): " + "return {'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1)}" + ) + with pytest.raises(TypeError, match=re.escape(msg)): + objective(None) + + +@pytest.mark.ci +def test_dml_tune_optuna_raises_when_no_trials_complete(): + class FailingRegressor(BaseEstimator, RegressorMixin): + def fit(self, x, y): + raise ValueError("fail") + + def predict(self, x): + return np.zeros(x.shape[0]) + + x = np.asarray(dml_data.x) + y = np.asarray(dml_data.y) + optuna_settings = { + "n_trials": 1, + "catch": (ValueError,), + "study_kwargs": {}, + "optimize_kwargs": {}, + } + with pytest.raises( + RuntimeError, + match="Optuna optimization failed to produce any complete trials.", + ): + _dml_tune_optuna( + y, + x, + FailingRegressor(), + lambda trial: {}, + "neg_mean_squared_error", + 2, + optuna_settings, + "ml_l", + "ml_l", + ) diff --git a/doubleml/tests/test_exceptions.py b/doubleml/tests/test_exceptions.py index 4fca5318..76b0ff12 100644 --- a/doubleml/tests/test_exceptions.py +++ b/doubleml/tests/test_exceptions.py @@ -1153,6 +1153,7 @@ def test_doubleml_sensitivity_inputs(): _ = dml_irm._set_sensitivity_elements(sensitivity_elements=sensitivity_elements, i_rep=0, i_treat=0) +@pytest.mark.ci def test_doubleml_sensitivity_reestimation_warning(): dml_irm = DoubleMLIRM( dml_data_irm, Lasso(), LogisticRegression(), ps_processor_config=PSProcessorConfig(clipping_threshold=0.1) @@ -1166,6 +1167,7 @@ def test_doubleml_sensitivity_reestimation_warning(): dml_irm._validate_sensitivity_elements() +@pytest.mark.ci def test_doubleml_sensitivity_summary(): dml_irm = DoubleMLIRM( dml_data_irm, Lasso(), LogisticRegression(), ps_processor_config=PSProcessorConfig(clipping_threshold=0.1) diff --git a/doubleml/tests/test_framework_exceptions.py b/doubleml/tests/test_framework_exceptions.py index f562f98d..e8440650 100644 --- a/doubleml/tests/test_framework_exceptions.py +++ b/doubleml/tests/test_framework_exceptions.py @@ -167,6 +167,7 @@ def test_input_exceptions(): framework_names.treatment_names = ["test", "test2", "test3"] +@pytest.mark.ci def test_operation_exceptions(): # addition msg = "Unsupported operand type: " diff --git a/doubleml/tests/test_nonlinear_score_mixin.py b/doubleml/tests/test_nonlinear_score_mixin.py index d68785aa..ebc4f192 100644 --- a/doubleml/tests/test_nonlinear_score_mixin.py +++ b/doubleml/tests/test_nonlinear_score_mixin.py @@ -162,6 +162,9 @@ def _nuisance_tuning( ): pass + def _nuisance_tuning_optuna(self, optuna_params, scoring_methods, cv, optuna_settings): + pass + def _sensitivity_element_est(self, preds): pass diff --git a/doubleml/tests/test_optuna_multi_wrappers.py b/doubleml/tests/test_optuna_multi_wrappers.py new file mode 100644 index 00000000..d484b4ef --- /dev/null +++ b/doubleml/tests/test_optuna_multi_wrappers.py @@ -0,0 +1,205 @@ +from unittest.mock import MagicMock + +import numpy as np +import pandas as pd +import pytest +from sklearn.linear_model import LinearRegression, LogisticRegression + +import doubleml as dml +from doubleml.data import DoubleMLPanelData +from doubleml.did.datasets import make_did_CS2021 +from doubleml.irm.datasets import make_irm_data, make_irm_data_discrete_treatments +from doubleml.tests._utils_tune_optuna import _basic_optuna_settings, _small_tree_params + + +def _build_apos_object(): + np.random.seed(3141) + data = make_irm_data_discrete_treatments(n_obs=40, n_levels=3, random_state=42) + x = data["x"] + y = data["y"] + d = data["d"] + columns = ["y", "d"] + [f"x{i + 1}" for i in range(x.shape[1])] + 
df = pd.DataFrame(np.column_stack((y, d, x)), columns=columns) + dml_data = dml.DoubleMLData(df, "y", "d") + + ml_g = LinearRegression() + ml_m = LogisticRegression(max_iter=200, multi_class="auto") + + return dml.DoubleMLAPOS( + dml_data, + ml_g=ml_g, + ml_m=ml_m, + treatment_levels=[0, 1, 2], + n_folds=2, + n_rep=1, + ) + + +def _build_qte_object(): + np.random.seed(3141) + dml_data = make_irm_data(n_obs=80, dim_x=5) + ml = LogisticRegression(max_iter=200, multi_class="auto") + + return dml.DoubleMLQTE( + dml_data, + ml_g=ml, + ml_m=ml, + quantiles=[0.25, 0.75], + n_folds=2, + n_rep=1, + ) + + +def _build_did_multi_object(): + np.random.seed(3141) + df = make_did_CS2021(n_obs=40, n_periods=4, time_type="datetime") + x_cols = [col for col in df.columns if col.startswith("Z")] + dml_panel = DoubleMLPanelData(df, y_col="y", d_cols="d", t_col="t", id_col="id", x_cols=x_cols) + + ml_g = LinearRegression() + ml_m = LogisticRegression(max_iter=200, multi_class="auto") + + return dml.did.DoubleMLDIDMulti( + obj_dml_data=dml_panel, + ml_g=ml_g, + ml_m=ml_m, + control_group="never_treated", + n_folds=2, + n_rep=1, + panel=True, + ) + + +def _collect_param_names(dml_obj): + if hasattr(dml_obj, "params_names") and dml_obj.params_names: + return dml_obj.params_names + learner_dict = getattr(dml_obj, "_learner", None) + if isinstance(learner_dict, dict) and learner_dict: + return list(learner_dict.keys()) + return ["ml_g"] + + +def _make_tune_kwargs(dml_obj, return_tune_res=True): + optuna_params = {name: _small_tree_params for name in _collect_param_names(dml_obj)} + return { + "ml_param_space": optuna_params, + "cv": 3, + "set_as_params": False, + "return_tune_res": return_tune_res, + "optuna_settings": _basic_optuna_settings({"n_trials": 2}), + "scoring_methods": None, + } + + +@pytest.fixture +def apos_obj(): + return _build_apos_object() + + +@pytest.fixture +def qte_obj(): + return _build_qte_object() + + +@pytest.fixture +def did_multi_obj(): + return _build_did_multi_object() + + +@pytest.mark.ci +def test_doubleml_apos_tune_ml_models_collects_results(apos_obj): + dml_obj = apos_obj + mocks = [] + expected_payload = [] + + for idx in range(dml_obj.n_treatment_levels): + mock_model = MagicMock() + payload = {"params": f"level-{idx}"} + mock_model.tune_ml_models.return_value = [payload] + mocks.append(mock_model) + expected_payload.append(payload) + + dml_obj._modellist = mocks + + tune_kwargs = _make_tune_kwargs(dml_obj) + + res = dml_obj.tune_ml_models(**tune_kwargs) + assert res == expected_payload + for mock in mocks: + mock.tune_ml_models.assert_called_once_with(**tune_kwargs) + + for mock in mocks: + mock.reset_mock() + tune_kwargs_nores = _make_tune_kwargs(dml_obj, return_tune_res=False) + + assert dml_obj.tune_ml_models(**tune_kwargs_nores) is dml_obj + for mock in mocks: + mock.tune_ml_models.assert_called_once_with(**tune_kwargs_nores) + + +@pytest.mark.ci +def test_doubleml_qte_tune_ml_models_returns_quantile_results(qte_obj): + dml_obj = qte_obj + modellist_0 = [] + modellist_1 = [] + expected_payload = [] + + for idx in range(dml_obj.n_quantiles): + mock_0 = MagicMock() + mock_1 = MagicMock() + payload_0 = {"params": f"quantile-{idx}-treatment-0"} + payload_1 = {"params": f"quantile-{idx}-treatment-1"} + mock_0.tune_ml_models.return_value = [payload_0] + mock_1.tune_ml_models.return_value = [payload_1] + modellist_0.append(mock_0) + modellist_1.append(mock_1) + expected_payload.append({"treatment_0": payload_0, "treatment_1": payload_1}) + + dml_obj._modellist_0 = modellist_0 + 
dml_obj._modellist_1 = modellist_1 + + tune_kwargs = _make_tune_kwargs(dml_obj) + + res = dml_obj.tune_ml_models(**tune_kwargs) + assert res == expected_payload + for mock in modellist_0 + modellist_1: + mock.tune_ml_models.assert_called_once_with(**tune_kwargs) + + for mock in modellist_0 + modellist_1: + mock.reset_mock() + tune_kwargs_nores = _make_tune_kwargs(dml_obj, return_tune_res=False) + + assert dml_obj.tune_ml_models(**tune_kwargs_nores) is dml_obj + for mock in modellist_0 + modellist_1: + mock.tune_ml_models.assert_called_once_with(**tune_kwargs_nores) + + +@pytest.mark.ci +def test_doubleml_did_multi_tune_ml_models_handles_all_group_time_models(did_multi_obj): + dml_obj = did_multi_obj + mocks = [] + expected_payload = [] + + for idx in range(len(dml_obj.modellist)): + mock_model = MagicMock() + payload = {"params": f"gt-{idx}"} + mock_model.tune_ml_models.return_value = [payload] + mocks.append(mock_model) + expected_payload.append(payload) + + dml_obj._modellist = mocks + + tune_kwargs = _make_tune_kwargs(dml_obj) + + res = dml_obj.tune_ml_models(**tune_kwargs) + assert res == expected_payload + for mock in mocks: + mock.tune_ml_models.assert_called_once_with(**tune_kwargs) + + for mock in mocks: + mock.reset_mock() + tune_kwargs_nores = _make_tune_kwargs(dml_obj, return_tune_res=False) + + assert dml_obj.tune_ml_models(**tune_kwargs_nores) is dml_obj + for mock in mocks: + mock.tune_ml_models.assert_called_once_with(**tune_kwargs_nores) diff --git a/doubleml/tests/test_optuna_settings_validation.py b/doubleml/tests/test_optuna_settings_validation.py new file mode 100644 index 00000000..8414ff06 --- /dev/null +++ b/doubleml/tests/test_optuna_settings_validation.py @@ -0,0 +1,143 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor + +import doubleml as dml +from doubleml.did.datasets import make_did_SZ2020 +from doubleml.irm.datasets import make_irm_data +from doubleml.plm.datasets import make_pliv_CHS2015, make_plr_CCDDHNR2018 + + +def _constant_params(_trial): + return {} + + +@pytest.mark.ci +def test_optuna_settings_invalid_key_for_irm_raises(): + np.random.seed(2024) + dml_data = make_irm_data(n_obs=40, dim_x=2) + + ml_g = DecisionTreeRegressor(random_state=11) + ml_m = DecisionTreeClassifier(random_state=22) + dml_irm = dml.DoubleMLIRM(dml_data, ml_g, ml_m, n_folds=2, n_rep=1) + + optuna_params = {"ml_g0": _constant_params, "ml_g1": _constant_params, "ml_m": _constant_params} + invalid_settings = {"ml_l": {"n_trials": 5}} + + with pytest.raises(ValueError, match="ml_l"): + dml_irm.tune_ml_models(ml_param_space=optuna_params, optuna_settings=invalid_settings) + + +@pytest.mark.ci +def test_optuna_settings_invalid_key_for_plr_raises(): + np.random.seed(2025) + dml_data = make_plr_CCDDHNR2018(n_obs=80, dim_x=4) + + ml_l = DecisionTreeRegressor(random_state=33) + ml_m = DecisionTreeRegressor(random_state=44) + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2, n_rep=1) + + optuna_params = {"ml_l": _constant_params, "ml_m": _constant_params} + invalid_settings = {"ml_g0": {"n_trials": 5}} + + with pytest.raises(ValueError, match="ml_g0"): + dml_plr.tune_ml_models(ml_param_space=optuna_params, optuna_settings=invalid_settings) + + +@pytest.mark.ci +def test_optuna_settings_invalid_key_for_pliv_raises(): + np.random.seed(2026) + dml_data = make_pliv_CHS2015(n_obs=80, dim_x=4, dim_z=2) + + ml_l = DecisionTreeRegressor(random_state=55) + ml_m = DecisionTreeRegressor(random_state=66) + ml_r = 
DecisionTreeRegressor(random_state=77) + dml_pliv = dml.DoubleMLPLIV(dml_data, ml_l, ml_m, ml_r, n_folds=2, n_rep=1) + + optuna_params = { + "ml_l": _constant_params, + "ml_m_Z1": _constant_params, + "ml_m_Z2": _constant_params, + "ml_r": _constant_params, + } + + invalid_settings = {"ml_g": {"n_trials": 5}} + + with pytest.raises(ValueError, match="ml_g"): + dml_pliv.tune_ml_models(ml_param_space=optuna_params, optuna_settings=invalid_settings) + + +@pytest.mark.ci +def test_optuna_settings_invalid_key_for_did_raises(): + np.random.seed(2027) + dml_data = make_did_SZ2020(n_obs=120, dgp_type=1, return_type="DoubleMLDIDData") + + ml_g = DecisionTreeRegressor(random_state=88) + ml_m = DecisionTreeClassifier(random_state=99) + dml_did = dml.DoubleMLDID(dml_data, ml_g, ml_m, score="observational", n_folds=2, n_rep=1) + + optuna_params = {"ml_g0": _constant_params, "ml_g1": _constant_params, "ml_m": _constant_params} + invalid_settings = {"ml_l": {"n_trials": 5}} + + with pytest.raises(ValueError, match="ml_l"): + dml_did.tune_ml_models(ml_param_space=optuna_params, optuna_settings=invalid_settings) + + +@pytest.mark.ci +def test_optuna_params_invalid_key_for_irm_raises(): + np.random.seed(2028) + dml_data = make_irm_data(n_obs=40, dim_x=2) + + ml_g = DecisionTreeRegressor(random_state=99) + ml_m = DecisionTreeClassifier(random_state=101) + dml_irm = dml.DoubleMLIRM(dml_data, ml_g, ml_m, n_folds=2, n_rep=1) + + optuna_params = {"ml_g0": _constant_params, "ml_g1": _constant_params, "ml_m": _constant_params, "ml_l": _constant_params} + + with pytest.raises(ValueError, match="ml_l"): + dml_irm.tune_ml_models(ml_param_space=optuna_params) + + +@pytest.mark.ci +def test_optuna_params_invalid_key_for_plr_raises(): + np.random.seed(2029) + dml_data = make_plr_CCDDHNR2018(n_obs=80, dim_x=4) + + ml_l = DecisionTreeRegressor(random_state=111) + ml_m = DecisionTreeRegressor(random_state=222) + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2, n_rep=1) + + optuna_params = {"ml_l": _constant_params, "ml_m": _constant_params, "ml_g0": _constant_params} + + with pytest.raises(ValueError, match="ml_g0"): + dml_plr.tune_ml_models(ml_param_space=optuna_params) + + +@pytest.mark.ci +def test_optuna_params_invalid_key_for_pliv_raises(): + np.random.seed(2030) + dml_data = make_pliv_CHS2015(n_obs=80, dim_x=4, dim_z=2) + + ml_l = DecisionTreeRegressor(random_state=333) + ml_m = DecisionTreeRegressor(random_state=444) + ml_r = DecisionTreeRegressor(random_state=555) + dml_pliv = dml.DoubleMLPLIV(dml_data, ml_l, ml_m, ml_r, n_folds=2, n_rep=1) + + optuna_params = {"ml_l": _constant_params, "ml_m": _constant_params, "ml_r": _constant_params, "ml_g": _constant_params} + + with pytest.raises(ValueError, match="ml_g"): + dml_pliv.tune_ml_models(ml_param_space=optuna_params) + + +@pytest.mark.ci +def test_optuna_params_invalid_key_for_did_raises(): + np.random.seed(2031) + dml_data = make_did_SZ2020(n_obs=100, dgp_type=1, return_type="DoubleMLDIDData") + + ml_g = DecisionTreeRegressor(random_state=666) + dml_did = dml.DoubleMLDID(dml_data, ml_g, score="experimental", n_folds=2, n_rep=1) + + optuna_params = {"ml_g0": _constant_params, "ml_g1": _constant_params, "ml_l": _constant_params} + + with pytest.raises(ValueError, match="ml_l"): + dml_did.tune_ml_models(ml_param_space=optuna_params) diff --git a/doubleml/tests/test_optuna_tune_multiple_treatments.py b/doubleml/tests/test_optuna_tune_multiple_treatments.py new file mode 100644 index 00000000..70328747 --- /dev/null +++ 
b/doubleml/tests/test_optuna_tune_multiple_treatments.py @@ -0,0 +1,61 @@ +import numpy as np +import pytest +from sklearn.tree import DecisionTreeRegressor + +import doubleml as dml +from doubleml.plm.datasets import make_plr_CCDDHNR2018 +from doubleml.tests._utils_tune_optuna import ( + _SAMPLER_CASES, + _assert_tree_params, + _basic_optuna_settings, + _small_tree_params, +) + + +@pytest.mark.ci +@pytest.mark.parametrize("sampler_name,optuna_sampler", _SAMPLER_CASES, ids=[case[0] for case in _SAMPLER_CASES]) +def test_doubleml_plr_optuna_multiple_treatments(sampler_name, optuna_sampler): + np.random.seed(3141) + alpha = 0.5 + data = make_plr_CCDDHNR2018(n_obs=500, dim_x=5, alpha=alpha, return_type="DataFrame") + treats = ["d", "X1"] + dml_data = dml.DoubleMLData( + data, y_col="y", d_cols=treats, x_cols=[col for col in data.columns if col not in ["y", "d", "X1"]] + ) + + ml_l = DecisionTreeRegressor(random_state=123) + ml_m = DecisionTreeRegressor(random_state=456) + + dml_plr = dml.DoubleMLPLR(dml_data, ml_l, ml_m, n_folds=2, score="partialling out") + dml_plr.fit() + untuned_score = dml_plr.evaluate_learners() + + optuna_params = {"ml_l": _small_tree_params, "ml_m": _small_tree_params} + + tune_res = dml_plr.tune_ml_models( + ml_param_space=optuna_params, + optuna_settings=_basic_optuna_settings({"sampler": optuna_sampler}), + return_tune_res=True, + ) + + dml_plr.fit() + tuned_score = dml_plr.evaluate_learners() + + for i, _ in enumerate(treats): + tuned_params_l = tune_res[i]["ml_l"].best_params + tuned_params_m = tune_res[i]["ml_m"].best_params + + _assert_tree_params(tuned_params_l) + _assert_tree_params(tuned_params_m) + + # ensure results contain optuna objects and best params + assert isinstance(tune_res[i], dict) + assert set(tune_res[i].keys()) == {"ml_l", "ml_m"} + assert hasattr(tune_res[i]["ml_l"], "best_params") + assert tune_res[i]["ml_l"].best_params["max_depth"] == tuned_params_l["max_depth"] + assert hasattr(tune_res[i]["ml_m"], "best_params") + assert tune_res[i]["ml_m"].best_params["max_depth"] == tuned_params_m["max_depth"] + + # ensure tuning improved RMSE + assert tuned_score["ml_l"].squeeze()[i] < untuned_score["ml_l"].squeeze()[i] + assert tuned_score["ml_m"].squeeze()[i] < untuned_score["ml_m"].squeeze()[i] diff --git a/doubleml/utils/__init__.py b/doubleml/utils/__init__.py index 4f6269dd..868429da 100644 --- a/doubleml/utils/__init__.py +++ b/doubleml/utils/__init__.py @@ -2,6 +2,7 @@ The :mod:`doubleml.utils` module includes various utilities. """ +from ._tune_optuna import DMLOptunaResult from .blp import DoubleMLBLP from .dummy_learners import DMLDummyClassifier, DMLDummyRegressor from .gain_statistics import gain_statistics @@ -13,6 +14,7 @@ __all__ = [ "DMLDummyRegressor", "DMLDummyClassifier", + "DMLOptunaResult", "DoubleMLResampling", "DoubleMLClusterResampling", "DoubleMLBLP", diff --git a/doubleml/utils/_tune_optuna.py b/doubleml/utils/_tune_optuna.py new file mode 100644 index 00000000..36d8f7e7 --- /dev/null +++ b/doubleml/utils/_tune_optuna.py @@ -0,0 +1,708 @@ +""" +Optuna-based hyperparameter tuning utilities for DoubleML. + +This module provides Optuna-specific functionality for hyperparameter optimization, +decoupled from sklearn-based grid/randomized search. + +Logging +------- +This module uses Python's logging module. The logger is synchronized with Optuna's logging +system. You can control the verbosity by: +1. Setting the logging level for 'doubleml.utils._tune_optuna' +2. 
Passing 'verbosity' in optuna_settings (takes precedence) + +Example: + >>> import logging + >>> logging.basicConfig(level=logging.INFO) + >>> # Now you'll see tuning progress and information +""" + +import logging +from collections.abc import Iterable +from copy import deepcopy +from dataclasses import dataclass +from pprint import pformat +from typing import Callable, Union + +import numpy as np +import optuna +from sklearn.base import clone, is_classifier, is_regressor +from sklearn.model_selection import BaseCrossValidator, KFold, cross_val_score + +logger = logging.getLogger(__name__) + +_OPTUNA_KFOLD_RANDOM_STATE = 42 + +_OPTUNA_DEFAULT_SETTINGS = { + "n_trials": 100, + "timeout": None, + "direction": "maximize", + "study_kwargs": {}, + "optimize_kwargs": {}, + "sampler": None, + "callbacks": None, + "catch": (), + "show_progress_bar": False, + "gc_after_trial": False, + "study": None, + "n_jobs_optuna": None, + "verbosity": None, +} + + +@dataclass +class DMLOptunaResult: + """ + Container for Optuna search results. + + This dataclass holds the results of Optuna-based hyperparameter tuning, + including the best estimator, parameters, score, and the complete study history. + + Parameters + ---------- + learner_name : str + Name of the learner passed (e.g., 'ml_g'). + + params_name : str + Name of the nuisance parameter being tuned (e.g., 'ml_g0'). + + best_estimator : object + The estimator instance with the best found hyperparameters set and fitted on the + full dataset used during tuning. + + best_params : dict + The best hyperparameters found during tuning. + + best_score : float + The best average cross-validation score achieved during tuning. + + scoring_method : str or callable + The scoring method used during tuning. + + study : optuna.study.Study + The Optuna study object containing the tuning history. + + tuned : bool + Indicates whether tuning was performed (True) or skipped (False). + """ + + learner_name: str + params_name: str + best_estimator: object + best_params: dict + best_score: float + scoring_method: Union[str, Callable] + study: optuna.study.Study + tuned: bool + + def __str__(self): + core_summary = self._core_summary_str() + params_summary = self._best_params_str() + res = ( + "================== DMLOptunaResult ==================\n" + + core_summary + + "\n------------------ Best parameters ------------------\n" + + params_summary + ) + return res + + def _core_summary_str(self): + scoring_repr = ( + self.scoring_method.__name__ + if callable(self.scoring_method) and hasattr(self.scoring_method, "__name__") + else str(self.scoring_method) + ) + summary = ( + f"Learner name: {self.learner_name}\n" + f"Params name: {self.params_name}\n" + f"Tuned: {self.tuned}\n" + f"Best score: {self.best_score}\n" + f"Scoring method: {scoring_repr}\n" + ) + return summary + + def _best_params_str(self): + if not self.best_params: + return "No best parameters available.\n" + formatted = pformat(self.best_params, sort_dicts=True, compact=True) + return f"{formatted}\n" + + +OPTUNA_GLOBAL_SETTING_KEYS = frozenset(_OPTUNA_DEFAULT_SETTINGS.keys()) + +TUNE_ML_MODELS_DOC = """ + Hyperparameter-tuning for DoubleML models using Optuna. + + The hyperparameter-tuning is performed using Optuna's Bayesian optimization. + Unlike grid/randomized search, Optuna tuning is performed once on the whole dataset + using cross-validation, and the same optimal hyperparameters are used for all folds. 
+
+    Parameters
+    ----------
+    ml_param_space : dict
+        A dict with a parameter grid function for each nuisance model
+        (see attribute ``params_names``) or for each learner (see attribute ``learner_names``).
+        Mixed specifications are allowed, i.e., some nuisance models can share the same learner.
+        For mixed specifications, learner-specific settings will be overwritten by nuisance model-specific settings.
+
+        Each parameter grid must be specified as a callable function that takes an Optuna trial
+        and returns a dictionary of hyperparameters.
+
+        For PLR models, keys should be: ``'ml_l'``, ``'ml_m'`` (and optionally ``'ml_g'`` for IV-type score).
+        For IRM models, keys should be: ``'ml_g0'``, ``'ml_g1'`` (or just ``'ml_g'`` for both), ``'ml_m'``.
+
+        Example:
+
+        .. code-block:: python
+
+            def ml_l_params(trial):
+                return {
+                    'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
+                    'n_estimators': trial.suggest_int('n_estimators', 100, 500, step=50),
+                    'num_leaves': trial.suggest_int('num_leaves', 20, 256),
+                    'min_child_samples': trial.suggest_int('min_child_samples', 5, 100),
+                }
+
+            ml_param_space = {'ml_l': ml_l_params, 'ml_m': ml_m_params}
+
+        Note: Optuna tuning is performed globally (not fold-specific) to ensure consistent
+        hyperparameters across all folds.
+
+    scoring_methods : None or dict
+        The scoring method used to evaluate the predictions. The scoring method must be set per
+        nuisance model via a dict (see attribute ``params_names`` for the keys).
+        If None, the estimator's score method is used.
+        Default is ``None``.
+
+    cv : int, cross-validation splitter, or iterable of (train_indices, test_indices)
+        Cross-validation strategy used for Optuna-based tuning. If an integer is provided, a shuffled
+        :class:`sklearn.model_selection.KFold` with the specified number of splits and ``random_state=42`` is used.
+        Custom splitters must implement ``split`` (and ideally ``get_n_splits``), or be an iterable yielding
+        ``(train_indices, test_indices)`` pairs. Default is ``5``.
+
+    set_as_params : bool
+        Indicates whether the hyperparameters should be set in order to be used when :meth:`fit` is called.
+        Default is ``True``.
+
+    return_tune_res : bool
+        Indicates whether detailed tuning results should be returned.
+        Default is ``False``.
+
+    optuna_settings : None or dict
+        Optional configuration passed to the Optuna tuner. Supports global settings
+        as well as learner-specific overrides (using the keys from ``ml_param_space``).
+        The dictionary can contain entries corresponding to Optuna's study and optimize
+        configuration such as:
+
+        - ``n_trials`` (int): Number of optimization trials (default: 100)
+        - ``timeout`` (float): Time limit in seconds for the study (default: None)
+        - ``direction`` (str): Optimization direction, 'maximize' or 'minimize'.
+          For sklearn scorers, use 'maximize' for negative metrics like 'neg_mean_squared_error'
+          (since -0.1 > -0.2 means better performance). Can be set globally or per learner.
+ (default: 'maximize') + - ``sampler`` (optuna.samplers.BaseSampler): Optuna sampler instance (default: None, uses TPE) + - ``callbacks`` (list): List of callback functions (default: None) + - ``show_progress_bar`` (bool): Show progress bar during optimization (default: False) + - ``n_jobs_optuna`` (int): Number of parallel trials (default: None) + - ``verbosity`` (int): Optuna logging verbosity level (default: None) + - ``study`` (optuna.study.Study): Pre-created study instance (default: None) + - ``study_kwargs`` (dict): Additional kwargs for study creation (default: {}) + - ``optimize_kwargs`` (dict): Additional kwargs for study.optimize() (default: {}) + + To set direction per learner (similar to ``scoring_methods``): + + .. code-block:: python + + optuna_settings = { + 'n_trials': 50, + 'direction': 'maximize', # Global default + 'ml_g0': {'direction': 'maximize'}, # Per-learner override + 'ml_m': {'n_trials': 100, 'direction': 'maximize'} + } + + Defaults to ``None``. + + Returns + ------- + self : object + Returned if ``return_tune_res`` is ``False``. + + tune_res: list + A list containing detailed tuning results and the proposed hyperparameters. + Returned if ``return_tune_res`` is ``True``. + + Examples + -------- + >>> import numpy as np + >>> from doubleml import DoubleMLData, DoubleMLPLR + >>> from doubleml.plm.datasets import make_plr_CCDDHNR2018 + >>> from lightgbm import LGBMRegressor + >>> import optuna + >>> # Generate data + >>> np.random.seed(42) + >>> data = make_plr_CCDDHNR2018(n_obs=500, dim_x=20, return_type='DataFrame') + >>> dml_data = DoubleMLData(data, 'y', 'd') + >>> # Initialize model + >>> dml_plr = DoubleMLPLR( + ... dml_data, + ... LGBMRegressor(n_estimators=50, verbose=-1, random_state=42), + ... LGBMRegressor(n_estimators=50, verbose=-1, random_state=42) + ... ) + >>> # Define parameter grid functions + >>> def ml_l_params(trial): + ... return { + ... 'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True), + ... } + >>> def ml_m_params(trial): + ... return { + ... 'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True), + ... } + >>> ml_param_space = {'ml_l': ml_l_params, 'ml_m': ml_m_params} + >>> # Tune with TPE sampler + >>> optuna_settings = { + ... 'n_trials': 5, + ... 'sampler': optuna.samplers.TPESampler(seed=42), + ... } + >>> tune_res = dml_plr.tune_ml_models(ml_param_space, optuna_settings=optuna_settings, return_tune_res=True) + >>> print(tune_res[0]['ml_l'].best_params) # doctest: +SKIP + {'learning_rate': 0.03907122389107094} + >>> # Fit and get results + >>> dml_plr.fit().summary # doctest: +SKIP + coef std err t P>|t| 2.5 % 97.5 % + d 0.57436 0.045206 12.705519 5.510257e-37 0.485759 0.662961 + >>> # Example with scoring methods and directions + >>> scoring_methods = { + ... 'ml_l': 'neg_mean_squared_error', # Negative metric + ... 'ml_m': 'neg_mean_squared_error' + ... } + >>> optuna_settings = { + ... 'n_trials': 50, + ... 'direction': 'maximize', # Maximize negative MSE (minimize MSE) + ... 'sampler': optuna.samplers.TPESampler(seed=42), + ... } + >>> tune_res = dml_plr.tune_ml_models(ml_param_space, scoring_methods=scoring_methods, + ... 
optuna_settings=optuna_settings, return_tune_res=True) + >>> print(tune_res[0]['ml_l'].best_params) # doctest: +SKIP + {'learning_rate': 0.04300012336462904} + >>> dml_plr.fit().summary # doctest: +SKIP + coef std err t P>|t| 2.5 % 97.5 % + d 0.574796 0.045062 12.755721 2.896820e-37 0.486476 0.663115 + """ + + +def _default_optuna_settings(): + return deepcopy(_OPTUNA_DEFAULT_SETTINGS) + + +def _resolve_optuna_scoring(scoring_method, learner, params_name): + """Resolve the scoring argument for an Optuna-tuned learner. + + Parameters + ---------- + scoring_method : str, callable or None + Scoring argument supplied by the caller. ``None`` triggers automatic + fallback selection. + learner : estimator + Estimator instance that will be tuned. + params_name : str + Identifier used for logging and error messages. + + Returns + ------- + tuple + A pair consisting of the scoring argument to pass to + :func:`sklearn.model_selection.cross_val_score` (``None`` means use the + estimator's default ``score``) and a human-readable message describing + the decision for logging purposes. + """ + + if scoring_method is not None: + message = f"Using provided scoring method: {scoring_method} for learner '{params_name}'" + return scoring_method, message + + if is_regressor(learner): + message = "No scoring method provided, using 'neg_root_mean_squared_error' (RMSE) " f"for learner '{params_name}'." + return "neg_root_mean_squared_error", message + + if is_classifier(learner): + message = f"No scoring method provided, using 'neg_log_loss' " f"for learner '{params_name}'." + return "neg_log_loss", message + + raise ValueError( + f"No scoring method provided and estimator type could not be inferred. Please provide a scoring_method for learner " + f"'{params_name}'." + ) + + +def resolve_optuna_cv(cv): + """Normalize the ``cv`` argument for Optuna-based tuning.""" + + if cv is None: + cv = 5 + + if isinstance(cv, int): + if cv < 2: + raise ValueError(f"The number of folds used for tuning must be at least two. {cv} was passed.") + return KFold(n_splits=cv, shuffle=True, random_state=_OPTUNA_KFOLD_RANDOM_STATE) + + if isinstance(cv, BaseCrossValidator): + return cv + + if isinstance(cv, str): + raise TypeError("cv must not be provided as a string. Pass an integer or a cross-validation splitter.") + + split_attr = getattr(cv, "split", None) + if callable(split_attr): + return cv + + if isinstance(cv, Iterable): + cv_list = list(cv) + if not cv_list: + raise ValueError("cv iterable must not be empty.") + for split in cv_list: + if not isinstance(split, (tuple, list)) or len(split) != 2: + raise TypeError("cv iterable must yield (train_indices, test_indices) pairs.") + return cv_list + + raise TypeError( + "cv must be an integer >= 2, a scikit-learn cross-validation splitter, or an iterable of " + "(train_indices, test_indices) pairs." + ) + + +def _check_tuning_inputs( + y, + x, + learner, + param_grid_func, + scoring_method, + cv, + params_name, +): + """Validate Optuna tuning inputs and normalize the cross-validation splitter. + + Parameters + ---------- + y : np.ndarray + Target array used during tuning. + x : np.ndarray + Feature matrix used during tuning. + learner : estimator + Estimator that will be tuned. + param_grid_func : callable or None + Callback that samples hyperparameters from an Optuna trial. + scoring_method : str, callable or None + Scoring argument after applying :func:`doubleml.utils._tune_optuna._resolve_optuna_scoring`. 
+ cv : int, cross-validation splitter or iterable + Cross-validation definition provided by the caller. + params_name : str + Name of the nuisance parameter for logging purposes. + + Returns + ------- + cross-validator or iterable + Cross-validation splitter compatible with + :func:`sklearn.model_selection.cross_val_score`. + """ + + if y.shape[0] != x.shape[0]: + raise ValueError(f"Features and target must contain the same number of observations for learner '{params_name}'.") + if y.size == 0: + raise ValueError(f"Empty target passed to Optuna tuner for learner '{params_name}'.") + + if param_grid_func is not None and not callable(param_grid_func): + raise TypeError( + "param_grid must be a callable function that takes a trial and returns a dict. " + f"Got {type(param_grid_func).__name__} for learner '{params_name}'." + ) + + if scoring_method is not None and not callable(scoring_method) and not isinstance(scoring_method, str): + raise TypeError( + "scoring_method must be None, a string, a callable, accepted by scikit-learn. " + f"Got {type(scoring_method).__name__} for learner '{params_name}'." + ) + + if not hasattr(learner, "fit") or not hasattr(learner, "set_params"): + raise TypeError(f"Learner '{params_name}' must implement fit and set_params to be tuned with Optuna.") + + return resolve_optuna_cv(cv) + + +def _get_optuna_settings(optuna_settings, params_name): + """ + Get Optuna settings, considering defaults, user-provided values, and learner-specific overrides. + + Parameters + ---------- + optuna_settings : dict or None + User-provided Optuna settings. + params_name : str + Name of the nuisance params to check for specific setting, e.g. `ml_g0` or `ml_g1` for `DoubleMLIRM`. + + Returns + ------- + dict + Resolved settings dictionary. + """ + default_settings = _default_optuna_settings() + + if optuna_settings is None: + return default_settings + + if not isinstance(optuna_settings, dict): + raise TypeError("optuna_settings must be a dict or None.") + + # Base settings are the user-provided settings filtered by default keys + base_settings = {key: value for key, value in optuna_settings.items() if key in OPTUNA_GLOBAL_SETTING_KEYS} + learner_or_params_keys = set(optuna_settings.keys()) - set(base_settings.keys()) + + # Find matching learner-specific settings, handles the case to match ml_g to ml_g0, ml_g1, etc. + learner_specific_settings = {} + for k in learner_or_params_keys: + if k in params_name and params_name != k: + learner_specific_settings = optuna_settings[k] + + # set params specific settings + params_specific_settings = {} + if params_name in learner_or_params_keys: + params_specific_settings = optuna_settings[params_name] + + # Merge settings: defaults < base < learner-specific < params_specific + resolved = default_settings.copy() | base_settings | learner_specific_settings | params_specific_settings + + # Validate types + if not isinstance(resolved["study_kwargs"], dict): + raise TypeError("study_kwargs must be a dict.") + if not isinstance(resolved["optimize_kwargs"], dict): + raise TypeError("optimize_kwargs must be a dict.") + if resolved["callbacks"] is not None and not isinstance(resolved["callbacks"], (list, tuple)): + raise TypeError("callbacks must be a sequence of callables or None.") + + return resolved + + +def _create_study(settings, learner_name): + """ + Create or retrieve an Optuna :class:`optuna.study.Study` instance. + + Parameters + ---------- + settings : dict + Resolved Optuna settings containing study configuration. 
+ learner_name : str + Identifier used for logging the resolved study configuration. + + Returns + ------- + optuna.study.Study + The Optuna study object ready for optimization. + """ + + # Check if a study instance is provided directly + study_instance = settings.get("study") + if study_instance is not None: + return study_instance + + # Build study kwargs from settings + study_kwargs = settings.get("study_kwargs", {}).copy() + if "direction" not in study_kwargs: + study_kwargs["direction"] = settings.get("direction", "maximize") + logger.info(f"Optuna study direction set to '{study_kwargs['direction']}' for learner '{learner_name}'.") + if settings.get("sampler") is not None: + study_kwargs["sampler"] = settings["sampler"] + logger.info(f"Using sampler {settings['sampler'].__class__.__name__} for learner '{learner_name}'.") + study_kwargs.setdefault("study_name", f"tune_{learner_name}") + + return optuna.create_study(**study_kwargs) + + +def _create_objective(param_grid_func, learner, x, y, cv, scoring_method): + """ + Create an Optuna objective function for hyperparameter optimization. + + Parameters + ---------- + param_grid_func : callable + Function that takes an Optuna trial and returns a parameter dictionary. + Example: def params(trial): return {"learning_rate": trial.suggest_float("learning_rate", 0.01, 0.1)} + learner : estimator + The machine learning model to tune. + x : np.ndarray + Features (full dataset). + y : np.ndarray + Target variable (full dataset). + cv : cross-validation generator + KFold or similar cross-validation splitter. + scoring_method : str, callable or None + Scoring argument for cross-validation. ``None`` delegates to the + estimator's default ``score`` implementation. + + Returns + ------- + callable + Objective function for Optuna optimization. + """ + + def objective(trial): + """Objective function for Optuna optimization.""" + # Get parameters from the user-provided function + params = param_grid_func(trial) + + if not isinstance(params, dict): + raise TypeError( + f"param function must return a dict. Got {type(params).__name__}. " + f"Example: def params(trial): return {{'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1)}}" + ) + + # Clone learner and set parameters + estimator = clone(learner).set_params(**params) + + # Perform cross-validation on full dataset + scores = cross_val_score( + estimator, + x, + y, + cv=cv, + scoring=scoring_method, + error_score="raise", + ) + + # Return mean test score + return np.nanmean(scores) + + return objective + + +def _dml_tune_optuna( + y, + x, + learner, + param_grid_func, + scoring_method, + cv, + optuna_settings, + learner_name, + params_name, +): + """ + Tune hyperparameters using Optuna on the whole dataset with cross-validation. + + Unlike the grid/randomized search which tunes separately for each fold, this function + tunes once on the full dataset and returns a single tuning result per learner. + + Parameters + ---------- + y : np.ndarray + Target variable (full dataset). + x : np.ndarray + Features (full dataset). + learner : estimator + The machine learning model to tune. + param_grid_func : callable + Function that takes an Optuna trial and returns a parameter dictionary. + Example: def params(trial): return {"learning_rate": trial.suggest_float("learning_rate", 0.01, 0.1)} + scoring_method : str, callable or None + Scoring argument passed to cross-validation. ``None`` triggers an + automatic fallback chosen by :func:`_resolve_optuna_scoring`. 
cv : int, cross-validation splitter, or iterable of (train_indices, test_indices)
+        Cross-validation strategy used during tuning. If an integer is provided, a shuffled
+        :class:`sklearn.model_selection.KFold` with the specified number of splits and ``random_state=42`` is used.
+    optuna_settings : dict or None
+        Optuna-specific settings.
+    learner_name : str
+        Name of the learner for logging and identification.
+    params_name : str or None
+        Name of the nuisance parameter for settings selection.
+
+    Returns
+    -------
+    DMLOptunaResult
+        A tuning result containing the optuna.Study object and further information.
+    """
+
+    scoring_method, scoring_message = _resolve_optuna_scoring(scoring_method, learner, params_name)
+    if scoring_message:
+        logger.info(scoring_message)
+
+    cv_splitter = _check_tuning_inputs(
+        y,
+        x,
+        learner,
+        param_grid_func,
+        scoring_method,
+        cv,
+        params_name=params_name,
+    )
+
+    if param_grid_func is None:
+        estimator = clone(learner)
+        estimator.fit(x, y)
+        best_params = estimator.get_params(deep=True)
+        return DMLOptunaResult(
+            learner_name=learner_name,
+            params_name=params_name,
+            best_estimator=estimator,
+            best_params=best_params,
+            best_score=np.nan,
+            scoring_method=scoring_method,
+            study=None,
+            tuned=False,
+        )
+
+    settings = _get_optuna_settings(optuna_settings, params_name)
+    # Set Optuna logging verbosity if specified
+    verbosity = settings.get("verbosity")
+    if verbosity is not None:
+        optuna.logging.set_verbosity(verbosity)
+
+    # Create the study
+    study = _create_study(settings, params_name)
+
+    # Optionally set metric names (commented out as there is a warning in Optuna about this)
+    # study.set_metric_names([f"{scoring_method}_{params_name}"])
+
+    # Create the objective function
+    objective = _create_objective(param_grid_func, learner, x, y, cv_splitter, scoring_method)
+
+    # Build optimize kwargs
+    optimize_kwargs = {
+        "n_trials": settings["n_trials"],
+        "timeout": settings["timeout"],
+        "callbacks": settings["callbacks"],
+        "catch": settings["catch"],
+        "show_progress_bar": settings["show_progress_bar"],
+        "gc_after_trial": settings["gc_after_trial"],
+        "n_jobs": settings["n_jobs_optuna"],
+    }
+    optimize_kwargs.update(settings.get("optimize_kwargs", {}))
+
+    # Filter out None values, but keep boolean flags
+    final_optimize_kwargs = {
+        k: v for k, v in optimize_kwargs.items() if v is not None or k in ["show_progress_bar", "gc_after_trial"]
+    }
+
+    # Run optimization once on the full dataset
+    study.optimize(objective, **final_optimize_kwargs)
+
+    # Validate optimization results
+    if not study.trials or all(t.state != optuna.trial.TrialState.COMPLETE for t in study.trials):
+        raise RuntimeError("Optuna optimization failed to produce any complete trials.")
+
+    # Extract best parameters and score
+    # Since each learner is tuned independently, use all parameters from the study
+    best_params = dict(study.best_trial.params)
+    best_score = study.best_value
+
+    # Fit the best estimator on the full dataset once
+    best_estimator = clone(learner).set_params(**best_params)
+    best_estimator.fit(x, y)
+
+    return DMLOptunaResult(
+        learner_name=learner_name,
+        params_name=params_name,
+        best_estimator=best_estimator,
+        best_params=best_params,
+        best_score=best_score,
+        scoring_method=scoring_method,
+        study=study,
+        tuned=True,
+    )
diff --git a/doubleml/utils/tests/test_quadratic_inequality.py b/doubleml/utils/tests/test_quadratic_inequality.py
index 68faff0f..d59219af 100644
--- a/doubleml/utils/tests/test_quadratic_inequality.py
+++ 
b/doubleml/utils/tests/test_quadratic_inequality.py @@ -27,6 +27,7 @@ ], ) def test_solve_quadratic_inequation(a, b, c, expected): + np.random.seed(42) result = _solve_quadratic_inequality(a, b, c) assert len(result) == len(expected), f"Expected {len(expected)} intervals but got {len(result)}" diff --git a/pyproject.toml b/pyproject.toml index 6aa06ab5..eb80c712 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ dependencies = [ "scipy>=1.7.0", "scikit-learn>=1.6.0", "statsmodels>=0.14.0", + "optuna>=4.6.0", "matplotlib>=3.9.0", "seaborn>=0.13", "plotly>=5.0.0"