diff --git a/HISTORY.rst b/HISTORY.rst index 044f96ef1..b43dd91fd 100644 --- a/HISTORY.rst +++ b/HISTORY.rst @@ -4,6 +4,7 @@ History 1.x.x (2025-xx-xx) ------------------ +* Remove dependency of internal classes on sklearn's check_is_fitted * Add an example of risk control with LLM as a judge * Add comparison with naive threshold in risk control quick start example diff --git a/mapie/calibration.py b/mapie/calibration.py index ded5106e7..02586486a 100644 --- a/mapie/calibration.py +++ b/mapie/calibration.py @@ -4,14 +4,14 @@ from typing import Dict, Optional, Tuple, Union, cast import numpy as np +from numpy.typing import ArrayLike, NDArray from sklearn.base import BaseEstimator, ClassifierMixin, RegressorMixin, clone from sklearn.calibration import _SigmoidCalibration from sklearn.isotonic import IsotonicRegression from sklearn.utils import check_random_state from sklearn.utils.multiclass import type_of_target -from sklearn.utils.validation import _check_y, _num_samples, check_is_fitted, indexable +from sklearn.utils.validation import _check_y, _num_samples, indexable -from numpy.typing import ArrayLike, NDArray from .utils import ( _check_estimator_classification, _check_estimator_fit_predict, @@ -19,6 +19,7 @@ _check_null_weight, _fit_estimator, _get_calib_set, + check_is_fitted, ) @@ -123,6 +124,12 @@ def __init__( self.estimator = estimator self.calibrator = calibrator self.cv = cv + self._is_fitted = False + + @property + def is_fitted(self): + """Returns True if the estimator is fitted""" + return self._is_fitted def _check_cv( self, @@ -480,6 +487,9 @@ def fit( self.calibrators = self._fit_calibrators( X_calib, y_calib, sw_calib, calibrator ) + + self._is_fitted = True + return self def predict_proba( @@ -501,7 +511,7 @@ def predict_proba( The calibrated score for each max score and zeros at every other position in that line. 
""" - check_is_fitted(self, self.fit_attributes) + check_is_fitted(self) self.uncalib_pred = self.single_estimator_.predict_proba(X=X) max_prob, y_pred = self._get_labels(X) @@ -537,5 +547,5 @@ def predict( NDArray of shape (n_samples,) The class from the scores. """ - check_is_fitted(self, self.fit_attributes) + check_is_fitted(self) return self.single_estimator_.predict(X) diff --git a/mapie/classification.py b/mapie/classification.py index b29127a23..3d5075c4d 100644 --- a/mapie/classification.py +++ b/mapie/classification.py @@ -4,49 +4,44 @@ from typing import Any, Iterable, Optional, Tuple, Union, cast import numpy as np +from numpy.typing import ArrayLike, NDArray from sklearn import clone from sklearn.base import BaseEstimator, ClassifierMixin from sklearn.linear_model import LogisticRegression -from sklearn.model_selection import ( - BaseCrossValidator, - BaseShuffleSplit, -) +from sklearn.model_selection import BaseCrossValidator, BaseShuffleSplit from sklearn.preprocessing import LabelEncoder from sklearn.utils import check_random_state -from sklearn.utils.validation import _check_y, check_is_fitted, indexable - -from numpy.typing import ArrayLike, NDArray +from sklearn.utils.validation import _check_y, indexable from mapie.conformity_scores import BaseClassificationScore from mapie.conformity_scores.sets.raps import RAPSConformityScore from mapie.conformity_scores.utils import ( + check_and_select_conformity_score, check_classification_conformity_score, check_target, - check_and_select_conformity_score, ) from mapie.estimator.classifier import EnsembleClassifier from mapie.utils import ( + _cast_point_predictions_to_ndarray, + _cast_predictions_to_ndarray_tuple, _check_alpha, _check_alpha_and_n_samples, _check_cv, + _check_cv_not_string, _check_estimator_classification, _check_n_features_in, _check_n_jobs, _check_null_weight, _check_predict_params, _check_verbose, - check_proba_normalized, -) -from mapie.utils import ( - 
_transform_confidence_level_to_alpha_list, + _prepare_fit_params_and_sample_weight, + _prepare_params, _raise_error_if_fit_called_in_prefit_mode, _raise_error_if_method_already_called, - _prepare_params, _raise_error_if_previous_method_not_called, - _cast_predictions_to_ndarray_tuple, - _cast_point_predictions_to_ndarray, - _check_cv_not_string, - _prepare_fit_params_and_sample_weight, + _transform_confidence_level_to_alpha_list, + check_is_fitted, + check_proba_normalized, ) @@ -734,6 +729,12 @@ def __init__( self.conformity_score = conformity_score self.random_state = random_state self.verbose = verbose + self._is_fitted = False + + @property + def is_fitted(self): + """Returns True if the estimator is fitted""" + return self._is_fitted def _check_parameters(self) -> None: """ @@ -967,6 +968,7 @@ def fit( groups=groups, predict_params=predict_params, ) + self._is_fitted = True return self def predict( @@ -1049,7 +1051,7 @@ def predict( if hasattr(self, "_predict_params"): _check_predict_params(self._predict_params, predict_params, self.cv) - check_is_fitted(self, self.fit_attributes) + check_is_fitted(self) alpha = cast(Optional[NDArray], _check_alpha(alpha)) # Estimate predictions diff --git a/mapie/conformity_scores/bounds/residuals.py b/mapie/conformity_scores/bounds/residuals.py index 0b5d9ba27..20930bdb2 100644 --- a/mapie/conformity_scores/bounds/residuals.py +++ b/mapie/conformity_scores/bounds/residuals.py @@ -2,14 +2,15 @@ from typing import Optional, Tuple, Union, cast import numpy as np +from numpy.typing import ArrayLike, NDArray from sklearn.base import RegressorMixin, clone from sklearn.linear_model import LinearRegression from sklearn.model_selection import train_test_split from sklearn.pipeline import Pipeline -from sklearn.utils.validation import check_is_fitted, check_random_state, indexable +from sklearn.utils.validation import check_random_state, indexable -from numpy.typing import ArrayLike, NDArray from mapie.conformity_scores import 
BaseRegressionScore +from mapie.utils import check_sklearn_user_model_is_fitted class ResidualNormalisedScore(BaseRegressionScore): @@ -112,9 +113,9 @@ def _check_estimator( ) if self.prefit: if isinstance(estimator, Pipeline): - check_is_fitted(estimator[-1]) + check_sklearn_user_model_is_fitted(estimator[-1]) else: - check_is_fitted(estimator) + check_sklearn_user_model_is_fitted(estimator) return estimator def _check_parameters( diff --git a/mapie/estimator/classifier.py b/mapie/estimator/classifier.py index 20abfdd69..81d7e4303 100644 --- a/mapie/estimator/classifier.py +++ b/mapie/estimator/classifier.py @@ -4,13 +4,18 @@ import numpy as np from joblib import Parallel, delayed +from numpy.typing import ArrayLike, NDArray from sklearn.base import ClassifierMixin, clone from sklearn.model_selection import BaseCrossValidator, BaseShuffleSplit from sklearn.utils import _safe_indexing -from sklearn.utils.validation import _num_samples, check_is_fitted +from sklearn.utils.validation import _num_samples -from numpy.typing import ArrayLike, NDArray -from mapie.utils import _check_no_agg_cv, _fit_estimator, _fix_number_of_classes +from mapie.utils import ( + _check_no_agg_cv, + _fit_estimator, + _fix_number_of_classes, + check_is_fitted, +) class EnsembleClassifier: @@ -121,6 +126,12 @@ def __init__( self.n_jobs = n_jobs self.test_size = test_size self.verbose = verbose + self._is_fitted = False + + @property + def is_fitted(self): + """Returns True if the estimator is fitted""" + return self._is_fitted @staticmethod def _fit_oof_estimator( @@ -344,6 +355,8 @@ def fit( self.estimators_ = estimators_ self.k_ = k_ + self._is_fitted = True + return self def predict_proba_calib( @@ -381,7 +394,7 @@ def predict_proba_calib( NDArray of shape (n_samples_test, 1) The predictions. 
""" - check_is_fitted(self, self.fit_attributes) + check_is_fitted(self) if self.cv == "prefit": y_pred_proba = self.single_estimator_.predict_proba(X, **predict_params) @@ -445,7 +458,7 @@ def predict_agg_proba( Predictions of shape (n_samples, n_classes) """ - check_is_fitted(self, self.fit_attributes) + check_is_fitted(self) y_pred_proba_k = np.asarray( Parallel(n_jobs=self.n_jobs, verbose=self.verbose)( diff --git a/mapie/estimator/regressor.py b/mapie/estimator/regressor.py index f872e4ede..3fba10bf2 100644 --- a/mapie/estimator/regressor.py +++ b/mapie/estimator/regressor.py @@ -4,17 +4,18 @@ import numpy as np from joblib import Parallel, delayed +from numpy.typing import ArrayLike, NDArray from sklearn.base import RegressorMixin, clone from sklearn.model_selection import BaseCrossValidator from sklearn.utils import _safe_indexing -from sklearn.utils.validation import _num_samples, check_is_fitted +from sklearn.utils.validation import _num_samples -from numpy.typing import ArrayLike, NDArray from mapie.aggregation_functions import aggregate_all, phi2D from mapie.utils import ( _check_nan_in_aposteriori_prediction, _check_no_agg_cv, _fit_estimator, + check_is_fitted, ) @@ -169,6 +170,12 @@ def __init__( self.n_jobs = n_jobs self.test_size = test_size self.verbose = verbose + self._is_fitted = False + + @property + def is_fitted(self): + """Returns True if the estimator is fitted""" + return self._is_fitted @staticmethod def _fit_oof_estimator( @@ -353,7 +360,7 @@ def predict_calib( NDArray of shape (n_samples_test, 1) The predictions. """ - check_is_fitted(self, self.fit_attributes) + check_is_fitted(self) if self.cv == "prefit": y_pred = self.single_estimator_.predict(X) @@ -400,7 +407,7 @@ def fit( """ Note to developer: this fit method has been broken down into fit_single_estimator and fit_multi_estimators, - but we kept it so that EnsembleRegressor passes sklearn.check_is_fitted. + but we kept it for consistency with the public fit() API. 
Prefer using fit_single_estimator and fit_multi_estimators. Fit the base estimator under the ``single_estimator_`` attribute. @@ -515,6 +522,9 @@ def fit_single_estimator( ) self.single_estimator_ = single_estimator_ + + self._is_fitted = True + return self def predict( @@ -560,7 +570,7 @@ def predict( - The multiple predictions for the lower bound of the intervals. - The multiple predictions for the upper bound of the intervals. """ - check_is_fitted(self, self.fit_attributes) + check_is_fitted(self) y_pred = self.single_estimator_.predict(X, **predict_params) if not return_multi_pred and not ensemble: diff --git a/mapie/regression/quantile_regression.py b/mapie/regression/quantile_regression.py index 1e2170810..e80e30af1 100644 --- a/mapie/regression/quantile_regression.py +++ b/mapie/regression/quantile_regression.py @@ -1,36 +1,36 @@ from __future__ import annotations -from typing import Iterable, List, Optional, Tuple, Union, cast, Any +from typing import Any, Iterable, List, Optional, Tuple, Union, cast import numpy as np +from numpy.typing import ArrayLike, NDArray from sklearn.base import RegressorMixin, clone from sklearn.linear_model import QuantileRegressor from sklearn.model_selection import train_test_split from sklearn.pipeline import Pipeline from sklearn.utils import check_random_state -from sklearn.utils.validation import _check_y, _num_samples, check_is_fitted, indexable +from sklearn.utils.validation import _check_y, _num_samples, indexable -from numpy.typing import ArrayLike, NDArray from mapie.utils import ( + _cast_predictions_to_ndarray_tuple, _check_alpha_and_n_samples, _check_defined_variables_predict_cqr, _check_estimator_fit_predict, _check_lower_upper_bounds, _check_null_weight, _fit_estimator, -) - -from .regression import _MapieRegressor -from mapie.utils import ( - _cast_predictions_to_ndarray_tuple, - _prepare_params, _prepare_fit_params_and_sample_weight, - _raise_error_if_previous_method_not_called, - 
_raise_error_if_method_already_called, +    _prepare_params, _raise_error_if_fit_called_in_prefit_mode, +    _raise_error_if_method_already_called, +    _raise_error_if_previous_method_not_called, _transform_confidence_level_to_alpha, +    check_is_fitted, +    check_sklearn_user_model_is_fitted, ) +from .regression import _MapieRegressor + class ConformalizedQuantileRegressor: """ @@ -428,6 +428,13 @@ def __init__( ) self.cv = cv self.alpha = alpha +        # cv="prefit" means estimators are already trained: mark fitted upfront +        self._is_fitted = self.cv == "prefit" + +    @property +    def is_fitted(self): +        """Returns True if the estimator is fitted""" +        return self._is_fitted def _check_alpha( self, @@ -668,7 +675,7 @@ if len(estimator) == 3: for est in estimator: _check_estimator_fit_predict(est) -                check_is_fitted(est) +                check_sklearn_user_model_is_fitted(est) else: raise ValueError( "You need to have provided 3 different estimators, they" @@ -876,6 +883,8 @@ ) ) + +        self._is_fitted = True + self.single_estimator_ = self.estimators_[2] def conformalize( @@ -961,7 +970,7 @@ def predict( - [:, 0, :]: Lower bound of the prediction interval. - [:, 1, :]: Upper bound of the prediction interval.
""" - check_is_fitted(self, self.fit_attributes) + check_is_fitted(self) _check_defined_variables_predict_cqr(ensemble, alpha) alpha = self.alpha if symmetry else self.alpha / 2 _check_alpha_and_n_samples(alpha, self.n_calib_samples) diff --git a/mapie/regression/regression.py b/mapie/regression/regression.py index 60b3d4464..b4091cc3a 100644 --- a/mapie/regression/regression.py +++ b/mapie/regression/regression.py @@ -3,44 +3,44 @@ from typing import Any, Iterable, Optional, Tuple, Union, cast import numpy as np +from numpy.typing import ArrayLike, NDArray from sklearn.base import BaseEstimator, RegressorMixin, clone from sklearn.linear_model import LinearRegression from sklearn.model_selection import BaseCrossValidator from sklearn.pipeline import Pipeline from sklearn.utils import check_random_state -from sklearn.utils.validation import _check_y, check_is_fitted, indexable +from sklearn.utils.validation import _check_y, indexable -from numpy.typing import ArrayLike, NDArray from mapie.conformity_scores import BaseRegressionScore, ResidualNormalisedScore from mapie.conformity_scores.utils import ( - check_regression_conformity_score, check_and_select_conformity_score, + check_regression_conformity_score, ) from mapie.estimator.regressor import EnsembleRegressor from mapie.subsample import Subsample from mapie.utils import ( + _cast_point_predictions_to_ndarray, + _cast_predictions_to_ndarray_tuple, _check_alpha, _check_alpha_and_n_samples, _check_cv, + _check_cv_not_string, _check_estimator_fit_predict, + _check_if_param_in_allowed_values, _check_n_features_in, _check_n_jobs, _check_null_weight, + _check_predict_params, _check_verbose, _get_effective_calibration_samples, - _check_predict_params, -) -from mapie.utils import ( - _transform_confidence_level_to_alpha_list, - _check_if_param_in_allowed_values, - _check_cv_not_string, - _cast_point_predictions_to_ndarray, - _cast_predictions_to_ndarray_tuple, - _prepare_params, _prepare_fit_params_and_sample_weight, - 
_raise_error_if_previous_method_not_called, - _raise_error_if_method_already_called, + _prepare_params, _raise_error_if_fit_called_in_prefit_mode, + _raise_error_if_method_already_called, + _raise_error_if_previous_method_not_called, + _transform_confidence_level_to_alpha_list, + check_is_fitted, + check_sklearn_user_model_is_fitted, ) @@ -1127,6 +1127,12 @@ def __init__( self.verbose = verbose self.conformity_score = conformity_score self.random_state = random_state + self._is_fitted = False + + @property + def is_fitted(self): + """Returns True if the estimator is fitted""" + return self._is_fitted def _check_parameters(self) -> None: """ @@ -1238,9 +1244,9 @@ def _check_estimator( _check_estimator_fit_predict(estimator) if self.cv == "prefit": if isinstance(estimator, Pipeline): - check_is_fitted(estimator[-1]) + check_sklearn_user_model_is_fitted(estimator[-1]) else: - check_is_fitted(estimator) + check_sklearn_user_model_is_fitted(estimator) return estimator def _check_ensemble( @@ -1400,6 +1406,8 @@ def fit( self.fit_estimator(X, y, sample_weight, groups) self.conformalize(X, y, sample_weight, groups, **kwargs) + self._is_fitted = True + return self def init_fit( @@ -1546,7 +1554,7 @@ def predict( # Checks if hasattr(self, "_predict_params"): _check_predict_params(self._predict_params, predict_params, self.cv) - check_is_fitted(self, self.fit_attributes) + check_is_fitted(self) self._check_ensemble(ensemble) alpha = cast(Optional[NDArray], _check_alpha(alpha)) diff --git a/mapie/regression/time_series_regression.py b/mapie/regression/time_series_regression.py index 1e7a2ad76..c5ac26b14 100644 --- a/mapie/regression/time_series_regression.py +++ b/mapie/regression/time_series_regression.py @@ -6,7 +6,6 @@ from numpy.typing import ArrayLike, NDArray from sklearn.base import RegressorMixin from sklearn.model_selection import BaseCrossValidator -from sklearn.utils.validation import check_is_fitted from mapie.conformity_scores import BaseRegressionScore from 
mapie.regression.regression import _MapieRegressor @@ -14,6 +13,7 @@ _check_alpha, _check_gamma, _transform_confidence_level_to_alpha_list, + check_is_fitted, ) @@ -161,7 +161,7 @@ def _update_conformity_scores_with_ensemble( If the length of ``y`` is greater than the length of the training set. """ - check_is_fitted(self, self.fit_attributes) + check_is_fitted(self) X, y = cast(NDArray, X), cast(NDArray, y) m, n = len(X), len(self.conformity_scores_) if m > n: @@ -282,7 +282,7 @@ def adapt_conformal_inference( f"not with '{self.method}'." ) - check_is_fitted(self, self.fit_attributes) + check_is_fitted(self) _check_gamma(gamma) X, y = cast(NDArray, X), cast(NDArray, y) diff --git a/mapie/risk_control/multi_label_classification.py b/mapie/risk_control/multi_label_classification.py index 84e8ba534..059fb861f 100644 --- a/mapie/risk_control/multi_label_classification.py +++ b/mapie/risk_control/multi_label_classification.py @@ -12,9 +12,15 @@ from sklearn.multioutput import MultiOutputClassifier from sklearn.pipeline import Pipeline from sklearn.utils import check_random_state -from sklearn.utils.validation import _check_y, _num_samples, check_is_fitted, indexable - -from mapie.utils import _check_alpha, _check_n_jobs, _check_verbose +from sklearn.utils.validation import _check_y, _num_samples, indexable + +from mapie.utils import ( + _check_alpha, + _check_n_jobs, + _check_verbose, + check_is_fitted, + check_sklearn_user_model_is_fitted, +) from .methods import ( find_lambda_star, @@ -182,6 +188,12 @@ def __init__( self.n_jobs = n_jobs self.random_state = random_state self.verbose = verbose + self._is_fitted = False + + @property + def is_fitted(self): + """Returns True if the estimator is fitted""" + return self._is_fitted def _check_parameters(self) -> None: """ @@ -375,7 +387,7 @@ def _check_estimator( "Please provide a classifier with fit," "predict, and predict_proba methods." 
) - check_is_fitted(est) + check_sklearn_user_model_is_fitted(est) return estimator, X, y def _check_partial_fit_first_call(self) -> bool: @@ -526,6 +538,8 @@ def partial_fit( ) self.risks = np.concatenate([self.risks, partial_risk], axis=0) + self._is_fitted = True + return self def fit( @@ -608,7 +622,7 @@ def predict( self._check_delta(delta) self._check_bound(bound) alpha = cast(Optional[NDArray], _check_alpha(alpha)) - check_is_fitted(self, self.fit_attributes) + check_is_fitted(self) # Estimate prediction sets y_pred = self.single_estimator_.predict(X) diff --git a/mapie/tests/risk_control/test_precision_recall_control.py b/mapie/tests/risk_control/test_precision_recall_control.py index 61c31cb2f..08307dfe2 100644 --- a/mapie/tests/risk_control/test_precision_recall_control.py +++ b/mapie/tests/risk_control/test_precision_recall_control.py @@ -11,10 +11,10 @@ from sklearn.multioutput import MultiOutputClassifier from sklearn.pipeline import Pipeline, make_pipeline from sklearn.preprocessing import OneHotEncoder -from sklearn.utils.validation import check_is_fitted from typing_extensions import TypedDict from mapie.risk_control import PrecisionRecallController +from mapie.utils import check_is_fitted Params = TypedDict( "Params", @@ -163,9 +163,6 @@ def predict_proba(self, X: NDArray, *args: Any) -> NDArray: def predict(self, X: NDArray, *args: Any) -> NDArray: return self.predict_proba(X) >= 0.3 - def __sklearn_is_fitted__(self): - return True - X_toy = np.arange(9).reshape(-1, 1) y_toy = np.stack( @@ -208,7 +205,7 @@ def test_valid_method() -> None: """Test that valid methods raise no errors.""" mapie_clf = PrecisionRecallController(random_state=random_state) mapie_clf.fit(X_toy, y_toy) - check_is_fitted(mapie_clf, mapie_clf.fit_attributes) + check_is_fitted(mapie_clf) @pytest.mark.parametrize("strategy", [*STRATEGIES]) @@ -219,7 +216,7 @@ def test_valid_metric_method(strategy: str) -> None: random_state=random_state, metric_control=args["metric_control"] ) 
mapie_clf.fit(X_toy, y_toy) - check_is_fitted(mapie_clf, mapie_clf.fit_attributes) + check_is_fitted(mapie_clf) @pytest.mark.parametrize("bound", BOUNDS) @@ -228,7 +225,7 @@ def test_valid_bound(bound: str) -> None: mapie_clf = PrecisionRecallController(random_state=random_state, method="rcps") mapie_clf.fit(X_toy, y_toy) mapie_clf.predict(X_toy, bound=bound, delta=0.1) - check_is_fitted(mapie_clf, mapie_clf.fit_attributes) + check_is_fitted(mapie_clf) @pytest.mark.parametrize("strategy", [*STRATEGIES]) diff --git a/mapie/tests/test_classification.py b/mapie/tests/test_classification.py index 5a29b064d..61555d817 100644 --- a/mapie/tests/test_classification.py +++ b/mapie/tests/test_classification.py @@ -23,7 +23,6 @@ ) from sklearn.pipeline import Pipeline, make_pipeline from sklearn.preprocessing import OneHotEncoder -from sklearn.utils.validation import check_is_fitted from typing_extensions import TypedDict from mapie.classification import _MapieClassifier @@ -36,7 +35,7 @@ TopKConformityScore, ) from mapie.metrics.classification import classification_coverage_score -from mapie.utils import check_proba_normalized +from mapie.utils import check_is_fitted, check_proba_normalized random_state = 42 @@ -856,7 +855,7 @@ def test_valid_conformity_score(conformity_score: BaseClassificationScore) -> No conformity_score=conformity_score, cv="prefit", random_state=random_state ) mapie_clf.fit(X, y) - check_is_fitted(mapie_clf, mapie_clf.fit_attributes) + check_is_fitted(mapie_clf) @pytest.mark.parametrize( diff --git a/mapie/tests/test_common.py b/mapie/tests/test_common.py index 8f70a39b6..f24cc6f72 100644 --- a/mapie/tests/test_common.py +++ b/mapie/tests/test_common.py @@ -4,29 +4,28 @@ import numpy as np import pytest from sklearn.base import BaseEstimator -from sklearn.datasets import make_regression, make_classification -from sklearn.dummy import DummyRegressor, DummyClassifier -from sklearn.exceptions import NotFittedError +from sklearn.datasets import 
make_classification, make_regression +from sklearn.dummy import DummyClassifier, DummyRegressor from sklearn.linear_model import LinearRegression, LogisticRegression, QuantileRegressor from sklearn.model_selection import KFold, train_test_split from sklearn.pipeline import make_pipeline -from sklearn.utils.validation import check_is_fitted from mapie.classification import ( - _MapieClassifier, - SplitConformalClassifier, CrossConformalClassifier, + SplitConformalClassifier, + _MapieClassifier, +) +from mapie.regression.quantile_regression import ( + ConformalizedQuantileRegressor, + _MapieQuantileRegressor, ) from mapie.regression.regression import ( - _MapieRegressor, - SplitConformalRegressor, CrossConformalRegressor, JackknifeAfterBootstrapRegressor, + SplitConformalRegressor, + _MapieRegressor, ) -from mapie.regression.quantile_regression import ( - _MapieQuantileRegressor, - ConformalizedQuantileRegressor, -) +from mapie.utils import NotFittedError, check_sklearn_user_model_is_fitted RANDOM_STATE = 1 @@ -315,12 +314,16 @@ def test_invalid_estimator(MapieEstimator: BaseEstimator, estimator: Any) -> Non mapie_estimator.fit(X_toy, y_toy) +@pytest.mark.filterwarnings("ignore:Estimator does not appear fitted.*:UserWarning") @pytest.mark.parametrize("pack", MapieTestEstimators()) def test_invalid_prefit_estimator(pack: Tuple[BaseEstimator, BaseEstimator]) -> None: """Test that non-fitted estimator with prefit cv raise errors.""" MapieEstimator, estimator = pack mapie_estimator = MapieEstimator(estimator=estimator, cv="prefit") - with pytest.raises(NotFittedError): + with pytest.raises( + (AttributeError, ValueError), + match=r".*(does not contain 'classes_'|is not fitted).*", + ): mapie_estimator.fit(X_toy, y_toy) @@ -331,7 +334,7 @@ def test_valid_prefit_estimator(pack: Tuple[BaseEstimator, BaseEstimator]) -> No estimator.fit(X_toy, y_toy) mapie_estimator = MapieEstimator(estimator=estimator, cv="prefit") mapie_estimator.fit(X_toy, y_toy) - 
check_is_fitted(mapie_estimator, mapie_estimator.fit_attributes) + check_sklearn_user_model_is_fitted(mapie_estimator) assert mapie_estimator.n_features_in_ == 1 diff --git a/mapie/tests/test_quantile_regression.py b/mapie/tests/test_quantile_regression.py index d2ea6f534..d50a1ea9e 100644 --- a/mapie/tests/test_quantile_regression.py +++ b/mapie/tests/test_quantile_regression.py @@ -6,6 +6,7 @@ import numpy as np import pandas as pd import pytest +from numpy.typing import NDArray from sklearn.base import BaseEstimator, RegressorMixin, clone from sklearn.compose import ColumnTransformer from sklearn.datasets import make_regression @@ -15,14 +16,11 @@ from sklearn.model_selection import KFold, LeaveOneOut, train_test_split from sklearn.pipeline import Pipeline, make_pipeline from sklearn.preprocessing import OneHotEncoder -from sklearn.utils.validation import check_is_fitted from typing_extensions import TypedDict -from numpy.typing import NDArray -from mapie.metrics.regression import ( - regression_coverage_score, -) +from mapie.metrics.regression import regression_coverage_score from mapie.regression.quantile_regression import _MapieQuantileRegressor +from mapie.utils import check_is_fitted X_toy = np.array( [0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5] @@ -183,7 +181,7 @@ def test_valid_method(strategy: str, estimator: RegressorMixin) -> None: """Test that valid strategies and estimators raise no error""" mapie_reg = _MapieQuantileRegressor(estimator=estimator, **STRATEGIES[strategy]) mapie_reg.fit(X_train_toy, y_train_toy, X_calib=X_calib_toy, y_calib=y_calib_toy) - check_is_fitted(mapie_reg, mapie_reg.fit_attributes) + check_is_fitted(mapie_reg) assert mapie_reg.__dict__["method"] == "quantile" @@ -463,6 +461,7 @@ def test_prefit_no_fit_predict() -> None: mapie_reg.fit(X_calib, y_calib) +@pytest.mark.filterwarnings("ignore:Estimator does not appear fitted.*:UserWarning") def test_non_trained_estimator() -> None: """ Check that the 
estimators are all already trained when used in prefit. diff --git a/mapie/tests/test_regression.py b/mapie/tests/test_regression.py index 708800bdc..aba143c10 100644 --- a/mapie/tests/test_regression.py +++ b/mapie/tests/test_regression.py @@ -6,8 +6,8 @@ import numpy as np import pandas as pd import pytest +from numpy.typing import NDArray from scipy.stats import ttest_1samp - from sklearn.compose import ColumnTransformer from sklearn.datasets import make_regression from sklearn.dummy import DummyRegressor @@ -17,19 +17,17 @@ from sklearn.model_selection import ( GroupKFold, KFold, + LeaveOneGroupOut, LeaveOneOut, + LeavePGroupsOut, PredefinedSplit, ShuffleSplit, train_test_split, - LeaveOneGroupOut, - LeavePGroupsOut, ) from sklearn.pipeline import Pipeline, make_pipeline from sklearn.preprocessing import OneHotEncoder -from sklearn.utils.validation import check_is_fitted from typing_extensions import TypedDict -from numpy.typing import NDArray from mapie.aggregation_functions import aggregate_all from mapie.conformity_scores import ( AbsoluteConformityScore, @@ -38,14 +36,13 @@ ResidualNormalisedScore, ) from mapie.estimator.regressor import EnsembleRegressor -from mapie.metrics.regression import ( - regression_coverage_score, -) +from mapie.metrics.regression import regression_coverage_score from mapie.regression.regression import ( - _MapieRegressor, JackknifeAfterBootstrapRegressor, + _MapieRegressor, ) from mapie.subsample import Subsample +from mapie.utils import check_is_fitted class TestCheckAndConvertResamplingToCv: @@ -254,7 +251,7 @@ def test_valid_method(method: str) -> None: """Test that valid methods raise no errors.""" mapie_reg = _MapieRegressor(method=method) mapie_reg.fit(X_toy, y_toy) - check_is_fitted(mapie_reg, mapie_reg.fit_attributes) + check_is_fitted(mapie_reg) @pytest.mark.parametrize("agg_function", ["dummy", 0, 1, 2.5, [1, 2]]) diff --git a/mapie/tests/test_utils.py b/mapie/tests/test_utils.py index cb33da518..109fb31fc 100644 --- 
a/mapie/tests/test_utils.py +++ b/mapie/tests/test_utils.py @@ -8,15 +8,17 @@ import numpy as np import pytest from numpy.random import RandomState +from numpy.typing import ArrayLike, NDArray from sklearn.datasets import make_regression -from sklearn.linear_model import LinearRegression +from sklearn.linear_model import LinearRegression, LogisticRegression from sklearn.model_selection import BaseCrossValidator, KFold, LeaveOneOut, ShuffleSplit -from sklearn.utils.validation import check_is_fitted - -from numpy.typing import ArrayLike, NDArray +from sklearn.pipeline import Pipeline from mapie.regression.quantile_regression import _MapieQuantileRegressor from mapie.utils import ( + NotFittedError, + _cast_point_predictions_to_ndarray, + _cast_predictions_to_ndarray_tuple, _check_alpha, _check_alpha_and_n_samples, _check_array_inf, @@ -24,7 +26,9 @@ _check_arrays_length, _check_binary_zero_one, _check_cv, + _check_cv_not_string, _check_gamma, + _check_if_param_in_allowed_values, _check_lower_upper_bounds, _check_n_features_in, _check_n_jobs, @@ -37,18 +41,16 @@ _compute_quantiles, _fit_estimator, _get_binning_groups, - train_conformalize_test_split, - _transform_confidence_level_to_alpha, - _transform_confidence_level_to_alpha_list, - _check_if_param_in_allowed_values, - _check_cv_not_string, - _cast_point_predictions_to_ndarray, - _cast_predictions_to_ndarray_tuple, - _prepare_params, _prepare_fit_params_and_sample_weight, - _raise_error_if_previous_method_not_called, - _raise_error_if_method_already_called, + _prepare_params, _raise_error_if_fit_called_in_prefit_mode, + _raise_error_if_method_already_called, + _raise_error_if_previous_method_not_called, + _transform_confidence_level_to_alpha, + _transform_confidence_level_to_alpha_list, + check_is_fitted, + check_sklearn_user_model_is_fitted, + train_conformalize_test_split, ) @@ -449,12 +451,15 @@ def test_check_null_weight_with_zeros() -> None: np.testing.assert_almost_equal(np.array(y_out), np.array([7, 9, 11, 
13, 15])) +@pytest.mark.filterwarnings( +    "ignore:Estimator exposes fitted-like attributes.*:UserWarning" +) @pytest.mark.parametrize("estimator", [LinearRegression(), DumbEstimator()]) @pytest.mark.parametrize("sample_weight", [None, np.ones_like(y_toy)]) def test_fit_estimator(estimator: Any, sample_weight: Optional[NDArray]) -> None: """Test that the returned estimator is always fitted.""" estimator = _fit_estimator(estimator, X_toy, y_toy, sample_weight) -    check_is_fitted(estimator) +    check_sklearn_user_model_is_fitted(estimator) def test_fit_estimator_sample_weight() -> None: @@ -879,3 +884,70 @@ ), ): _check_n_samples(X=X, n_samples=n_samples, indices=indices) + + +class DummyModel: +    pass + + +def test_check_is_fitted_raises_before_fit(): +    model = DummyModel() +    with pytest.raises(NotFittedError) as excinfo: +        check_is_fitted(model) +    assert "DummyModel is not fitted yet" in str(excinfo.value) + + +def test_check_is_fitted_passes_after_fit(): +    model = DummyModel() +    model.is_fitted = True +    check_is_fitted(model) + + +def test_check_user_model_is_fitted_unfitted(): +    model = DummyModel() +    with pytest.warns(UserWarning, match=r".*Estimator does not appear fitted.*"): +        check_sklearn_user_model_is_fitted(model) + + +def test_check_user_model_is_fitted_warns_for_unfitted_sklearn_model(): +    model = LinearRegression() +    with pytest.warns(UserWarning, match=r".*Estimator does not appear fitted.*"): +        check_sklearn_user_model_is_fitted(model) + + +@pytest.mark.parametrize( +    "Model", +    [ +        LinearRegression(), +        LogisticRegression(), +        Pipeline([("LinearRegression", LinearRegression())]), +    ], +) +def test_check_user_model_is_fitted_sklearn_models(Model): +    """Check that sklearn classifiers and regressors pass.""" +    X = np.random.randn(20, 4) +    y = ( +        (np.random.randn(20) > 0).astype(int) +        if isinstance(Model, LogisticRegression) +        else np.random.randn(20) +    ) +    model = Model.fit(X, y) +    assert 
check_sklearn_user_model_is_fitted(model) is True + + +class BrokenPredictModel: + """Model with n_features_in_ but predict always fails""" + + n_features_in_ = 3 + + def predict(self, X): + raise RuntimeError("Predict failure") + + +def test_check_user_model_is_fitted_predict_fails(): + model = BrokenPredictModel() + with pytest.raises( + UserWarning, + match=r".*has `n_features_in_` but failed a minimal prediction test.*", + ): + check_sklearn_user_model_is_fitted(model) diff --git a/mapie/utils.py b/mapie/utils.py index 860bc5066..5182a06c8 100644 --- a/mapie/utils.py +++ b/mapie/utils.py @@ -1,9 +1,14 @@ +import copy import logging import warnings +from collections.abc import Iterable as IterableType +from decimal import Decimal from inspect import signature +from math import isclose from typing import Any, Iterable, Optional, Tuple, Union, cast import numpy as np +from numpy.typing import ArrayLike, NDArray from sklearn.base import ClassifierMixin, RegressorMixin from sklearn.linear_model import LogisticRegression from sklearn.model_selection import ( @@ -17,18 +22,7 @@ from sklearn.pipeline import Pipeline from sklearn.utils import _safe_indexing from sklearn.utils.multiclass import type_of_target -from sklearn.utils.validation import ( - _check_sample_weight, - _num_features, - check_is_fitted, - column_or_1d, -) - -from numpy.typing import ArrayLike, NDArray -import copy -from collections.abc import Iterable as IterableType -from decimal import Decimal -from math import isclose +from sklearn.utils.validation import _check_sample_weight, _num_features, column_or_1d # This function is the only public utility of MAPIE as of v1 release @@ -287,12 +281,13 @@ def _fit_estimator( -------- >>> import numpy as np >>> from sklearn.linear_model import LinearRegression - >>> from sklearn.utils.validation import check_is_fitted + >>> from mapie.utils import check_sklearn_user_model_is_fitted >>> X = np.array([[0], [1], [2], [3], [4], [5]]) >>> y = np.array([5, 7, 9, 11, 
# Reconstructed from a whitespace-mangled diff hunk of mapie/utils.py: the new
# fit-check helpers replacing sklearn's check_is_fitted for MAPIE internals.
# (warnings, numpy as np, ClassifierMixin and Pipeline are already imported at
# the top of mapie/utils.py — Pipeline on a context line visible in this hunk.)


class NotFittedError(ValueError):
    """Raised when a MAPIE object is used before its fit step completed."""

    pass


def check_is_fitted(obj):
    """Check that the object's ``is_fitted`` property is True.

    Parameters
    ----------
    obj : Any
        Object expected to expose an ``is_fitted`` attribute or property
        (set to True at the end of ``fit`` by MAPIE internal classes).

    Raises
    ------
    NotFittedError
        If ``obj.is_fitted`` is missing or falsy.
    """
    # Missing attribute counts as "not fitted" (getattr default False).
    if not getattr(obj, "is_fitted", False):
        # FIX: dropped the stray trailing space from the message.
        raise NotFittedError(f"{obj.__class__.__name__} is not fitted yet.")


# Attributes a fitted sklearn-style estimator typically exposes; used as a
# heuristic for user-provided models below.
FIT_INDICATORS = [
    "n_features_in_",
    "classes_",
    "coef_",
    "feature_names_in_",
    "tree_",
    "estimators_",
    "fitted_",
]


def check_sklearn_user_model_is_fitted(estimator):
    """
    Check whether a user-provided estimator is fitted.

    Logic:
    1. Raise AttributeError for classifiers missing 'classes_'.
    2. Warn (UserWarning) if none of the typical fit-related attributes
       from ``FIT_INDICATORS`` are present.
    3. If `n_features_in_` exists, try a minimal predict-probe on a
       single all-zeros row; raise UserWarning if the probe fails.

    Returns
    -------
    bool
        True when the heuristic checks pass.

    Raises
    ------
    AttributeError
        For a ClassifierMixin estimator without 'classes_'.
    UserWarning
        Raised (not warned) when the predict-probe fails — callers/tests
        rely on this being an exception.
    """
    if isinstance(estimator, ClassifierMixin) and not hasattr(estimator, "classes_"):
        raise AttributeError(
            "Invalid classifier. "
            "Fitted classifier does not contain "
            "'classes_' attribute."
        )

    present_attrs = [attr for attr in FIT_INDICATORS if hasattr(estimator, attr)]
    if not present_attrs:
        warnings.warn(
            "Estimator does not appear fitted. "
            # FIX: message typo — was "missing in : {...}".
            f"At least one of the expected attributes is missing: {FIT_INDICATORS}.",
            UserWarning,
        )

    if hasattr(estimator, "n_features_in_"):
        try:
            # FIX: robust isinstance check instead of matching "Pipeline" in
            # the type's repr; probe the final step of a pipeline directly.
            if isinstance(estimator, Pipeline):
                estimator = estimator.steps[-1][1]
            estimator.predict(np.zeros((1, estimator.n_features_in_)))
        except Exception as err:
            # FIX: removed the leftover `UserWarning` second argument (a
            # warnings.warn-style category) so str(exc) is the plain message;
            # chain the original error for debuggability.
            raise UserWarning(
                "Estimator does not appear fitted. "
                "It has `n_features_in_` but failed a minimal prediction test "
                f"(shape={(1, estimator.n_features_in_)}). Error: {err}"
            ) from err
    # Single exit point (the original returned True both inside the try and here).
    return True