|
14 | 14 | from feature_engine._docstrings.methods import _fit_transform_docstring |
15 | 15 | from feature_engine._docstrings.selection._docstring import ( |
16 | 16 | _cv_docstring, |
| 17 | + _groups_docstring, |
17 | 18 | _features_to_drop_docstring, |
18 | 19 | _fit_docstring, |
19 | 20 | _get_support_docstring, |
|
40 | 41 | estimator=_estimator_docstring, |
41 | 42 | scoring=_scoring_docstring, |
42 | 43 | cv=_cv_docstring, |
| 44 | + groups=_groups_docstring, |
43 | 45 | confirm_variables=_confirm_variables_docstring, |
44 | 46 | variables=_variables_numerical_docstring, |
45 | 47 | feature_names_in_=_feature_names_in_docstring, |
@@ -104,6 +106,8 @@ class ProbeFeatureSelection(BaseSelector): |
104 | 106 |
|
105 | 107 | {cv} |
106 | 108 |
|
| 109 | + {groups} |
| 110 | +
|
107 | 111 | Attributes |
108 | 112 | ---------- |
109 | 113 | probe_features_: |
@@ -173,6 +177,7 @@ def __init__( |
173 | 177 | n_probes: int = 1, |
174 | 178 | distribution: str = "normal", |
175 | 179 | cv=5, |
| 180 | + groups=None, |
176 | 181 | random_state: int = 0, |
177 | 182 | confirm_variables: bool = False, |
178 | 183 | ): |
@@ -203,6 +208,7 @@ def __init__( |
203 | 208 | self.scoring = scoring |
204 | 209 | self.distribution = distribution |
205 | 210 | self.cv = cv |
| 211 | + self.groups = groups |
206 | 212 | self.n_probes = n_probes |
207 | 213 | self.random_state = random_state |
208 | 214 |
|
@@ -238,20 +244,26 @@ def fit(self, X: pd.DataFrame, y: pd.Series): |
238 | 244 | if self.collective is True: |
239 | 245 | # train model using entire dataset and derive feature importance |
240 | 246 | f_importance_mean, f_importance_std = find_feature_importance( |
241 | | - X_new, y, self.estimator, self.cv, self.scoring, |
| 247 | + X=X_new, |
| 248 | + y=y, |
| 249 | + estimator=self.estimator, |
| 250 | + cv=self.cv, |
| 251 | + groups=self.groups, |
| 252 | + scoring=self.scoring, |
242 | 253 | ) |
243 | 254 | self.feature_importances_ = f_importance_mean |
244 | 255 | self.feature_importances_std_ = f_importance_std |
245 | 256 |
|
246 | 257 | else: |
247 | 258 | # trains a model per feature (single feature models) |
248 | 259 | f_importance_mean, f_importance_std = single_feature_performance( |
249 | | - X_new, |
250 | | - y, |
251 | | - X_new.columns, |
252 | | - self.estimator, |
253 | | - self.cv, |
254 | | - self.scoring, |
| 260 | + X=X_new, |
| 261 | + y=y, |
| 262 | + variables=X_new.columns, |
| 263 | + estimator=self.estimator, |
| 264 | + cv=self.cv, |
| 265 | + groups=self.groups, |
| 266 | + scoring=self.scoring, |
255 | 267 | ) |
256 | 268 | self.feature_importances_ = pd.Series(f_importance_mean) |
257 | 269 | self.feature_importances_std_ = pd.Series(f_importance_std) |
|
0 commit comments