From a0aa54ea76b5d70384912a9ca6b53933e2f18fd6 Mon Sep 17 00:00:00 2001 From: PaulWestenthanner Date: Sat, 15 Mar 2025 17:07:03 +0100 Subject: [PATCH] Fixes #453. Categorical targets. --- CHANGELOG.md | 6 ++++++ category_encoders/utils.py | 4 ++-- tests/test_encoders.py | 30 ++++++++++++++++++++++++++++++ 3 files changed, 38 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8b3426b1..034eb423 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +v.2.8.1 +======= + +* Fix: Support and test string targets and `pd.Categorical` targets. +* Fix: Docs typo. + v.2.8.0 ======= diff --git a/category_encoders/utils.py b/category_encoders/utils.py index c5830e25..a9adca74 100644 --- a/category_encoders/utils.py +++ b/category_encoders/utils.py @@ -457,7 +457,7 @@ def fit(self, X: X_type, y: y_type | None = None, **kwargs): if self.__sklearn_tags__().target_tags.required: if not is_numeric_dtype(y): self.lab_encoder_ = LabelEncoder() - y = self.lab_encoder_.fit_transform(y) + y = pd.Series(self.lab_encoder_.fit_transform(y), index=y.index) else: self.lab_encoder_ = None @@ -621,7 +621,7 @@ def transform(self, X: X_type, y: y_type | None = None, override_return_df: bool X, y = convert_inputs(X, y, deep=True) self._check_transform_inputs(X) if y is not None and self.lab_encoder_ is not None: - y = self.lab_encoder_.transform(y) + y = pd.Series(self.lab_encoder_.transform(y), index=y.index) if not list(self.cols): return X diff --git a/tests/test_encoders.py b/tests/test_encoders.py index b7131b53..3a08bb98 100644 --- a/tests/test_encoders.py +++ b/tests/test_encoders.py @@ -436,6 +436,36 @@ def test_types(self): encoder = getattr(encoders, encoder_name)() encoder.fit_transform(X, y) + def test_string_targets(self): + """Test encoders with targets of type string.""" + X = pd.DataFrame({'feature': ['A', 'B', 'A', 'C']}) + y_string = pd.Series(['yes', 'no', 'yes', 'no']) + + for encoder_name in encoders.__all__: + with 
self.subTest(encoder_name=encoder_name): + enc = getattr(encoders, encoder_name)() + + # Test with string target + enc.fit(X, y_string) + transformed = enc.transform(X) + th.verify_numeric(transformed) + self.assertEqual(len(transformed), 4) + def test_categorical_targets(self): + """Test encoders with targets of type pd.Categorical.""" + X = pd.DataFrame({'feature': ['A', 'B', 'A', 'C']}) + y_categorical = pd.Categorical([1, 0, 1, 0]) + + for encoder_name in encoders.__all__: + with self.subTest(encoder_name=encoder_name): + enc = getattr(encoders, encoder_name)() + + # Test with pd.Categorical target + enc.fit(X, y_categorical) + transformed = enc.transform(X) + th.verify_numeric(transformed) + self.assertEqual(len(transformed), 4) + + def test_preserve_column_order(self): """Test that the encoder preserves the column order.""" binary_cat_example = pd.DataFrame(