From 4d653a9a8d9c21952d242676ce26633cd4e0282b Mon Sep 17 00:00:00 2001
From: Ezzaldin97 <kingtal632@yahoo.com>
Date: Fri, 23 Feb 2024 23:03:16 +0200
Subject: [PATCH 01/24] add group by variables to base forecast transformer

---
 .../forecasting/base_forecast_transformers.py | 33 +++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/feature_engine/timeseries/forecasting/base_forecast_transformers.py b/feature_engine/timeseries/forecasting/base_forecast_transformers.py
index d6e9fa30f..5f3fb3279 100644
--- a/feature_engine/timeseries/forecasting/base_forecast_transformers.py
+++ b/feature_engine/timeseries/forecasting/base_forecast_transformers.py
@@ -51,6 +51,9 @@ class BaseForecastTransformer(BaseEstimator, TransformerMixin, GetFeatureNamesOu
 
     {drop_original}
 
+    group_by_variables: str, list of str, default=None
+            Variable or list of variables to group by when creating the features.
+
     Attributes
     ----------
     {feature_names_in_}
@@ -64,6 +67,7 @@ def __init__(
         variables: Union[None, int, str, List[Union[str, int]]] = None,
         missing_values: str = "raise",
         drop_original: bool = False,
+        group_by_variables: Optional[Union[str, List[str]]] = None,
     ) -> None:
 
         if missing_values not in ["raise", "ignore"]:
@@ -78,9 +82,26 @@ def __init__(
                 f"Got {drop_original} instead."
             )
 
+        # check validity if group by variables passed
+        if group_by_variables:
+            # check group by variables data-types
+            if not (
+                isinstance(group_by_variables, str)
+                or isinstance(group_by_variables, list)
+            ):
+                raise ValueError(
+                    "group_by_variables must be an string or a list of strings. "
+                    f"Got {group_by_variables} instead."
+                )
+            # check if passed list has duplicates.
+            if isinstance(group_by_variables, list):
+                if len(set(group_by_variables)) != len(group_by_variables):
+                    raise ValueError(f"group_by_variables contains duplicate values")
+
         self.variables = _check_variables_input_value(variables)
         self.missing_values = missing_values
         self.drop_original = drop_original
+        self.group_by_variables = group_by_variables
 
     def _check_index(self, X: pd.DataFrame):
         """
@@ -165,6 +186,18 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
         if self.missing_values == "raise":
             self._check_na_and_inf(X)
 
+        if self.group_by_variables:
+            # check if input group by variables is in input dataframe variables.
+            # set of differences between input group by variables and dataframe variables
+            # valid if no differences between both
+            if isinstance(self.group_by_variables, list):
+                diff = set(self.group_by_variables).difference(X.columns.tolist())
+                if len(diff) != 0:
+                    raise ValueError(f"{list(diff)} not exist in dataframe")
+            elif self.group_by_variables not in X.columns.tolist():
+                # single column name: `diff` is not defined on this path
+                raise ValueError(f"{self.group_by_variables} not exists in dataframe")
+
         self._get_feature_names_in(X)
 
         return self

From 4e9d8499bbd632956ebdf4ea0bb70cb84cf95889 Mon Sep 17 00:00:00 2001
From: Ezzaldin97 <kingtal632@yahoo.com>
Date: Fri, 23 Feb 2024 23:04:18 +0200
Subject: [PATCH 02/24] add group by variables to lag_features

---
 .../timeseries/forecasting/lag_features.py    | 81 ++++++++++++++-----
 1 file changed, 63 insertions(+), 18 deletions(-)

diff --git a/feature_engine/timeseries/forecasting/lag_features.py b/feature_engine/timeseries/forecasting/lag_features.py
index 19822ea5f..6a50b621c 100644
--- a/feature_engine/timeseries/forecasting/lag_features.py
+++ b/feature_engine/timeseries/forecasting/lag_features.py
@@ -1,7 +1,7 @@
 # Authors: Morgan Sell <morganpsell@gmail.com>
 # License: BSD 3 clause
 
-from typing import List, Union
+from typing import List, Union, Optional
 
 import pandas as pd
 
@@ -74,6 +74,9 @@ class LagFeatures(BaseForecastTransformer):
 
     {drop_original}
 
+    group_by_variables: str, list of str, default=None
+            Variable or list of variables to group by when creating lag features.
+
     Attributes
     ----------
     variables_:
@@ -117,6 +120,27 @@ class LagFeatures(BaseForecastTransformer):
     2  2022-09-20   3   8       2.0       7.0       1.0       6.0
     3  2022-09-21   4   9       3.0       8.0       2.0       7.0
     4  2022-09-22   5  10       4.0       9.0       3.0       8.0
+
+    Create lag features separately for each group defined by another variable:
+    >>> import pandas as pd
+    >>> from feature_engine.timeseries.forecasting import LagFeatures
+    >>> X = pd.DataFrame(dict(date = ["2022-09-18",
+    >>>                               "2022-09-19",
+    >>>                               "2022-09-20",
+    >>>                               "2022-09-21",
+    >>>                               "2022-09-22"],
+    >>>                       x1 = [1,2,3,4,5],
+    >>>                       x2 = [6,7,8,9,10],
+    >>>                       x3 = ['a','b','a','b','a']
+    >>>                     ))
+    >>> lf = LagFeatures(periods=[1,2], group_by_variables='x3')
+    >>> lf.fit_transform(X)
+              date  x1  x2 x3  x1_lag_1  x2_lag_1  x1_lag_2  x2_lag_2
+    0  2022-09-18   1   6  a       NaN       NaN       NaN       NaN
+    1  2022-09-19   2   7  b       NaN       NaN       NaN       NaN
+    2  2022-09-20   3   8  a       1.0       6.0       NaN       NaN
+    3  2022-09-21   4   9  b       2.0       7.0       NaN       NaN
+    4  2022-09-22   5  10  a       3.0       8.0       1.0       6.0
     """
 
     def __init__(
@@ -127,6 +151,7 @@ def __init__(
         sort_index: bool = True,
         missing_values: str = "raise",
         drop_original: bool = False,
+        group_by_variables: Optional[Union[str, List[str]]] = None,
     ) -> None:
 
         if not (
@@ -151,7 +176,7 @@ def __init__(
                 "sort_index takes values True and False." f"Got {sort_index} instead."
             )
 
-        super().__init__(variables, missing_values, drop_original)
+        super().__init__(variables, missing_values, drop_original, group_by_variables)
 
         self.periods = periods
         self.freq = freq
@@ -180,35 +205,55 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
             if isinstance(self.freq, list):
                 df_ls = []
                 for fr in self.freq:
-                    tmp = X[self.variables_].shift(
-                        freq=fr,
-                        axis=0,
-                    )
+                    if self.group_by_variables:
+                        tmp = X.groupby(self.group_by_variables)[self.variables_].shift(
+                            freq=fr,
+                        )
+                    else:
+                        tmp = X[self.variables_].shift(
+                            freq=fr,
+                            axis=0,
+                        )
                     df_ls.append(tmp)
                 tmp = pd.concat(df_ls, axis=1)
 
             else:
-                tmp = X[self.variables_].shift(
-                    freq=self.freq,
-                    axis=0,
-                )
+                if self.group_by_variables:
+                    tmp = X.groupby(self.group_by_variables)[self.variables_].shift(
+                        freq=self.freq,
+                    )
+                else:
+                    tmp = X[self.variables_].shift(
+                        freq=self.freq,
+                        axis=0,
+                    )
 
         else:
             if isinstance(self.periods, list):
                 df_ls = []
                 for pr in self.periods:
-                    tmp = X[self.variables_].shift(
-                        periods=pr,
-                        axis=0,
-                    )
+                    if self.group_by_variables:
+                        tmp = X.groupby(self.group_by_variables)[self.variables_].shift(
+                            periods=pr,
+                        )
+                    else:
+                        tmp = X[self.variables_].shift(
+                            periods=pr,
+                            axis=0,
+                        )
                     df_ls.append(tmp)
                 tmp = pd.concat(df_ls, axis=1)
 
             else:
-                tmp = X[self.variables_].shift(
-                    periods=self.periods,
-                    axis=0,
-                )
+                if self.group_by_variables:
+                    tmp = X.groupby(self.group_by_variables)[self.variables_].shift(
+                        periods=self.periods,
+                    )
+                else:
+                    tmp = X[self.variables_].shift(
+                        periods=self.periods,
+                        axis=0,
+                    )
 
         tmp.columns = self._get_new_features_name()
 

From 7f403918215869e0e246d1179ce34ec6ba9980a5 Mon Sep 17 00:00:00 2001
From: Ezzaldin97 <kingtal632@yahoo.com>
Date: Sun, 25 Feb 2024 19:54:13 +0200
Subject: [PATCH 03/24] add group by window features

---
 .../timeseries/forecasting/window_features.py | 73 +++++++++++++++----
 1 file changed, 60 insertions(+), 13 deletions(-)

diff --git a/feature_engine/timeseries/forecasting/window_features.py b/feature_engine/timeseries/forecasting/window_features.py
index 3cb89ccfa..8a8937882 100644
--- a/feature_engine/timeseries/forecasting/window_features.py
+++ b/feature_engine/timeseries/forecasting/window_features.py
@@ -1,4 +1,4 @@
-from typing import Callable, List, Union
+from typing import Callable, List, Union, Optional
 
 import pandas as pd
 
@@ -98,6 +98,9 @@ class WindowFeatures(BaseForecastTransformer):
 
     {drop_original}
 
+    group_by_variables: str, list of str, default=None
+            Variable or list of variables to group by when creating window features.
+
     Attributes
     ----------
     variables_:
@@ -156,6 +159,7 @@ def __init__(
         sort_index: bool = True,
         missing_values: str = "raise",
         drop_original: bool = False,
+        group_by_variables: Optional[Union[str, List[str]]] = None,
     ) -> None:
 
         if isinstance(window, list) and len(window) != len(set(window)):
@@ -176,7 +180,7 @@ def __init__(
                 f"periods must be a positive integer. Got {periods} instead."
             )
 
-        super().__init__(variables, missing_values, drop_original)
+        super().__init__(variables, missing_values, drop_original, group_by_variables)
 
         self.window = window
         self.min_periods = min_periods
@@ -205,22 +209,34 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
         if isinstance(self.window, list):
             df_ls = []
             for win in self.window:
+                if self.group_by_variables:
+                    tmp = self._agg_window_features(
+                        grouped_df=X.groupby(self.group_by_variables),
+                        window=win,
+                    )
+                else:
+                    tmp = (
+                        X[self.variables_]
+                        .rolling(window=win)
+                        .agg(self.functions)
+                        .shift(periods=self.periods, freq=self.freq)
+                    )
+                df_ls.append(tmp)
+            tmp = pd.concat(df_ls, axis=1)
+
+        else:
+            if self.group_by_variables:
+                tmp = self._agg_window_features(
+                    grouped_df=X.groupby(self.group_by_variables),
+                    window=self.window,
+                )
+            else:
                 tmp = (
                     X[self.variables_]
-                    .rolling(window=win)
+                    .rolling(window=self.window)
                     .agg(self.functions)
                     .shift(periods=self.periods, freq=self.freq)
                 )
-                df_ls.append(tmp)
-            tmp = pd.concat(df_ls, axis=1)
-
-        else:
-            tmp = (
-                X[self.variables_]
-                .rolling(window=self.window)
-                .agg(self.functions)
-                .shift(periods=self.periods, freq=self.freq)
-            )
 
         tmp.columns = self._get_new_features_name()
 
@@ -254,3 +270,34 @@ def _get_new_features_name(self) -> List:
             ]
 
         return feature_names
+    
+    def _agg_window_features(
+            self, 
+            grouped_df: pd.core.groupby.generic.DataFrameGroupBy,
+            window: int
+    ) -> Union[pd.Series, pd.DataFrame]:
+        """generate window features based on groups
+        Parameters
+        ----------
+        grouped_df : pd.core.groupby.generic.DataFrameGroupBy
+            dataframe of groups
+
+        window: int
+            Size of the moving window
+
+        Returns
+        -------
+        Union[pd.Series, pd.DataFrame]
+            returned window features
+        """
+        tmp_data = []
+        for _, group in grouped_df:
+            tmp = (
+                group[self.variables_]
+                .rolling(window=window)
+                .agg(self.functions)
+                .shift(periods=self.periods, freq=self.freq)
+            )
+            tmp_data.append(tmp)
+        tmp = pd.concat(tmp_data).sort_index()
+        return tmp

From b476748a83881a7cb08a7044afd25a34753a8187 Mon Sep 17 00:00:00 2001
From: Ezzaldin97 <kingtal632@yahoo.com>
Date: Sun, 25 Feb 2024 20:32:11 +0200
Subject: [PATCH 04/24] add group by expanding window features

---
 .../forecasting/expanding_window_features.py  | 80 +++++++++++++++++--
 1 file changed, 72 insertions(+), 8 deletions(-)

diff --git a/feature_engine/timeseries/forecasting/expanding_window_features.py b/feature_engine/timeseries/forecasting/expanding_window_features.py
index 6a2e5037c..8057f47b7 100644
--- a/feature_engine/timeseries/forecasting/expanding_window_features.py
+++ b/feature_engine/timeseries/forecasting/expanding_window_features.py
@@ -3,7 +3,7 @@
 
 from __future__ import annotations
 
-from typing import List
+from typing import List, Union, Optional
 
 import pandas as pd
 
@@ -139,6 +139,37 @@ class ExpandingWindowFeatures(BaseForecastTransformer):
     2  2022-09-20   3   8                1.5                6.5
     3  2022-09-21   4   9                2.0                7.0
     4  2022-09-22   5  10                2.5                7.5
+
+    Create expanding window features separately for each group of another variable:
+    >>> import pandas as pd
+    >>> from feature_engine.timeseries.forecasting import ExpandingWindowFeatures
+    >>> X = pd.DataFrame(dict(date = ["2022-09-18",
+    >>>                          "2022-09-19",
+    >>>                          "2022-09-20",
+    >>>                          "2022-09-21",
+    >>>                          "2022-09-22",
+    >>>                          "2022-09-18",
+    >>>                          "2022-09-19",
+    >>>                          "2022-09-20",
+    >>>                          "2022-09-21",
+    >>>                          "2022-09-22"],
+    >>>                  x1 = [1,2,3,4,5, 3,5,6,8,11],
+    >>>                  x2 = [6,7,8,9,10, 2,9,10,15,2],
+    >>>                  x3=['a','a','a','a','a', 'b','b','b','b','b']
+    >>>                ))
+    >>> ewf = ExpandingWindowFeatures(group_by_variables='x3')
+    >>> ewf.fit_transform(X)
+             date  x1  x2 x3  x1_expanding_mean  x2_expanding_mean
+    0  2022-09-18   1   6  a                NaN                NaN
+    1  2022-09-19   2   7  a           1.000000                6.0
+    2  2022-09-20   3   8  a           1.500000                6.5
+    3  2022-09-21   4   9  a           2.000000                7.0
+    4  2022-09-22   5  10  a           2.500000                7.5
+    5  2022-09-18   3   2  b                NaN                NaN
+    6  2022-09-19   5   9  b           3.000000                2.0
+    7  2022-09-20   6  10  b           4.000000                5.5
+    8  2022-09-21   8  15  b           4.666667                7.0
+    9  2022-09-22  11   2  b           5.500000                9.0
     """
 
     def __init__(
@@ -151,6 +182,7 @@ def __init__(
         sort_index: bool = True,
         missing_values: str = "raise",
         drop_original: bool = False,
+        group_by_variables: Optional[Union[str, List[str]]] = None,
     ) -> None:
 
         if not isinstance(functions, (str, list)) or not all(
@@ -168,7 +200,7 @@ def __init__(
                 f"periods must be a non-negative integer. Got {periods} instead."
             )
 
-        super().__init__(variables, missing_values, drop_original)
+        super().__init__(variables, missing_values, drop_original, group_by_variables)
 
         self.min_periods = min_periods
         self.functions = functions
@@ -193,12 +225,17 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
         # Common dataframe checks and setting up.
         X = self._check_transform_input_and_state(X)
 
-        tmp = (
-            X[self.variables_]
-            .expanding(min_periods=self.min_periods)
-            .agg(self.functions)
-            .shift(periods=self.periods, freq=self.freq)
-        )
+        if self.group_by_variables:
+            tmp = self._agg_expanding_window_features(
+                grouped_df=X.groupby(self.group_by_variables)
+            )
+        else:
+            tmp = (
+                X[self.variables_]
+                .expanding(min_periods=self.min_periods)
+                .agg(self.functions)
+                .shift(periods=self.periods, freq=self.freq)
+            )
 
         tmp.columns = self._get_new_features_name()
 
@@ -224,3 +261,30 @@ def _get_new_features_name(self) -> List:
         ]
 
         return feature_names
+
+    def _agg_expanding_window_features(
+        self,
+        grouped_df: pd.core.groupby.generic.DataFrameGroupBy,
+    ) -> Union[pd.Series, pd.DataFrame]:
+        """generate expanding window features based on groups
+        Parameters
+        ----------
+        grouped_df : pd.core.groupby.generic.DataFrameGroupBy
+            dataframe of groups
+
+        Returns
+        -------
+        Union[pd.Series, pd.DataFrame]
+            returned expanding window features
+        """
+        tmp_data = []
+        for _, group in grouped_df:
+            tmp = (
+                group[self.variables_]
+                .expanding(min_periods=self.min_periods)
+                .agg(self.functions)
+                .shift(periods=self.periods, freq=self.freq)
+            )
+            tmp_data.append(tmp)
+        tmp = pd.concat(tmp_data).sort_index()
+        return tmp

From 02c59bdf9b1f49dfc9ae63a3192fbe524b0a29d0 Mon Sep 17 00:00:00 2001
From: Ezzaldin97 <kingtal632@yahoo.com>
Date: Sun, 25 Feb 2024 22:10:47 +0200
Subject: [PATCH 05/24] add test cases of groupby timeseries features

---
 .../test_expanding_window_features.py         | 135 ++++++++++++++++++
 .../test_forecasting/test_lag_features.py     | 134 +++++++++++++++++
 .../test_forecasting/test_window_features.py  | 134 +++++++++++++++++
 3 files changed, 403 insertions(+)

diff --git a/tests/test_time_series/test_forecasting/test_expanding_window_features.py b/tests/test_time_series/test_forecasting/test_expanding_window_features.py
index cb33ea8e1..e5cf99670 100644
--- a/tests/test_time_series/test_forecasting/test_expanding_window_features.py
+++ b/tests/test_time_series/test_forecasting/test_expanding_window_features.py
@@ -1,6 +1,7 @@
 import numpy as np
 import pytest
 from pandas.testing import assert_frame_equal
+import pandas as pd
 
 from feature_engine.timeseries.forecasting import ExpandingWindowFeatures
 
@@ -428,3 +429,137 @@ def test_expanding_window_raises_when_periods_negative():
         ValueError, match="periods must be a non-negative integer. Got -1 instead."
     ):
         ExpandingWindowFeatures(periods=-1)
+
+
+def test_correct_groupby_expanding_window_when_using_periods(df_time):
+    date_time = [
+        pd.Timestamp("2020-05-15 12:00:00"),
+        pd.Timestamp("2020-05-15 12:15:00"),
+        pd.Timestamp("2020-05-15 12:30:00"),
+        pd.Timestamp("2020-05-15 12:45:00"),
+        pd.Timestamp("2020-05-15 13:00:00"),
+        pd.Timestamp("2020-05-15 13:15:00"),
+        pd.Timestamp("2020-05-15 13:30:00"),
+        pd.Timestamp("2020-05-15 13:45:00"),
+        pd.Timestamp("2020-05-15 14:00:00"),
+        pd.Timestamp("2020-05-15 14:15:00"),
+        pd.Timestamp("2020-05-15 14:30:00"),
+        pd.Timestamp("2020-05-15 14:45:00"),
+        pd.Timestamp("2020-05-15 15:00:00"),
+        pd.Timestamp("2020-05-15 15:15:00"),
+        pd.Timestamp("2020-05-15 15:30:00"),
+    ]
+    expected_results = {
+        "ambient_temp": [
+            31.31,
+            31.51,
+            32.15,
+            32.39,
+            32.62,
+            32.5,
+            32.52,
+            32.68,
+            33.76,
+            34.13,
+            34.08,
+            33.7,
+            33.89,
+            34.04,
+            34.4,
+        ],
+        "module_temp": [
+            49.18,
+            49.84,
+            52.35,
+            50.63,
+            49.61,
+            47.01,
+            46.67,
+            47.52,
+            49.8,
+            55.03,
+            54.52,
+            47.62,
+            46.03,
+            44.29,
+            46.74,
+        ],
+        "irradiation": [
+            0.51,
+            0.79,
+            0.65,
+            0.76,
+            0.42,
+            0.49,
+            0.57,
+            0.56,
+            0.74,
+            0.89,
+            0.47,
+            0.54,
+            0.4,
+            0.45,
+            0.57,
+        ],
+        "color": [
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "green",
+            "green",
+            "green",
+            "green",
+            "green",
+        ],
+        "ambient_temp_expanding_mean": [
+            np.nan,
+            31.31,
+            31.41,
+            31.656666666666666,
+            31.84,
+            31.996,
+            32.08,
+            32.142857142857146,
+            32.21,
+            32.382222222222225,
+            np.nan,
+            34.08,
+            33.89,
+            33.89,
+            33.9275,
+        ],
+        "irradiation_expanding_mean": [
+            np.nan,
+            0.51,
+            0.65,
+            0.65,
+            0.6775,
+            0.626,
+            0.6033333333333334,
+            0.5985714285714285,
+            0.59375,
+            0.61,
+            np.nan,
+            0.47,
+            0.505,
+            0.47000000000000003,
+            0.465,
+        ],
+    }
+    expected_results_df = pd.DataFrame(
+        data=expected_results,
+        index=date_time,
+    )
+    # When setting group_by_variables to color
+    transformer = ExpandingWindowFeatures(
+        variables=["ambient_temp", "irradiation"], group_by_variables="color"
+    )
+    df_tr = transformer.fit_transform(df_time)
+    assert df_tr.equals(expected_results_df)
diff --git a/tests/test_time_series/test_forecasting/test_lag_features.py b/tests/test_time_series/test_forecasting/test_lag_features.py
index 8ea349778..4bfb72848 100644
--- a/tests/test_time_series/test_forecasting/test_lag_features.py
+++ b/tests/test_time_series/test_forecasting/test_lag_features.py
@@ -233,3 +233,137 @@ def test_sort_index(df_time):
     A = Xs[transformer.variables_].iloc[0:4].values
     B = X_tr[transformer._get_new_features_name()].iloc[1:5].values
     assert (A == B).all()
+
+
+def test_correct_groupby_lag_when_using_periods(df_time):
+    date_time = [
+        pd.Timestamp("2020-05-15 12:00:00"),
+        pd.Timestamp("2020-05-15 12:15:00"),
+        pd.Timestamp("2020-05-15 12:30:00"),
+        pd.Timestamp("2020-05-15 12:45:00"),
+        pd.Timestamp("2020-05-15 13:00:00"),
+        pd.Timestamp("2020-05-15 13:15:00"),
+        pd.Timestamp("2020-05-15 13:30:00"),
+        pd.Timestamp("2020-05-15 13:45:00"),
+        pd.Timestamp("2020-05-15 14:00:00"),
+        pd.Timestamp("2020-05-15 14:15:00"),
+        pd.Timestamp("2020-05-15 14:30:00"),
+        pd.Timestamp("2020-05-15 14:45:00"),
+        pd.Timestamp("2020-05-15 15:00:00"),
+        pd.Timestamp("2020-05-15 15:15:00"),
+        pd.Timestamp("2020-05-15 15:30:00"),
+    ]
+    expected_results = {
+        "ambient_temp": [
+            31.31,
+            31.51,
+            32.15,
+            32.39,
+            32.62,
+            32.5,
+            32.52,
+            32.68,
+            33.76,
+            34.13,
+            34.08,
+            33.7,
+            33.89,
+            34.04,
+            34.4,
+        ],
+        "module_temp": [
+            49.18,
+            49.84,
+            52.35,
+            50.63,
+            49.61,
+            47.01,
+            46.67,
+            47.52,
+            49.8,
+            55.03,
+            54.52,
+            47.62,
+            46.03,
+            44.29,
+            46.74,
+        ],
+        "irradiation": [
+            0.51,
+            0.79,
+            0.65,
+            0.76,
+            0.42,
+            0.49,
+            0.57,
+            0.56,
+            0.74,
+            0.89,
+            0.47,
+            0.54,
+            0.4,
+            0.45,
+            0.57,
+        ],
+        "color": [
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "green",
+            "green",
+            "green",
+            "green",
+            "green",
+        ],
+        "ambient_temp_lag_3": [
+            np.nan,
+            np.nan,
+            np.nan,
+            31.31,
+            31.51,
+            32.15,
+            32.39,
+            32.62,
+            32.5,
+            32.52,
+            np.nan,
+            np.nan,
+            np.nan,
+            34.08,
+            33.7,
+        ],
+        "irradiation_lag_3": [
+            np.nan,
+            np.nan,
+            np.nan,
+            0.51,
+            0.79,
+            0.65,
+            0.76,
+            0.42,
+            0.49,
+            0.57,
+            np.nan,
+            np.nan,
+            np.nan,
+            0.47,
+            0.54,
+        ],
+    }
+    expected_results_df = pd.DataFrame(
+        data=expected_results,
+        index=date_time,
+    )
+    # When setting group_by_variables to color
+    transformer = LagFeatures(
+        variables=["ambient_temp", "irradiation"], periods=3, group_by_variables="color"
+    )
+    df_tr = transformer.fit_transform(df_time)
+    assert df_tr.equals(expected_results_df)
diff --git a/tests/test_time_series/test_forecasting/test_window_features.py b/tests/test_time_series/test_forecasting/test_window_features.py
index a03259b7e..50b373e72 100644
--- a/tests/test_time_series/test_forecasting/test_window_features.py
+++ b/tests/test_time_series/test_forecasting/test_window_features.py
@@ -454,3 +454,137 @@ def test_sort_index(df_time):
     assert_frame_equal(
         df_tr[transformer.variables_], Xs[transformer.variables_].sort_index()
     )
+
+
+def test_correct_groupby_window_when_using_periods(df_time):
+    date_time = [
+        pd.Timestamp("2020-05-15 12:00:00"),
+        pd.Timestamp("2020-05-15 12:15:00"),
+        pd.Timestamp("2020-05-15 12:30:00"),
+        pd.Timestamp("2020-05-15 12:45:00"),
+        pd.Timestamp("2020-05-15 13:00:00"),
+        pd.Timestamp("2020-05-15 13:15:00"),
+        pd.Timestamp("2020-05-15 13:30:00"),
+        pd.Timestamp("2020-05-15 13:45:00"),
+        pd.Timestamp("2020-05-15 14:00:00"),
+        pd.Timestamp("2020-05-15 14:15:00"),
+        pd.Timestamp("2020-05-15 14:30:00"),
+        pd.Timestamp("2020-05-15 14:45:00"),
+        pd.Timestamp("2020-05-15 15:00:00"),
+        pd.Timestamp("2020-05-15 15:15:00"),
+        pd.Timestamp("2020-05-15 15:30:00"),
+    ]
+    expected_results = {
+        "ambient_temp": [
+            31.31,
+            31.51,
+            32.15,
+            32.39,
+            32.62,
+            32.5,
+            32.52,
+            32.68,
+            33.76,
+            34.13,
+            34.08,
+            33.7,
+            33.89,
+            34.04,
+            34.4,
+        ],
+        "module_temp": [
+            49.18,
+            49.84,
+            52.35,
+            50.63,
+            49.61,
+            47.01,
+            46.67,
+            47.52,
+            49.8,
+            55.03,
+            54.52,
+            47.62,
+            46.03,
+            44.29,
+            46.74,
+        ],
+        "irradiation": [
+            0.51,
+            0.79,
+            0.65,
+            0.76,
+            0.42,
+            0.49,
+            0.57,
+            0.56,
+            0.74,
+            0.89,
+            0.47,
+            0.54,
+            0.4,
+            0.45,
+            0.57,
+        ],
+        "color": [
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "green",
+            "green",
+            "green",
+            "green",
+            "green",
+        ],
+        "ambient_temp_window_3_mean": [
+            np.nan,
+            np.nan,
+            np.nan,
+            31.656666666666666,
+            32.016666666666666,
+            32.38666666666666,
+            32.50333333333333,
+            32.54666666666667,
+            32.56666666666667,
+            32.98666666666667,
+            np.nan,
+            np.nan,
+            np.nan,
+            33.89,
+            33.876666666666665,
+        ],
+        "irradiation_window_3_mean": [
+            np.nan,
+            np.nan,
+            np.nan,
+            0.65,
+            0.7333333333333334,
+            0.61,
+            0.5566666666666668,
+            0.49333333333333335,
+            0.54,
+            0.6233333333333334,
+            np.nan,
+            np.nan,
+            np.nan,
+            0.47000000000000003,
+            0.4633333333333334,
+        ],
+    }
+    expected_results_df = pd.DataFrame(
+        data=expected_results,
+        index=date_time,
+    )
+    # When setting group_by_variables to color
+    transformer = WindowFeatures(
+        variables=["ambient_temp", "irradiation"], window=3, group_by_variables="color"
+    )
+    df_tr = transformer.fit_transform(df_time)
+    assert df_tr.equals(expected_results_df)

From 0dd92cc6626ff93c2172f89d447afc931026c3fa Mon Sep 17 00:00:00 2001
From: Ezzaldin97 <kingtal632@yahoo.com>
Date: Sun, 25 Feb 2024 22:35:56 +0200
Subject: [PATCH 06/24] ensure code style tests

---
 .../timeseries/forecasting/base_forecast_transformers.py  | 4 +---
 .../timeseries/forecasting/expanding_window_features.py   | 2 +-
 feature_engine/timeseries/forecasting/lag_features.py     | 2 +-
 feature_engine/timeseries/forecasting/window_features.py  | 8 +++-----
 .../test_forecasting/test_expanding_window_features.py    | 2 +-
 5 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/feature_engine/timeseries/forecasting/base_forecast_transformers.py b/feature_engine/timeseries/forecasting/base_forecast_transformers.py
index 5f3fb3279..db9b196fc 100644
--- a/feature_engine/timeseries/forecasting/base_forecast_transformers.py
+++ b/feature_engine/timeseries/forecasting/base_forecast_transformers.py
@@ -96,7 +96,7 @@ def __init__(
             # check if passed list has duplicates.
             if isinstance(group_by_variables, list):
                 if len(set(group_by_variables)) != len(group_by_variables):
-                    raise ValueError(f"group_by_variables contains duplicate values")
+                    raise ValueError("group_by_variables contains duplicate values")
 
         self.variables = _check_variables_input_value(variables)
         self.missing_values = missing_values
@@ -188,8 +188,6 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
 
         if self.group_by_variables:
             # check if input group by variables is in input dataframe variables.
-            # set of differences between input group by variables and dataframe variables
-            # valid if no differences between both
             if isinstance(self.group_by_variables, list):
                 diff = set(self.group_by_variables).difference(X.columns.tolist())
                 if len(diff) != 0:
diff --git a/feature_engine/timeseries/forecasting/expanding_window_features.py b/feature_engine/timeseries/forecasting/expanding_window_features.py
index 8057f47b7..4163a8be1 100644
--- a/feature_engine/timeseries/forecasting/expanding_window_features.py
+++ b/feature_engine/timeseries/forecasting/expanding_window_features.py
@@ -3,7 +3,7 @@
 
 from __future__ import annotations
 
-from typing import List, Union, Optional
+from typing import List, Optional, Union
 
 import pandas as pd
 
diff --git a/feature_engine/timeseries/forecasting/lag_features.py b/feature_engine/timeseries/forecasting/lag_features.py
index 6a50b621c..399c40a0e 100644
--- a/feature_engine/timeseries/forecasting/lag_features.py
+++ b/feature_engine/timeseries/forecasting/lag_features.py
@@ -1,7 +1,7 @@
 # Authors: Morgan Sell <morganpsell@gmail.com>
 # License: BSD 3 clause
 
-from typing import List, Union, Optional
+from typing import List, Optional, Union
 
 import pandas as pd
 
diff --git a/feature_engine/timeseries/forecasting/window_features.py b/feature_engine/timeseries/forecasting/window_features.py
index 8a8937882..adf8257f7 100644
--- a/feature_engine/timeseries/forecasting/window_features.py
+++ b/feature_engine/timeseries/forecasting/window_features.py
@@ -1,4 +1,4 @@
-from typing import Callable, List, Union, Optional
+from typing import Callable, List, Optional, Union
 
 import pandas as pd
 
@@ -270,11 +270,9 @@ def _get_new_features_name(self) -> List:
             ]
 
         return feature_names
-    
+
     def _agg_window_features(
-            self, 
-            grouped_df: pd.core.groupby.generic.DataFrameGroupBy,
-            window: int
+        self, grouped_df: pd.core.groupby.generic.DataFrameGroupBy, window: int
     ) -> Union[pd.Series, pd.DataFrame]:
         """generate window features based on groups
         Parameters
diff --git a/tests/test_time_series/test_forecasting/test_expanding_window_features.py b/tests/test_time_series/test_forecasting/test_expanding_window_features.py
index e5cf99670..6fa89eacd 100644
--- a/tests/test_time_series/test_forecasting/test_expanding_window_features.py
+++ b/tests/test_time_series/test_forecasting/test_expanding_window_features.py
@@ -1,7 +1,7 @@
 import numpy as np
+import pandas as pd
 import pytest
 from pandas.testing import assert_frame_equal
-import pandas as pd
 
 from feature_engine.timeseries.forecasting import ExpandingWindowFeatures
 

From 47de2d6b9a6821b8a268f68504f3c3f17a42c487 Mon Sep 17 00:00:00 2001
From: Ezzaldin97 <kingtal632@yahoo.com>
Date: Sun, 25 Feb 2024 23:18:43 +0200
Subject: [PATCH 07/24] fixing typehint errors

---
 feature_engine/selection/drop_psi_features.py |  4 +--
 .../forecasting/base_forecast_transformers.py | 33 ++++++++-----------
 .../forecasting/expanding_window_features.py  | 21 ++++--------
 .../timeseries/forecasting/lag_features.py    | 21 ++++--------
 .../timeseries/forecasting/window_features.py | 33 ++++++++-----------
 .../test_check_estimator_forecasting.py       | 10 +++---
 6 files changed, 47 insertions(+), 75 deletions(-)

diff --git a/feature_engine/selection/drop_psi_features.py b/feature_engine/selection/drop_psi_features.py
index e425f674e..aed8a4f21 100644
--- a/feature_engine/selection/drop_psi_features.py
+++ b/feature_engine/selection/drop_psi_features.py
@@ -1,5 +1,5 @@
 import datetime
-from typing import List, Union
+from typing import List, Union, Dict
 
 import numpy as np
 import pandas as pd
@@ -475,7 +475,7 @@ def fit(self, X: pd.DataFrame, y: pd.Series = None):
                 threshold_cat = self.threshold
 
         # Compute the PSI by looping over the features
-        self.psi_values_ = {}
+        self.psi_values_: Dict = {}
         self.features_to_drop_ = []
 
         # Compute PSI for numerical features
diff --git a/feature_engine/timeseries/forecasting/base_forecast_transformers.py b/feature_engine/timeseries/forecasting/base_forecast_transformers.py
index db9b196fc..80a82a520 100644
--- a/feature_engine/timeseries/forecasting/base_forecast_transformers.py
+++ b/feature_engine/timeseries/forecasting/base_forecast_transformers.py
@@ -5,30 +5,21 @@
 from sklearn.utils.validation import check_is_fitted
 
 from feature_engine._base_transformers.mixins import GetFeatureNamesOutMixin
-from feature_engine._check_init_parameters.check_variables import (
-    _check_variables_input_value,
-)
+from feature_engine._check_init_parameters.check_variables import \
+    _check_variables_input_value
 from feature_engine._docstrings.fit_attributes import (
-    _feature_names_in_docstring,
-    _n_features_in_docstring,
-)
+    _feature_names_in_docstring, _n_features_in_docstring)
 from feature_engine._docstrings.init_parameters.all_trasnformers import (
-    _drop_original_docstring,
-    _missing_values_docstring,
-)
+    _drop_original_docstring, _missing_values_docstring)
 from feature_engine._docstrings.methods import _fit_not_learn_docstring
 from feature_engine._docstrings.substitute import Substitution
-from feature_engine.dataframe_checks import (
-    _check_contains_inf,
-    _check_contains_na,
-    _check_X_matches_training_df,
-    check_X,
-)
+from feature_engine.dataframe_checks import (_check_contains_inf,
+                                             _check_contains_na,
+                                             _check_X_matches_training_df,
+                                             check_X)
 from feature_engine.tags import _return_tags
-from feature_engine.variable_handling import (
-    check_numerical_variables,
-    find_numerical_variables,
-)
+from feature_engine.variable_handling import (check_numerical_variables,
+                                              find_numerical_variables)
 
 
 @Substitution(
@@ -194,7 +185,9 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
                     raise ValueError(f"{list(diff)} not exist in dataframe")
             else:
                 if self.group_by_variables not in X.columns.tolist():
-                    raise ValueError(f"{list(diff)} not exists in dataframe")
+                    raise ValueError(
+                        f"{self.group_by_variables} not exists in dataframe"
+                    )
 
         self._get_feature_names_in(X)
 
diff --git a/feature_engine/timeseries/forecasting/expanding_window_features.py b/feature_engine/timeseries/forecasting/expanding_window_features.py
index 4163a8be1..a1981c28c 100644
--- a/feature_engine/timeseries/forecasting/expanding_window_features.py
+++ b/feature_engine/timeseries/forecasting/expanding_window_features.py
@@ -8,22 +8,15 @@
 import pandas as pd
 
 from feature_engine._docstrings.fit_attributes import (
-    _feature_names_in_docstring,
-    _n_features_in_docstring,
-)
+    _feature_names_in_docstring, _n_features_in_docstring)
 from feature_engine._docstrings.init_parameters.all_trasnformers import (
-    _drop_original_docstring,
-    _missing_values_docstring,
-    _variables_numerical_docstring,
-)
-from feature_engine._docstrings.methods import (
-    _fit_not_learn_docstring,
-    _fit_transform_docstring,
-)
+    _drop_original_docstring, _missing_values_docstring,
+    _variables_numerical_docstring)
+from feature_engine._docstrings.methods import (_fit_not_learn_docstring,
+                                                _fit_transform_docstring)
 from feature_engine._docstrings.substitute import Substitution
-from feature_engine.timeseries.forecasting.base_forecast_transformers import (
-    BaseForecastTransformer,
-)
+from feature_engine.timeseries.forecasting.base_forecast_transformers import \
+    BaseForecastTransformer
 
 
 @Substitution(
diff --git a/feature_engine/timeseries/forecasting/lag_features.py b/feature_engine/timeseries/forecasting/lag_features.py
index 399c40a0e..5333491a4 100644
--- a/feature_engine/timeseries/forecasting/lag_features.py
+++ b/feature_engine/timeseries/forecasting/lag_features.py
@@ -6,22 +6,15 @@
 import pandas as pd
 
 from feature_engine._docstrings.fit_attributes import (
-    _feature_names_in_docstring,
-    _n_features_in_docstring,
-)
+    _feature_names_in_docstring, _n_features_in_docstring)
 from feature_engine._docstrings.init_parameters.all_trasnformers import (
-    _drop_original_docstring,
-    _missing_values_docstring,
-    _variables_numerical_docstring,
-)
-from feature_engine._docstrings.methods import (
-    _fit_not_learn_docstring,
-    _fit_transform_docstring,
-)
+    _drop_original_docstring, _missing_values_docstring,
+    _variables_numerical_docstring)
+from feature_engine._docstrings.methods import (_fit_not_learn_docstring,
+                                                _fit_transform_docstring)
 from feature_engine._docstrings.substitute import Substitution
-from feature_engine.timeseries.forecasting.base_forecast_transformers import (
-    BaseForecastTransformer,
-)
+from feature_engine.timeseries.forecasting.base_forecast_transformers import \
+    BaseForecastTransformer
 
 
 @Substitution(
diff --git a/feature_engine/timeseries/forecasting/window_features.py b/feature_engine/timeseries/forecasting/window_features.py
index adf8257f7..def4c7706 100644
--- a/feature_engine/timeseries/forecasting/window_features.py
+++ b/feature_engine/timeseries/forecasting/window_features.py
@@ -3,22 +3,15 @@
 import pandas as pd
 
 from feature_engine._docstrings.fit_attributes import (
-    _feature_names_in_docstring,
-    _n_features_in_docstring,
-)
+    _feature_names_in_docstring, _n_features_in_docstring)
 from feature_engine._docstrings.init_parameters.all_trasnformers import (
-    _drop_original_docstring,
-    _missing_values_docstring,
-    _variables_numerical_docstring,
-)
-from feature_engine._docstrings.methods import (
-    _fit_not_learn_docstring,
-    _fit_transform_docstring,
-)
+    _drop_original_docstring, _missing_values_docstring,
+    _variables_numerical_docstring)
+from feature_engine._docstrings.methods import (_fit_not_learn_docstring,
+                                                _fit_transform_docstring)
 from feature_engine._docstrings.substitute import Substitution
-from feature_engine.timeseries.forecasting.base_forecast_transformers import (
-    BaseForecastTransformer,
-)
+from feature_engine.timeseries.forecasting.base_forecast_transformers import \
+    BaseForecastTransformer
 
 
 @Substitution(
@@ -212,7 +205,7 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
                 if self.group_by_variables:
                     tmp = self._agg_window_features(
                         grouped_df=X.groupby(self.group_by_variables),
-                        window=win,
+                        win=win,
                     )
                 else:
                     tmp = (
@@ -228,7 +221,7 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
             if self.group_by_variables:
                 tmp = self._agg_window_features(
                     grouped_df=X.groupby(self.group_by_variables),
-                    window=self.window,
+                    win=self.window,
                 )
             else:
                 tmp = (
@@ -272,7 +265,9 @@ def _get_new_features_name(self) -> List:
         return feature_names
 
     def _agg_window_features(
-        self, grouped_df: pd.core.groupby.generic.DataFrameGroupBy, window: int
+        self,
+        grouped_df: pd.core.groupby.generic.DataFrameGroupBy,
+        win: Union[str, int, Callable, List[int], List[str]],
     ) -> Union[pd.Series, pd.DataFrame]:
         """generate window features based on groups
         Parameters
@@ -280,7 +275,7 @@ def _agg_window_features(
         grouped_df : pd.core.groupby.generic.DataFrameGroupBy
             dataframe of groups
 
-        window: int
+        win: Union[str, int, Callable, List[int], List[str]]
             Size of the moving window
 
         Returns
@@ -292,7 +287,7 @@ def _agg_window_features(
         for _, group in grouped_df:
             tmp = (
                 group[self.variables_]
-                .rolling(window=window)
+                .rolling(window=win)
                 .agg(self.functions)
                 .shift(periods=self.periods, freq=self.freq)
             )
diff --git a/tests/test_time_series/test_forecasting/test_check_estimator_forecasting.py b/tests/test_time_series/test_forecasting/test_check_estimator_forecasting.py
index 2ac81edad..978ef9ddf 100644
--- a/tests/test_time_series/test_forecasting/test_check_estimator_forecasting.py
+++ b/tests/test_time_series/test_forecasting/test_check_estimator_forecasting.py
@@ -5,12 +5,10 @@
 from sklearn.pipeline import Pipeline
 from sklearn.utils.estimator_checks import check_estimator
 
-from feature_engine.timeseries.forecasting import (
-    ExpandingWindowFeatures,
-    LagFeatures,
-    WindowFeatures,
-)
-from tests.estimator_checks.estimator_checks import check_feature_engine_estimator
+from feature_engine.timeseries.forecasting import (ExpandingWindowFeatures,
+                                                   LagFeatures, WindowFeatures)
+from tests.estimator_checks.estimator_checks import \
+    check_feature_engine_estimator
 
 _estimators = [
     LagFeatures(missing_values="ignore"),

From dd43c27e7c28712d533365b5d9693874d20e2b76 Mon Sep 17 00:00:00 2001
From: Ezzaldin97 <kingtal632@yahoo.com>
Date: Sun, 25 Feb 2024 23:55:08 +0200
Subject: [PATCH 08/24] fixing docs indentation issue

---
 .../timeseries/forecasting/expanding_window_features.py          | 1 -
 1 file changed, 1 deletion(-)

diff --git a/feature_engine/timeseries/forecasting/expanding_window_features.py b/feature_engine/timeseries/forecasting/expanding_window_features.py
index a1981c28c..01590f583 100644
--- a/feature_engine/timeseries/forecasting/expanding_window_features.py
+++ b/feature_engine/timeseries/forecasting/expanding_window_features.py
@@ -132,7 +132,6 @@ class ExpandingWindowFeatures(BaseForecastTransformer):
     2  2022-09-20   3   8                1.5                6.5
     3  2022-09-21   4   9                2.0                7.0
     4  2022-09-22   5  10                2.5                7.5
-
     create expanding window features based on other variables.
     >>> import pandas as pd
     >>> from feature_engine.timeseries.forecasting import ExpandingWindowFeatures

From 7459811e5d0aaa8fbca178c259884c235217c3fa Mon Sep 17 00:00:00 2001
From: Ezzaldin97 <kingtal632@yahoo.com>
Date: Mon, 26 Feb 2024 00:02:30 +0200
Subject: [PATCH 09/24] fixing docs indentation issue in lag_features

---
 feature_engine/timeseries/forecasting/lag_features.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/feature_engine/timeseries/forecasting/lag_features.py b/feature_engine/timeseries/forecasting/lag_features.py
index 5333491a4..dbbec5443 100644
--- a/feature_engine/timeseries/forecasting/lag_features.py
+++ b/feature_engine/timeseries/forecasting/lag_features.py
@@ -113,7 +113,6 @@ class LagFeatures(BaseForecastTransformer):
     2  2022-09-20   3   8       2.0       7.0       1.0       6.0
     3  2022-09-21   4   9       3.0       8.0       2.0       7.0
     4  2022-09-22   5  10       4.0       9.0       3.0       8.0
-
     create lags based on other variables.
     >>> import pandas as pd
     >>> from feature_engine.timeseries.forecasting import LagFeatures

From 12aa825298befb9b3b42e68a9e3f8e7a6f3f6bd9 Mon Sep 17 00:00:00 2001
From: Ezzaldin97 <kingtal632@yahoo.com>
Date: Thu, 29 Feb 2024 17:42:58 +0200
Subject: [PATCH 10/24] adjust formatting and code style in tests

---
 .../test_forecasting/test_expanding_window_features.py        | 4 ++--
 tests/test_time_series/test_forecasting/test_lag_features.py  | 4 ++--
 .../test_time_series/test_forecasting/test_window_features.py | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tests/test_time_series/test_forecasting/test_expanding_window_features.py b/tests/test_time_series/test_forecasting/test_expanding_window_features.py
index 6fa89eacd..01e9806ba 100644
--- a/tests/test_time_series/test_forecasting/test_expanding_window_features.py
+++ b/tests/test_time_series/test_forecasting/test_expanding_window_features.py
@@ -557,9 +557,9 @@ def test_correct_groupby_expanding_window_when_using_periods(df_time):
         data=expected_results,
         index=date_time,
     )
-    # When setting group_by_variabels to color
+    # When setting group_by to color
     transformer = ExpandingWindowFeatures(
-        variables=["ambient_temp", "irradiation"], group_by_variables="color"
+        variables=["ambient_temp", "irradiation"], group_by="color"
     )
     df_tr = transformer.fit_transform(df_time)
     assert df_tr.equals(expected_results_df)
diff --git a/tests/test_time_series/test_forecasting/test_lag_features.py b/tests/test_time_series/test_forecasting/test_lag_features.py
index 4bfb72848..79d11f292 100644
--- a/tests/test_time_series/test_forecasting/test_lag_features.py
+++ b/tests/test_time_series/test_forecasting/test_lag_features.py
@@ -361,9 +361,9 @@ def test_correct_groupby_lag_when_using_periods(df_time):
         data=expected_results,
         index=date_time,
     )
-    # When setting group_by_variabels to color
+    # When setting group_by to color
     transformer = LagFeatures(
-        variables=["ambient_temp", "irradiation"], periods=3, group_by_variables="color"
+        variables=["ambient_temp", "irradiation"], periods=3, group_by="color"
     )
     df_tr = transformer.fit_transform(df_time)
     assert df_tr.equals(expected_results_df)
diff --git a/tests/test_time_series/test_forecasting/test_window_features.py b/tests/test_time_series/test_forecasting/test_window_features.py
index 50b373e72..ab213e240 100644
--- a/tests/test_time_series/test_forecasting/test_window_features.py
+++ b/tests/test_time_series/test_forecasting/test_window_features.py
@@ -582,9 +582,9 @@ def test_correct_groupby_window_when_using_periods(df_time):
         data=expected_results,
         index=date_time,
     )
-    # When setting group_by_variabels to color
+    # When setting group_by to color
     transformer = WindowFeatures(
-        variables=["ambient_temp", "irradiation"], window=3, group_by_variables="color"
+        variables=["ambient_temp", "irradiation"], window=3, group_by="color"
     )
     df_tr = transformer.fit_transform(df_time)
     assert df_tr.equals(expected_results_df)

From c3bee668916c9567e470487460774d67dc7f0876 Mon Sep 17 00:00:00 2001
From: Ezzaldin97 <kingtal632@yahoo.com>
Date: Thu, 29 Feb 2024 17:44:42 +0200
Subject: [PATCH 11/24] refactoring timeseries & reformatting the code

---
 feature_engine/selection/drop_psi_features.py |  2 +-
 .../forecasting/base_forecast_transformers.py | 63 +++++++------------
 .../forecasting/expanding_window_features.py  | 36 +++++++----
 .../timeseries/forecasting/lag_features.py    | 45 +++++++------
 .../timeseries/forecasting/window_features.py | 37 ++++++-----
 5 files changed, 94 insertions(+), 89 deletions(-)

diff --git a/feature_engine/selection/drop_psi_features.py b/feature_engine/selection/drop_psi_features.py
index aed8a4f21..3e87adbdb 100644
--- a/feature_engine/selection/drop_psi_features.py
+++ b/feature_engine/selection/drop_psi_features.py
@@ -1,5 +1,5 @@
 import datetime
-from typing import List, Union, Dict
+from typing import Dict, List, Union
 
 import numpy as np
 import pandas as pd
diff --git a/feature_engine/timeseries/forecasting/base_forecast_transformers.py b/feature_engine/timeseries/forecasting/base_forecast_transformers.py
index 80a82a520..3a9506c74 100644
--- a/feature_engine/timeseries/forecasting/base_forecast_transformers.py
+++ b/feature_engine/timeseries/forecasting/base_forecast_transformers.py
@@ -5,21 +5,30 @@
 from sklearn.utils.validation import check_is_fitted
 
 from feature_engine._base_transformers.mixins import GetFeatureNamesOutMixin
-from feature_engine._check_init_parameters.check_variables import \
-    _check_variables_input_value
+from feature_engine._check_init_parameters.check_variables import (
+    _check_variables_input_value,
+)
 from feature_engine._docstrings.fit_attributes import (
-    _feature_names_in_docstring, _n_features_in_docstring)
+    _feature_names_in_docstring,
+    _n_features_in_docstring,
+)
 from feature_engine._docstrings.init_parameters.all_trasnformers import (
-    _drop_original_docstring, _missing_values_docstring)
+    _drop_original_docstring,
+    _missing_values_docstring,
+)
 from feature_engine._docstrings.methods import _fit_not_learn_docstring
 from feature_engine._docstrings.substitute import Substitution
-from feature_engine.dataframe_checks import (_check_contains_inf,
-                                             _check_contains_na,
-                                             _check_X_matches_training_df,
-                                             check_X)
+from feature_engine.dataframe_checks import (
+    _check_contains_inf,
+    _check_contains_na,
+    _check_X_matches_training_df,
+    check_X,
+)
 from feature_engine.tags import _return_tags
-from feature_engine.variable_handling import (check_numerical_variables,
-                                              find_numerical_variables)
+from feature_engine.variable_handling import (
+    check_numerical_variables,
+    find_numerical_variables,
+)
 
 
 @Substitution(
@@ -42,7 +51,7 @@ class BaseForecastTransformer(BaseEstimator, TransformerMixin, GetFeatureNamesOu
 
     {drop_original}
 
-    group_by_variables: str, list of str, default=None
+    group_by: str, int, or list of strings or integers, default=None
             variable of list of variables to create lag features based on.
 
     Attributes
@@ -58,7 +67,7 @@ def __init__(
         variables: Union[None, int, str, List[Union[str, int]]] = None,
         missing_values: str = "raise",
         drop_original: bool = False,
-        group_by_variables: Optional[Union[str, List[str]]] = None,
+        group_by: Union[None, int, str, List[Union[str, int]]] = None,
     ) -> None:
 
         if missing_values not in ["raise", "ignore"]:
@@ -73,26 +82,10 @@ def __init__(
                 f"Got {drop_original} instead."
             )
 
-        # check validity if group by variables passed
-        if group_by_variables:
-            # check group by variables data-types
-            if not (
-                isinstance(group_by_variables, str)
-                or isinstance(group_by_variables, list)
-            ):
-                raise ValueError(
-                    "group_by_variables must be an string or a list of strings. "
-                    f"Got {group_by_variables} instead."
-                )
-            # check if passed list has duplicates.
-            if isinstance(group_by_variables, list):
-                if len(set(group_by_variables)) != len(group_by_variables):
-                    raise ValueError("group_by_variables contains duplicate values")
-
         self.variables = _check_variables_input_value(variables)
         self.missing_values = missing_values
         self.drop_original = drop_original
-        self.group_by_variables = group_by_variables
+        self.group_by = _check_variables_input_value(group_by)
 
     def _check_index(self, X: pd.DataFrame):
         """
@@ -177,18 +170,6 @@ def fit(self, X: pd.DataFrame, y: Optional[pd.Series] = None):
         if self.missing_values == "raise":
             self._check_na_and_inf(X)
 
-        if self.group_by_variables:
-            # check if input group by variables is in input dataframe variables.
-            if isinstance(self.group_by_variables, list):
-                diff = set(self.group_by_variables).difference(X.columns.tolist())
-                if len(diff) != 0:
-                    raise ValueError(f"{list(diff)} not exist in dataframe")
-            else:
-                if self.group_by_variables not in X.columns.tolist():
-                    raise ValueError(
-                        f"{self.group_by_variables} not exists in dataframe"
-                    )
-
         self._get_feature_names_in(X)
 
         return self
diff --git a/feature_engine/timeseries/forecasting/expanding_window_features.py b/feature_engine/timeseries/forecasting/expanding_window_features.py
index 01590f583..9b8e83476 100644
--- a/feature_engine/timeseries/forecasting/expanding_window_features.py
+++ b/feature_engine/timeseries/forecasting/expanding_window_features.py
@@ -3,20 +3,27 @@
 
 from __future__ import annotations
 
-from typing import List, Optional, Union
+from typing import List, Union
 
 import pandas as pd
 
 from feature_engine._docstrings.fit_attributes import (
-    _feature_names_in_docstring, _n_features_in_docstring)
+    _feature_names_in_docstring,
+    _n_features_in_docstring,
+)
 from feature_engine._docstrings.init_parameters.all_trasnformers import (
-    _drop_original_docstring, _missing_values_docstring,
-    _variables_numerical_docstring)
-from feature_engine._docstrings.methods import (_fit_not_learn_docstring,
-                                                _fit_transform_docstring)
+    _drop_original_docstring,
+    _missing_values_docstring,
+    _variables_numerical_docstring,
+)
+from feature_engine._docstrings.methods import (
+    _fit_not_learn_docstring,
+    _fit_transform_docstring,
+)
 from feature_engine._docstrings.substitute import Substitution
-from feature_engine.timeseries.forecasting.base_forecast_transformers import \
-    BaseForecastTransformer
+from feature_engine.timeseries.forecasting.base_forecast_transformers import (
+    BaseForecastTransformer,
+)
 
 
 @Substitution(
@@ -86,6 +93,9 @@ class ExpandingWindowFeatures(BaseForecastTransformer):
 
     {drop_original}
 
+    group_by: str, int, or list of strings or integers, default=None
+            variable or list of variables to group the data by.
+
     Attributes
     ----------
     variables_:
@@ -149,7 +159,7 @@ class ExpandingWindowFeatures(BaseForecastTransformer):
     >>>                  x2 = [6,7,8,9,10, 2,9,10,15,2],
     >>>                  x3=['a','a','a','a','a', 'b','b','b','b','b']
     >>>                ))
-    >>> ewf = ExpandingWindowFeatures(group_by_variables='x3')
+    >>> ewf = ExpandingWindowFeatures(group_by='x3')
     >>> ewf.fit_transform(X)
              date  x1  x2 x3  x1_expanding_mean  x2_expanding_mean
     0  2022-09-18   1   6  a                NaN                NaN
@@ -174,7 +184,7 @@ def __init__(
         sort_index: bool = True,
         missing_values: str = "raise",
         drop_original: bool = False,
-        group_by_variables: Optional[Union[str, List[str]]] = None,
+        group_by: Union[None, int, str, List[Union[str, int]]] = None,
     ) -> None:
 
         if not isinstance(functions, (str, list)) or not all(
@@ -192,7 +202,7 @@ def __init__(
                 f"periods must be a non-negative integer. Got {periods} instead."
             )
 
-        super().__init__(variables, missing_values, drop_original, group_by_variables)
+        super().__init__(variables, missing_values, drop_original, group_by)
 
         self.min_periods = min_periods
         self.functions = functions
@@ -217,9 +227,9 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
         # Common dataframe checks and setting up.
         X = self._check_transform_input_and_state(X)
 
-        if self.group_by_variables:
+        if self.group_by:
             tmp = self._agg_expanding_window_features(
-                grouped_df=X.groupby(self.group_by_variables)
+                grouped_df=X.groupby(self.group_by)
             )
         else:
             tmp = (
diff --git a/feature_engine/timeseries/forecasting/lag_features.py b/feature_engine/timeseries/forecasting/lag_features.py
index dbbec5443..86f89a733 100644
--- a/feature_engine/timeseries/forecasting/lag_features.py
+++ b/feature_engine/timeseries/forecasting/lag_features.py
@@ -1,20 +1,27 @@
 # Authors: Morgan Sell <morganpsell@gmail.com>
 # License: BSD 3 clause
 
-from typing import List, Optional, Union
+from typing import List, Union
 
 import pandas as pd
 
 from feature_engine._docstrings.fit_attributes import (
-    _feature_names_in_docstring, _n_features_in_docstring)
+    _feature_names_in_docstring,
+    _n_features_in_docstring,
+)
 from feature_engine._docstrings.init_parameters.all_trasnformers import (
-    _drop_original_docstring, _missing_values_docstring,
-    _variables_numerical_docstring)
-from feature_engine._docstrings.methods import (_fit_not_learn_docstring,
-                                                _fit_transform_docstring)
+    _drop_original_docstring,
+    _missing_values_docstring,
+    _variables_numerical_docstring,
+)
+from feature_engine._docstrings.methods import (
+    _fit_not_learn_docstring,
+    _fit_transform_docstring,
+)
 from feature_engine._docstrings.substitute import Substitution
-from feature_engine.timeseries.forecasting.base_forecast_transformers import \
-    BaseForecastTransformer
+from feature_engine.timeseries.forecasting.base_forecast_transformers import (
+    BaseForecastTransformer,
+)
 
 
 @Substitution(
@@ -67,7 +74,7 @@ class LagFeatures(BaseForecastTransformer):
 
     {drop_original}
 
-    group_by_variables: str, list of str, default=None
+    group_by: str, int, or list of strings or integers, default=None
             variable of list of variables to create lag features based on.
 
     Attributes
@@ -143,7 +150,7 @@ def __init__(
         sort_index: bool = True,
         missing_values: str = "raise",
         drop_original: bool = False,
-        group_by_variables: Optional[Union[str, List[str]]] = None,
+        group_by: Union[None, int, str, List[Union[str, int]]] = None,
     ) -> None:
 
         if not (
@@ -168,7 +175,7 @@ def __init__(
                 "sort_index takes values True and False." f"Got {sort_index} instead."
             )
 
-        super().__init__(variables, missing_values, drop_original, group_by_variables)
+        super().__init__(variables, missing_values, drop_original, group_by)
 
         self.periods = periods
         self.freq = freq
@@ -197,8 +204,8 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
             if isinstance(self.freq, list):
                 df_ls = []
                 for fr in self.freq:
-                    if self.group_by_variables:
-                        tmp = X.groupby(self.group_by_variables)[self.variables_].shift(
+                    if self.group_by:
+                        tmp = X.groupby(self.group_by)[self.variables_].shift(
                             freq=fr,
                         )
                     else:
@@ -210,8 +217,8 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
                 tmp = pd.concat(df_ls, axis=1)
 
             else:
-                if self.group_by_variables:
-                    tmp = X.groupby(self.group_by_variables)[self.variables_].shift(
+                if self.group_by:
+                    tmp = X.groupby(self.group_by)[self.variables_].shift(
                         freq=self.freq,
                     )
                 else:
@@ -224,8 +231,8 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
             if isinstance(self.periods, list):
                 df_ls = []
                 for pr in self.periods:
-                    if self.group_by_variables:
-                        tmp = X.groupby(self.group_by_variables)[self.variables_].shift(
+                    if self.group_by:
+                        tmp = X.groupby(self.group_by)[self.variables_].shift(
                             periods=pr,
                         )
                     else:
@@ -237,8 +244,8 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
                 tmp = pd.concat(df_ls, axis=1)
 
             else:
-                if self.group_by_variables:
-                    tmp = X.groupby(self.group_by_variables)[self.variables_].shift(
+                if self.group_by:
+                    tmp = X.groupby(self.group_by)[self.variables_].shift(
                         periods=self.periods,
                     )
                 else:
diff --git a/feature_engine/timeseries/forecasting/window_features.py b/feature_engine/timeseries/forecasting/window_features.py
index def4c7706..df346fb40 100644
--- a/feature_engine/timeseries/forecasting/window_features.py
+++ b/feature_engine/timeseries/forecasting/window_features.py
@@ -1,17 +1,24 @@
-from typing import Callable, List, Optional, Union
+from typing import Callable, List, Union
 
 import pandas as pd
 
 from feature_engine._docstrings.fit_attributes import (
-    _feature_names_in_docstring, _n_features_in_docstring)
+    _feature_names_in_docstring,
+    _n_features_in_docstring,
+)
 from feature_engine._docstrings.init_parameters.all_trasnformers import (
-    _drop_original_docstring, _missing_values_docstring,
-    _variables_numerical_docstring)
-from feature_engine._docstrings.methods import (_fit_not_learn_docstring,
-                                                _fit_transform_docstring)
+    _drop_original_docstring,
+    _missing_values_docstring,
+    _variables_numerical_docstring,
+)
+from feature_engine._docstrings.methods import (
+    _fit_not_learn_docstring,
+    _fit_transform_docstring,
+)
 from feature_engine._docstrings.substitute import Substitution
-from feature_engine.timeseries.forecasting.base_forecast_transformers import \
-    BaseForecastTransformer
+from feature_engine.timeseries.forecasting.base_forecast_transformers import (
+    BaseForecastTransformer,
+)
 
 
 @Substitution(
@@ -91,7 +98,7 @@ class WindowFeatures(BaseForecastTransformer):
 
     {drop_original}
 
-    group_by_variables: str, list of str, default=None
+    group_by: str, str, int, or list of strings or integers, default=None
             variable of list of variables to create lag features based on.
 
     Attributes
@@ -152,7 +159,7 @@ def __init__(
         sort_index: bool = True,
         missing_values: str = "raise",
         drop_original: bool = False,
-        group_by_variables: Optional[Union[str, List[str]]] = None,
+        group_by: Union[None, int, str, List[Union[str, int]]] = None,
     ) -> None:
 
         if isinstance(window, list) and len(window) != len(set(window)):
@@ -173,7 +180,7 @@ def __init__(
                 f"periods must be a positive integer. Got {periods} instead."
             )
 
-        super().__init__(variables, missing_values, drop_original, group_by_variables)
+        super().__init__(variables, missing_values, drop_original, group_by)
 
         self.window = window
         self.min_periods = min_periods
@@ -202,9 +209,9 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
         if isinstance(self.window, list):
             df_ls = []
             for win in self.window:
-                if self.group_by_variables:
+                if self.group_by:
                     tmp = self._agg_window_features(
-                        grouped_df=X.groupby(self.group_by_variables),
+                        grouped_df=X.groupby(self.group_by),
                         win=win,
                     )
                 else:
@@ -218,9 +225,9 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
             tmp = pd.concat(df_ls, axis=1)
 
         else:
-            if self.group_by_variables:
+            if self.group_by:
                 tmp = self._agg_window_features(
-                    grouped_df=X.groupby(self.group_by_variables),
+                    grouped_df=X.groupby(self.group_by),
                     win=self.window,
                 )
             else:

From 67725dceddb7bea3b5730347fca0d1b6f25905cb Mon Sep 17 00:00:00 2001
From: Ezzaldin97 <kingtal632@yahoo.com>
Date: Sat, 2 Mar 2024 21:33:48 +0200
Subject: [PATCH 12/24] adjust code formatting & style in tests

---
 .../test_check_estimator_forecasting.py                | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/tests/test_time_series/test_forecasting/test_check_estimator_forecasting.py b/tests/test_time_series/test_forecasting/test_check_estimator_forecasting.py
index 978ef9ddf..2ac81edad 100644
--- a/tests/test_time_series/test_forecasting/test_check_estimator_forecasting.py
+++ b/tests/test_time_series/test_forecasting/test_check_estimator_forecasting.py
@@ -5,10 +5,12 @@
 from sklearn.pipeline import Pipeline
 from sklearn.utils.estimator_checks import check_estimator
 
-from feature_engine.timeseries.forecasting import (ExpandingWindowFeatures,
-                                                   LagFeatures, WindowFeatures)
-from tests.estimator_checks.estimator_checks import \
-    check_feature_engine_estimator
+from feature_engine.timeseries.forecasting import (
+    ExpandingWindowFeatures,
+    LagFeatures,
+    WindowFeatures,
+)
+from tests.estimator_checks.estimator_checks import check_feature_engine_estimator
 
 _estimators = [
     LagFeatures(missing_values="ignore"),

From 9cb01ea852b67ea8a12fa773e0bf03366b545462 Mon Sep 17 00:00:00 2001
From: Ezzaldin97 <kingtal632@yahoo.com>
Date: Sat, 2 Mar 2024 23:29:25 +0200
Subject: [PATCH 13/24] fix create lag features using groupby & freq parameters

---
 .../timeseries/forecasting/lag_features.py    | 39 ++++++++++++++++++-
 1 file changed, 37 insertions(+), 2 deletions(-)

diff --git a/feature_engine/timeseries/forecasting/lag_features.py b/feature_engine/timeseries/forecasting/lag_features.py
index 86f89a733..6df173170 100644
--- a/feature_engine/timeseries/forecasting/lag_features.py
+++ b/feature_engine/timeseries/forecasting/lag_features.py
@@ -205,7 +205,8 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
                 df_ls = []
                 for fr in self.freq:
                     if self.group_by:
-                        tmp = X.groupby(self.group_by)[self.variables_].shift(
+                        tmp = self._agg_freq_lags(
+                            grouped_df=X.groupby(self.group_by),
                             freq=fr,
                         )
                     else:
@@ -218,7 +219,8 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
 
             else:
                 if self.group_by:
-                    tmp = X.groupby(self.group_by)[self.variables_].shift(
+                    tmp = self._agg_freq_lags(
+                        grouped_df=X.groupby(self.group_by),
                         freq=self.freq,
                     )
                 else:
@@ -287,3 +289,36 @@ def _get_new_features_name(self) -> List:
             ]
 
         return feature_names
+    
+    def _agg_freq_lags(
+            self, 
+            grouped_df: pd.core.groupby.generic.DataFrameGroupBy, 
+            freq: Union[str, List[str]],
+    ) -> Union[pd.Series, pd.DataFrame]:
+        """_summary_
+
+        Parameters
+        ----------
+        grouped_df : pd.core.groupby.generic.DataFrameGroupBy
+            dataframe of groups
+        freq : Union[str, List[str]]
+            Offset to use from the tseries module or time rule. See parameter `freq` in
+            pandas `shift()`. It is the same functionality. If freq is a list, lag features
+            will be created for each one of the frequency values in the list.
+
+        Returns
+        -------
+        Union[pd.Series, pd.DataFrame]
+            lag feature or dataframe of lag features
+        """
+        tmp_data = []
+        for _, group in grouped_df:
+            original_idx = group.index
+            tmp = (
+                group[self.variables_]
+                .shift(freq=freq)
+                .reindex(original_idx)
+            )
+            tmp_data.append(tmp)
+        tmp = pd.concat(tmp_data).sort_index()
+        return tmp
\ No newline at end of file

From 72ce43cba3be6f902af8c1415c2c57b313c2773b Mon Sep 17 00:00:00 2001
From: Ezzaldin97 <kingtal632@yahoo.com>
Date: Sun, 3 Mar 2024 01:06:36 +0200
Subject: [PATCH 14/24] adjust code style

---
 .../timeseries/forecasting/lag_features.py    | 20 +++++++------------
 1 file changed, 7 insertions(+), 13 deletions(-)

diff --git a/feature_engine/timeseries/forecasting/lag_features.py b/feature_engine/timeseries/forecasting/lag_features.py
index 6df173170..76f3563b6 100644
--- a/feature_engine/timeseries/forecasting/lag_features.py
+++ b/feature_engine/timeseries/forecasting/lag_features.py
@@ -289,11 +289,11 @@ def _get_new_features_name(self) -> List:
             ]
 
         return feature_names
-    
+
     def _agg_freq_lags(
-            self, 
-            grouped_df: pd.core.groupby.generic.DataFrameGroupBy, 
-            freq: Union[str, List[str]],
+        self,
+        grouped_df: pd.core.groupby.generic.DataFrameGroupBy,
+        freq: Union[str, List[str]],
     ) -> Union[pd.Series, pd.DataFrame]:
         """_summary_
 
@@ -302,9 +302,7 @@ def _agg_freq_lags(
         grouped_df : pd.core.groupby.generic.DataFrameGroupBy
             dataframe of groups
         freq : Union[str, List[str]]
-            Offset to use from the tseries module or time rule. See parameter `freq` in
-            pandas `shift()`. It is the same functionality. If freq is a list, lag features
-            will be created for each one of the frequency values in the list.
+            Offset to use from the tseries module or time rule.
 
         Returns
         -------
@@ -314,11 +312,7 @@ def _agg_freq_lags(
         tmp_data = []
         for _, group in grouped_df:
             original_idx = group.index
-            tmp = (
-                group[self.variables_]
-                .shift(freq=freq)
-                .reindex(original_idx)
-            )
+            tmp = group[self.variables_].shift(freq=freq).reindex(original_idx)
             tmp_data.append(tmp)
         tmp = pd.concat(tmp_data).sort_index()
-        return tmp
\ No newline at end of file
+        return tmp

From 9d999b070531984205742ad8247332f8afa3fdae Mon Sep 17 00:00:00 2001
From: Ezzaldin97 <kingtal632@yahoo.com>
Date: Sun, 3 Mar 2024 01:09:26 +0200
Subject: [PATCH 15/24] add test cases to ensure code coverage

---
 .../test_forecasting/test_lag_features.py     | 472 ++++++++++++++++++
 .../test_forecasting/test_window_features.py  | 168 +++++++
 2 files changed, 640 insertions(+)

diff --git a/tests/test_time_series/test_forecasting/test_lag_features.py b/tests/test_time_series/test_forecasting/test_lag_features.py
index 79d11f292..f55ff168c 100644
--- a/tests/test_time_series/test_forecasting/test_lag_features.py
+++ b/tests/test_time_series/test_forecasting/test_lag_features.py
@@ -367,3 +367,475 @@ def test_correct_groupby_lag_when_using_periods(df_time):
     )
     df_tr = transformer.fit_transform(df_time)
     assert df_tr.equals(expected_results_df)
+
+
+def test_multiple_periods_with_groupby(df_time):
+    date_time = [
+        pd.Timestamp("2020-05-15 12:00:00"),
+        pd.Timestamp("2020-05-15 12:15:00"),
+        pd.Timestamp("2020-05-15 12:30:00"),
+        pd.Timestamp("2020-05-15 12:45:00"),
+        pd.Timestamp("2020-05-15 13:00:00"),
+        pd.Timestamp("2020-05-15 13:15:00"),
+        pd.Timestamp("2020-05-15 13:30:00"),
+        pd.Timestamp("2020-05-15 13:45:00"),
+        pd.Timestamp("2020-05-15 14:00:00"),
+        pd.Timestamp("2020-05-15 14:15:00"),
+        pd.Timestamp("2020-05-15 14:30:00"),
+        pd.Timestamp("2020-05-15 14:45:00"),
+        pd.Timestamp("2020-05-15 15:00:00"),
+        pd.Timestamp("2020-05-15 15:15:00"),
+        pd.Timestamp("2020-05-15 15:30:00"),
+    ]
+    expected_results = {
+        "ambient_temp": [
+            31.31,
+            31.51,
+            32.15,
+            32.39,
+            32.62,
+            32.5,
+            32.52,
+            32.68,
+            33.76,
+            34.13,
+            34.08,
+            33.7,
+            33.89,
+            34.04,
+            34.4,
+        ],
+        "module_temp": [
+            49.18,
+            49.84,
+            52.35,
+            50.63,
+            49.61,
+            47.01,
+            46.67,
+            47.52,
+            49.8,
+            55.03,
+            54.52,
+            47.62,
+            46.03,
+            44.29,
+            46.74,
+        ],
+        "irradiation": [
+            0.51,
+            0.79,
+            0.65,
+            0.76,
+            0.42,
+            0.49,
+            0.57,
+            0.56,
+            0.74,
+            0.89,
+            0.47,
+            0.54,
+            0.4,
+            0.45,
+            0.57,
+        ],
+        "color": [
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "green",
+            "green",
+            "green",
+            "green",
+            "green",
+        ],
+        "ambient_temp_lag_2": [
+            np.nan,
+            np.nan,
+            31.31,
+            31.51,
+            32.15,
+            32.39,
+            32.62,
+            32.5,
+            32.52,
+            32.68,
+            np.nan,
+            np.nan,
+            34.08,
+            33.7,
+            33.89,
+        ],
+        "irradiation_lag_2": [
+            np.nan,
+            np.nan,
+            0.51,
+            0.79,
+            0.65,
+            0.76,
+            0.42,
+            0.49,
+            0.57,
+            0.56,
+            np.nan,
+            np.nan,
+            0.47,
+            0.54,
+            0.4,
+        ],
+        "ambient_temp_lag_3": [
+            np.nan,
+            np.nan,
+            np.nan,
+            31.31,
+            31.51,
+            32.15,
+            32.39,
+            32.62,
+            32.5,
+            32.52,
+            np.nan,
+            np.nan,
+            np.nan,
+            34.08,
+            33.7,
+        ],
+        "irradiation_lag_3": [
+            np.nan,
+            np.nan,
+            np.nan,
+            0.51,
+            0.79,
+            0.65,
+            0.76,
+            0.42,
+            0.49,
+            0.57,
+            np.nan,
+            np.nan,
+            np.nan,
+            0.47,
+            0.54,
+        ],
+    }
+    expected_results_df = pd.DataFrame(
+        data=expected_results,
+        index=date_time,
+    )
+    # When setting group_by to color
+    transformer = LagFeatures(
+        variables=["ambient_temp", "irradiation"], periods=[2, 3], group_by="color"
+    )
+    df_tr = transformer.fit_transform(df_time)
+    assert df_tr.equals(expected_results_df)
+
+
+def test_correct_groupby_lag_when_using_freq(df_time):
+    date_time = [
+        pd.Timestamp("2020-05-15 12:00:00"),
+        pd.Timestamp("2020-05-15 12:15:00"),
+        pd.Timestamp("2020-05-15 12:30:00"),
+        pd.Timestamp("2020-05-15 12:45:00"),
+        pd.Timestamp("2020-05-15 13:00:00"),
+        pd.Timestamp("2020-05-15 13:15:00"),
+        pd.Timestamp("2020-05-15 13:30:00"),
+        pd.Timestamp("2020-05-15 13:45:00"),
+        pd.Timestamp("2020-05-15 14:00:00"),
+        pd.Timestamp("2020-05-15 14:15:00"),
+        pd.Timestamp("2020-05-15 14:30:00"),
+        pd.Timestamp("2020-05-15 14:45:00"),
+        pd.Timestamp("2020-05-15 15:00:00"),
+        pd.Timestamp("2020-05-15 15:15:00"),
+        pd.Timestamp("2020-05-15 15:30:00"),
+    ]
+    expected_results = {
+        "ambient_temp": [
+            31.31,
+            31.51,
+            32.15,
+            32.39,
+            32.62,
+            32.5,
+            32.52,
+            32.68,
+            33.76,
+            34.13,
+            34.08,
+            33.7,
+            33.89,
+            34.04,
+            34.4,
+        ],
+        "module_temp": [
+            49.18,
+            49.84,
+            52.35,
+            50.63,
+            49.61,
+            47.01,
+            46.67,
+            47.52,
+            49.8,
+            55.03,
+            54.52,
+            47.62,
+            46.03,
+            44.29,
+            46.74,
+        ],
+        "irradiation": [
+            0.51,
+            0.79,
+            0.65,
+            0.76,
+            0.42,
+            0.49,
+            0.57,
+            0.56,
+            0.74,
+            0.89,
+            0.47,
+            0.54,
+            0.4,
+            0.45,
+            0.57,
+        ],
+        "color": [
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "green",
+            "green",
+            "green",
+            "green",
+            "green",
+        ],
+        "irradiation_lag_15min": [
+            np.nan,
+            0.51,
+            0.79,
+            0.65,
+            0.76,
+            0.42,
+            0.49,
+            0.57,
+            0.56,
+            0.74,
+            np.nan,
+            0.47,
+            0.54,
+            0.4,
+            0.45,
+        ],
+        "ambient_temp_lag_15min": [
+            np.nan,
+            31.31,
+            31.51,
+            32.15,
+            32.39,
+            32.62,
+            32.5,
+            32.52,
+            32.68,
+            33.76,
+            np.nan,
+            34.08,
+            33.7,
+            33.89,
+            34.04,
+        ],
+    }
+    expected_results_df = pd.DataFrame(
+        data=expected_results,
+        index=date_time,
+    )
+    # When setting group_by to color
+    transformer = LagFeatures(
+        variables=["irradiation", "ambient_temp"], freq="15min", group_by="color"
+    )
+    df_tr = transformer.fit_transform(df_time)
+    assert df_tr.equals(expected_results_df)
+
+
+def test_multiple_freq_with_groupby(df_time):
+    date_time = [
+        pd.Timestamp("2020-05-15 12:00:00"),
+        pd.Timestamp("2020-05-15 12:15:00"),
+        pd.Timestamp("2020-05-15 12:30:00"),
+        pd.Timestamp("2020-05-15 12:45:00"),
+        pd.Timestamp("2020-05-15 13:00:00"),
+        pd.Timestamp("2020-05-15 13:15:00"),
+        pd.Timestamp("2020-05-15 13:30:00"),
+        pd.Timestamp("2020-05-15 13:45:00"),
+        pd.Timestamp("2020-05-15 14:00:00"),
+        pd.Timestamp("2020-05-15 14:15:00"),
+        pd.Timestamp("2020-05-15 14:30:00"),
+        pd.Timestamp("2020-05-15 14:45:00"),
+        pd.Timestamp("2020-05-15 15:00:00"),
+        pd.Timestamp("2020-05-15 15:15:00"),
+        pd.Timestamp("2020-05-15 15:30:00"),
+    ]
+    expected_results = {
+        "ambient_temp": [
+            31.31,
+            31.51,
+            32.15,
+            32.39,
+            32.62,
+            32.5,
+            32.52,
+            32.68,
+            33.76,
+            34.13,
+            34.08,
+            33.7,
+            33.89,
+            34.04,
+            34.4,
+        ],
+        "module_temp": [
+            49.18,
+            49.84,
+            52.35,
+            50.63,
+            49.61,
+            47.01,
+            46.67,
+            47.52,
+            49.8,
+            55.03,
+            54.52,
+            47.62,
+            46.03,
+            44.29,
+            46.74,
+        ],
+        "irradiation": [
+            0.51,
+            0.79,
+            0.65,
+            0.76,
+            0.42,
+            0.49,
+            0.57,
+            0.56,
+            0.74,
+            0.89,
+            0.47,
+            0.54,
+            0.4,
+            0.45,
+            0.57,
+        ],
+        "color": [
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "green",
+            "green",
+            "green",
+            "green",
+            "green",
+        ],
+        "irradiation_lag_15min": [
+            np.nan,
+            0.51,
+            0.79,
+            0.65,
+            0.76,
+            0.42,
+            0.49,
+            0.57,
+            0.56,
+            0.74,
+            np.nan,
+            0.47,
+            0.54,
+            0.4,
+            0.45,
+        ],
+        "ambient_temp_lag_15min": [
+            np.nan,
+            31.31,
+            31.51,
+            32.15,
+            32.39,
+            32.62,
+            32.5,
+            32.52,
+            32.68,
+            33.76,
+            np.nan,
+            34.08,
+            33.7,
+            33.89,
+            34.04,
+        ],
+        "irradiation_lag_30min": [
+            np.nan,
+            np.nan,
+            0.51,
+            0.79,
+            0.65,
+            0.76,
+            0.42,
+            0.49,
+            0.57,
+            0.56,
+            np.nan,
+            np.nan,
+            0.47,
+            0.54,
+            0.4,
+        ],
+        "ambient_temp_lag_30min": [
+            np.nan,
+            np.nan,
+            31.31,
+            31.51,
+            32.15,
+            32.39,
+            32.62,
+            32.5,
+            32.52,
+            32.68,
+            np.nan,
+            np.nan,
+            34.08,
+            33.7,
+            33.89,
+        ],
+    }
+    expected_results_df = pd.DataFrame(
+        data=expected_results,
+        index=date_time,
+    )
+    # When setting group_by to color
+    transformer = LagFeatures(
+        variables=["irradiation", "ambient_temp"],
+        freq=["15min", "30min"],
+        group_by="color",
+    )
+    df_tr = transformer.fit_transform(df_time)
+    assert df_tr.equals(expected_results_df)
diff --git a/tests/test_time_series/test_forecasting/test_window_features.py b/tests/test_time_series/test_forecasting/test_window_features.py
index ab213e240..40fdbc6fa 100644
--- a/tests/test_time_series/test_forecasting/test_window_features.py
+++ b/tests/test_time_series/test_forecasting/test_window_features.py
@@ -588,3 +588,171 @@ def test_correct_groupby_window_when_using_periods(df_time):
     )
     df_tr = transformer.fit_transform(df_time)
     assert df_tr.equals(expected_results_df)
+
+
+def test_multiple_windows_with_groupby(df_time):
+    date_time = [
+        pd.Timestamp("2020-05-15 12:00:00"),
+        pd.Timestamp("2020-05-15 12:15:00"),
+        pd.Timestamp("2020-05-15 12:30:00"),
+        pd.Timestamp("2020-05-15 12:45:00"),
+        pd.Timestamp("2020-05-15 13:00:00"),
+        pd.Timestamp("2020-05-15 13:15:00"),
+        pd.Timestamp("2020-05-15 13:30:00"),
+        pd.Timestamp("2020-05-15 13:45:00"),
+        pd.Timestamp("2020-05-15 14:00:00"),
+        pd.Timestamp("2020-05-15 14:15:00"),
+        pd.Timestamp("2020-05-15 14:30:00"),
+        pd.Timestamp("2020-05-15 14:45:00"),
+        pd.Timestamp("2020-05-15 15:00:00"),
+        pd.Timestamp("2020-05-15 15:15:00"),
+        pd.Timestamp("2020-05-15 15:30:00"),
+    ]
+    expected_results = {
+        "ambient_temp": [
+            31.31,
+            31.51,
+            32.15,
+            32.39,
+            32.62,
+            32.5,
+            32.52,
+            32.68,
+            33.76,
+            34.13,
+            34.08,
+            33.7,
+            33.89,
+            34.04,
+            34.4,
+        ],
+        "module_temp": [
+            49.18,
+            49.84,
+            52.35,
+            50.63,
+            49.61,
+            47.01,
+            46.67,
+            47.52,
+            49.8,
+            55.03,
+            54.52,
+            47.62,
+            46.03,
+            44.29,
+            46.74,
+        ],
+        "irradiation": [
+            0.51,
+            0.79,
+            0.65,
+            0.76,
+            0.42,
+            0.49,
+            0.57,
+            0.56,
+            0.74,
+            0.89,
+            0.47,
+            0.54,
+            0.4,
+            0.45,
+            0.57,
+        ],
+        "color": [
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "blue",
+            "green",
+            "green",
+            "green",
+            "green",
+            "green",
+        ],
+        "ambient_temp_window_2_mean": [
+            np.nan,
+            np.nan,
+            31.41,
+            31.83,
+            32.269999999999996,
+            32.505,
+            32.56,
+            32.510000000000005,
+            32.60000000000001,
+            33.22,
+            np.nan,
+            np.nan,
+            33.89,
+            33.795,
+            33.965,
+        ],
+        "irradiation_window_2_mean": [
+            np.nan,
+            np.nan,
+            0.65,
+            0.72,
+            0.7050000000000001,
+            0.59,
+            0.45499999999999996,
+            0.53,
+            0.5650000000000001,
+            0.6500000000000001,
+            np.nan,
+            np.nan,
+            0.505,
+            0.47000000000000003,
+            0.42500000000000004,
+        ],
+        "ambient_temp_window_3_mean": [
+            np.nan,
+            np.nan,
+            np.nan,
+            31.656666666666666,
+            32.016666666666666,
+            32.38666666666666,
+            32.50333333333333,
+            32.54666666666667,
+            32.56666666666667,
+            32.98666666666667,
+            np.nan,
+            np.nan,
+            np.nan,
+            33.89,
+            33.876666666666665,
+        ],
+        "irradiation_window_3_mean": [
+            np.nan,
+            np.nan,
+            np.nan,
+            0.65,
+            0.7333333333333334,
+            0.61,
+            0.5566666666666668,
+            0.49333333333333335,
+            0.54,
+            0.6233333333333334,
+            np.nan,
+            np.nan,
+            np.nan,
+            0.47000000000000003,
+            0.4633333333333334,
+        ],
+    }
+    expected_results_df = pd.DataFrame(
+        data=expected_results,
+        index=date_time,
+    )
+    # When setting group_by to color
+    transformer = WindowFeatures(
+        variables=["ambient_temp", "irradiation"], window=[2, 3], group_by="color"
+    )
+    df_tr = transformer.fit_transform(df_time)
+    assert df_tr.equals(expected_results_df)

From b7b8bc9f4365f10f7c39480b5a039a8af378a55b Mon Sep 17 00:00:00 2001
From: Ezzaldin97 <kingtal632@yahoo.com>
Date: Mon, 1 Apr 2024 17:02:39 +0200
Subject: [PATCH 16/24] add group_by docstring to _docstring

---
 .../_docstrings/init_parameters/all_trasnformers.py        | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/feature_engine/_docstrings/init_parameters/all_trasnformers.py b/feature_engine/_docstrings/init_parameters/all_trasnformers.py
index 5c699d3de..ad9ff71e5 100644
--- a/feature_engine/_docstrings/init_parameters/all_trasnformers.py
+++ b/feature_engine/_docstrings/init_parameters/all_trasnformers.py
@@ -22,3 +22,10 @@
         contain missing values. If `'ignore'`, missing data will be ignored when
         learning parameters or performing the transformation.
         """.rstrip()
+
+_group_by_docstring = """group_by: str, int, or list of str or int, default=None
+        Name of the variable, or list of variables, to group the data by before
+        creating the new features. The features are computed separately within
+        each group, so observations from one group are never used for another.
+        If None, the features are computed over the entire dataframe.
+        """.rstrip()
\ No newline at end of file

From ba375a420070a78c1dba3f7e26846b5a56aafe2f Mon Sep 17 00:00:00 2001
From: Ezzaldin97 <kingtal632@yahoo.com>
Date: Mon, 1 Apr 2024 17:57:55 +0200
Subject: [PATCH 17/24] remove check input of group_by

---
 .../timeseries/forecasting/base_forecast_transformers.py   | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/feature_engine/timeseries/forecasting/base_forecast_transformers.py b/feature_engine/timeseries/forecasting/base_forecast_transformers.py
index 3a9506c74..1c3c9289e 100644
--- a/feature_engine/timeseries/forecasting/base_forecast_transformers.py
+++ b/feature_engine/timeseries/forecasting/base_forecast_transformers.py
@@ -15,6 +15,7 @@
 from feature_engine._docstrings.init_parameters.all_trasnformers import (
     _drop_original_docstring,
     _missing_values_docstring,
+    _group_by_docstring,
 )
 from feature_engine._docstrings.methods import _fit_not_learn_docstring
 from feature_engine._docstrings.substitute import Substitution
@@ -37,6 +38,7 @@
     feature_names_in_=_feature_names_in_docstring,
     fit=_fit_not_learn_docstring,
     n_features_in_=_n_features_in_docstring,
+    group_by=_group_by_docstring,
 )
 class BaseForecastTransformer(BaseEstimator, TransformerMixin, GetFeatureNamesOutMixin):
     """
@@ -51,8 +53,7 @@ class BaseForecastTransformer(BaseEstimator, TransformerMixin, GetFeatureNamesOu
 
     {drop_original}
 
-    group_by: str, str, int, or list of strings or integers, default=None
-            variable of list of variables to create lag features based on.
+    {group_by}
 
     Attributes
     ----------
@@ -85,7 +86,7 @@ def __init__(
         self.variables = _check_variables_input_value(variables)
         self.missing_values = missing_values
         self.drop_original = drop_original
-        self.group_by = _check_variables_input_value(group_by)
+        self.group_by = group_by
 
     def _check_index(self, X: pd.DataFrame):
         """

From 90f08f46f5f5e19196a4bfdc373c58bb042e81c4 Mon Sep 17 00:00:00 2001
From: Ezzaldin97 <kingtal632@yahoo.com>
Date: Mon, 1 Apr 2024 17:59:54 +0200
Subject: [PATCH 18/24] enhance performance of group_by window features
 operations

---
 .../timeseries/forecasting/window_features.py | 34 +++++++++----------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/feature_engine/timeseries/forecasting/window_features.py b/feature_engine/timeseries/forecasting/window_features.py
index df346fb40..99c45d4b7 100644
--- a/feature_engine/timeseries/forecasting/window_features.py
+++ b/feature_engine/timeseries/forecasting/window_features.py
@@ -10,6 +10,7 @@
     _drop_original_docstring,
     _missing_values_docstring,
     _variables_numerical_docstring,
+    _group_by_docstring,
 )
 from feature_engine._docstrings.methods import (
     _fit_not_learn_docstring,
@@ -29,6 +30,7 @@
     n_features_in_=_n_features_in_docstring,
     fit=_fit_not_learn_docstring,
     fit_transform=_fit_transform_docstring,
+    group_by=_group_by_docstring,
 )
 class WindowFeatures(BaseForecastTransformer):
     """
@@ -98,8 +100,7 @@ class WindowFeatures(BaseForecastTransformer):
 
     {drop_original}
 
-    group_by: str, str, int, or list of strings or integers, default=None
-            variable of list of variables to create lag features based on.
+    {group_by}
 
     Attributes
     ----------
@@ -210,10 +211,12 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
             df_ls = []
             for win in self.window:
                 if self.group_by:
-                    tmp = self._agg_window_features(
-                        grouped_df=X.groupby(self.group_by),
+                    tmp = X.groupby(self.group_by, as_index=False).apply(
+                        self._agg_window_features,
                         win=win,
+                        include_groups=False,
                     )
+                    tmp = tmp.reset_index(drop = True)
                 else:
                     tmp = (
                         X[self.variables_]
@@ -226,10 +229,12 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
 
         else:
             if self.group_by:
-                tmp = self._agg_window_features(
-                    grouped_df=X.groupby(self.group_by),
+                tmp = X.groupby(self.group_by, as_index=False).apply(
+                    self._agg_window_features,
                     win=self.window,
+                    include_groups=False,
                 )
+                tmp = tmp.reset_index(drop = True)
             else:
                 tmp = (
                     X[self.variables_]
@@ -290,14 +295,9 @@ def _agg_window_features(
         Union[pd.Series, pd.DataFrame]
             returned window features
         """
-        tmp_data = []
-        for _, group in grouped_df:
-            tmp = (
-                group[self.variables_]
-                .rolling(window=win)
-                .agg(self.functions)
-                .shift(periods=self.periods, freq=self.freq)
-            )
-            tmp_data.append(tmp)
-        tmp = pd.concat(tmp_data).sort_index()
-        return tmp
+        return (
+            grouped_df[self.variables_]
+            .rolling(window=win)
+            .agg(self.functions)
+            .shift(periods=self.periods, freq=self.freq)
+        )

From 66baa750ae6b874a65d534617c0250343cbb5968 Mon Sep 17 00:00:00 2001
From: Ezzaldin97 <kingtal632@yahoo.com>
Date: Mon, 1 Apr 2024 18:00:26 +0200
Subject: [PATCH 19/24] enhance performance of group_by expanding window
 features operations

---
 .../forecasting/expanding_window_features.py  | 58 +++++--------------
 1 file changed, 13 insertions(+), 45 deletions(-)

diff --git a/feature_engine/timeseries/forecasting/expanding_window_features.py b/feature_engine/timeseries/forecasting/expanding_window_features.py
index 9b8e83476..561111f9d 100644
--- a/feature_engine/timeseries/forecasting/expanding_window_features.py
+++ b/feature_engine/timeseries/forecasting/expanding_window_features.py
@@ -15,6 +15,7 @@
     _drop_original_docstring,
     _missing_values_docstring,
     _variables_numerical_docstring,
+    _group_by_docstring,
 )
 from feature_engine._docstrings.methods import (
     _fit_not_learn_docstring,
@@ -34,6 +35,7 @@
     n_features_in_=_n_features_in_docstring,
     fit=_fit_not_learn_docstring,
     fit_transform=_fit_transform_docstring,
+    group_by=_group_by_docstring,
 )
 class ExpandingWindowFeatures(BaseForecastTransformer):
     """
@@ -93,8 +95,7 @@ class ExpandingWindowFeatures(BaseForecastTransformer):
 
     {drop_original}
 
-    group_by: str, str, int, or list of strings or integers, default=None
-            variable of list of variables to create lag features based on.
+    {group_by}
 
     Attributes
     ----------
@@ -142,36 +143,6 @@ class ExpandingWindowFeatures(BaseForecastTransformer):
     2  2022-09-20   3   8                1.5                6.5
     3  2022-09-21   4   9                2.0                7.0
     4  2022-09-22   5  10                2.5                7.5
-    create expanding window features based on other variables.
-    >>> import pandas as pd
-    >>> from feature_engine.timeseries.forecasting import ExpandingWindowFeatures
-    >>> X = pd.DataFrame(dict(date = ["2022-09-18",
-    >>>                          "2022-09-19",
-    >>>                          "2022-09-20",
-    >>>                          "2022-09-21",
-    >>>                          "2022-09-22",
-    >>>                          "2022-09-18",
-    >>>                          "2022-09-19",
-    >>>                          "2022-09-20",
-    >>>                          "2022-09-21",
-    >>>                          "2022-09-22"],
-    >>>                  x1 = [1,2,3,4,5, 3,5,6,8,11],
-    >>>                  x2 = [6,7,8,9,10, 2,9,10,15,2],
-    >>>                  x3=['a','a','a','a','a', 'b','b','b','b','b']
-    >>>                ))
-    >>> ewf = ExpandingWindowFeatures(group_by='x3')
-    >>> ewf.fit_transform(X)
-             date  x1  x2 x3  x1_expanding_mean  x2_expanding_mean
-    0  2022-09-18   1   6  a                NaN                NaN
-    1  2022-09-19   2   7  a           1.000000                6.0
-    2  2022-09-20   3   8  a           1.500000                6.5
-    3  2022-09-21   4   9  a           2.000000                7.0
-    4  2022-09-22   5  10  a           2.500000                7.5
-    5  2022-09-18   3   2  b                NaN                NaN
-    6  2022-09-19   5   9  b           3.000000                2.0
-    7  2022-09-20   6  10  b           4.000000                5.5
-    8  2022-09-21   8  15  b           4.666667                7.0
-    9  2022-09-22  11   2  b           5.500000                9.0
     """
 
     def __init__(
@@ -228,9 +199,11 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
         X = self._check_transform_input_and_state(X)
 
         if self.group_by:
-            tmp = self._agg_expanding_window_features(
-                grouped_df=X.groupby(self.group_by)
+            tmp = X.groupby(self.group_by, as_index=False).apply(
+                self._agg_expanding_window_features,
+                include_groups=False,
             )
+            tmp = tmp.reset_index(drop = True)
         else:
             tmp = (
                 X[self.variables_]
@@ -279,14 +252,9 @@ def _agg_expanding_window_features(
         Union[pd.Series, pd.DataFrame]
             returned expanding window features
         """
-        tmp_data = []
-        for _, group in grouped_df:
-            tmp = (
-                group[self.variables_]
-                .expanding(min_periods=self.min_periods)
-                .agg(self.functions)
-                .shift(periods=self.periods, freq=self.freq)
-            )
-            tmp_data.append(tmp)
-        tmp = pd.concat(tmp_data).sort_index()
-        return tmp
+        return (
+            grouped_df[self.variables_]
+            .expanding(min_periods=self.min_periods)
+            .agg(self.functions)
+            .shift(periods=self.periods, freq=self.freq)
+        )

From 92f996d3a823eb589ebd7b6b40009d0680f2267a Mon Sep 17 00:00:00 2001
From: Ezzaldin97 <kingtal632@yahoo.com>
Date: Mon, 1 Apr 2024 21:08:38 +0200
Subject: [PATCH 20/24] fix reindexing to original index after grouping bug

---
 feature_engine/timeseries/forecasting/window_features.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/feature_engine/timeseries/forecasting/window_features.py b/feature_engine/timeseries/forecasting/window_features.py
index 99c45d4b7..f12a040a9 100644
--- a/feature_engine/timeseries/forecasting/window_features.py
+++ b/feature_engine/timeseries/forecasting/window_features.py
@@ -211,12 +211,14 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
             df_ls = []
             for win in self.window:
                 if self.group_by:
+                    original_index = X.index
                     tmp = X.groupby(self.group_by, as_index=False).apply(
                         self._agg_window_features,
                         win=win,
                         include_groups=False,
                     )
-                    tmp = tmp.reset_index(drop = True)
+                    tmp = tmp.set_index(original_index)
+                    tmp = tmp.reindex(original_index)
                 else:
                     tmp = (
                         X[self.variables_]
@@ -229,12 +231,14 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
 
         else:
             if self.group_by:
+                original_index = X.index
                 tmp = X.groupby(self.group_by, as_index=False).apply(
                     self._agg_window_features,
                     win=self.window,
                     include_groups=False,
                 )
-                tmp = tmp.reset_index(drop = True)
+                tmp = tmp.set_index(original_index)
+                tmp = tmp.reindex(original_index)
             else:
                 tmp = (
                     X[self.variables_]

From 152c037f8d60ac93857718b2779a47327df397c3 Mon Sep 17 00:00:00 2001
From: Ezzaldin97 <kingtal632@yahoo.com>
Date: Mon, 1 Apr 2024 21:13:57 +0200
Subject: [PATCH 21/24] fix reindexing to original index after grouping
 operation bug

---
 .../timeseries/forecasting/expanding_window_features.py       | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/feature_engine/timeseries/forecasting/expanding_window_features.py b/feature_engine/timeseries/forecasting/expanding_window_features.py
index 561111f9d..a8fe60aa7 100644
--- a/feature_engine/timeseries/forecasting/expanding_window_features.py
+++ b/feature_engine/timeseries/forecasting/expanding_window_features.py
@@ -199,11 +199,13 @@ def transform(self, X: pd.DataFrame) -> pd.DataFrame:
         X = self._check_transform_input_and_state(X)
 
         if self.group_by:
+            original_index = X.index
             tmp = X.groupby(self.group_by, as_index=False).apply(
                 self._agg_expanding_window_features,
                 include_groups=False,
             )
-            tmp = tmp.reset_index(drop = True)
+            tmp = tmp.set_index(original_index)
+            tmp = tmp.reindex(original_index)
         else:
             tmp = (
                 X[self.variables_]

From 5343e50f7d7663fb95789eb6686c26db3aa716af Mon Sep 17 00:00:00 2001
From: Ezzaldin97 <kingtal632@yahoo.com>
Date: Mon, 1 Apr 2024 22:01:30 +0200
Subject: [PATCH 22/24] replacing group_by docstring with group_by_docstring

---
 .../timeseries/forecasting/lag_features.py    | 25 +++----------------
 1 file changed, 3 insertions(+), 22 deletions(-)

diff --git a/feature_engine/timeseries/forecasting/lag_features.py b/feature_engine/timeseries/forecasting/lag_features.py
index 76f3563b6..9d4186fdc 100644
--- a/feature_engine/timeseries/forecasting/lag_features.py
+++ b/feature_engine/timeseries/forecasting/lag_features.py
@@ -13,6 +13,7 @@
     _drop_original_docstring,
     _missing_values_docstring,
     _variables_numerical_docstring,
+    _group_by_docstring,
 )
 from feature_engine._docstrings.methods import (
     _fit_not_learn_docstring,
@@ -32,6 +33,7 @@
     n_features_in_=_n_features_in_docstring,
     fit=_fit_not_learn_docstring,
     fit_transform=_fit_transform_docstring,
+    group_by=_group_by_docstring,
 )
 class LagFeatures(BaseForecastTransformer):
     """
@@ -74,8 +76,7 @@ class LagFeatures(BaseForecastTransformer):
 
     {drop_original}
 
-    group_by: str, str, int, or list of strings or integers, default=None
-            variable of list of variables to create lag features based on.
+    {group_by}
 
     Attributes
     ----------
@@ -120,26 +121,6 @@ class LagFeatures(BaseForecastTransformer):
     2  2022-09-20   3   8       2.0       7.0       1.0       6.0
     3  2022-09-21   4   9       3.0       8.0       2.0       7.0
     4  2022-09-22   5  10       4.0       9.0       3.0       8.0
-    create lags based on other variables.
-    >>> import pandas as pd
-    >>> from feature_engine.timeseries.forecasting import LagFeatures
-    >>> X = pd.DataFrame(dict(date = ["2022-09-18",
-    >>>                               "2022-09-19",
-    >>>                               "2022-09-20",
-    >>>                               "2022-09-21",
-    >>>                               "2022-09-22"],
-    >>>                       x1 = [1,2,3,4,5],
-    >>>                       x2 = [6,7,8,9,10],
-    >>>                       x3 = ['a','b','a','b','a']
-    >>>                     ))
-    >>> lf = LagFeatures(periods=[1,2], group_by_variables='x3')
-    >>> lf.fit_transform(X)
-              date  x1  x2 x3  x1_lag_1  x2_lag_1  x1_lag_2  x2_lag_2
-    0  2022-09-18   1   6  a       NaN       NaN       NaN       NaN
-    1  2022-09-19   2   7  b       NaN       NaN       NaN       NaN
-    2  2022-09-20   3   8  a       1.0       6.0       NaN       NaN
-    3  2022-09-21   4   9  b       2.0       7.0       NaN       NaN
-    4  2022-09-22   5  10  a       3.0       8.0       1.0       6.0
     """
 
     def __init__(

From ef1eaa8dff05c8886fcbe1235bb8da5103a7af40 Mon Sep 17 00:00:00 2001
From: Ezzaldin97 <kingtal632@yahoo.com>
Date: Mon, 1 Apr 2024 22:05:43 +0200
Subject: [PATCH 23/24] adjust code-style and formatting

---
 .../timeseries/forecasting/base_forecast_transformers.py        | 2 +-
 .../timeseries/forecasting/expanding_window_features.py         | 2 +-
 feature_engine/timeseries/forecasting/lag_features.py           | 2 +-
 feature_engine/timeseries/forecasting/window_features.py        | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/feature_engine/timeseries/forecasting/base_forecast_transformers.py b/feature_engine/timeseries/forecasting/base_forecast_transformers.py
index 1c3c9289e..aee003833 100644
--- a/feature_engine/timeseries/forecasting/base_forecast_transformers.py
+++ b/feature_engine/timeseries/forecasting/base_forecast_transformers.py
@@ -14,8 +14,8 @@
 )
 from feature_engine._docstrings.init_parameters.all_trasnformers import (
     _drop_original_docstring,
-    _missing_values_docstring,
     _group_by_docstring,
+    _missing_values_docstring,
 )
 from feature_engine._docstrings.methods import _fit_not_learn_docstring
 from feature_engine._docstrings.substitute import Substitution
diff --git a/feature_engine/timeseries/forecasting/expanding_window_features.py b/feature_engine/timeseries/forecasting/expanding_window_features.py
index a8fe60aa7..3061f10fc 100644
--- a/feature_engine/timeseries/forecasting/expanding_window_features.py
+++ b/feature_engine/timeseries/forecasting/expanding_window_features.py
@@ -13,9 +13,9 @@
 )
 from feature_engine._docstrings.init_parameters.all_trasnformers import (
     _drop_original_docstring,
+    _group_by_docstring,
     _missing_values_docstring,
     _variables_numerical_docstring,
-    _group_by_docstring,
 )
 from feature_engine._docstrings.methods import (
     _fit_not_learn_docstring,
diff --git a/feature_engine/timeseries/forecasting/lag_features.py b/feature_engine/timeseries/forecasting/lag_features.py
index 9d4186fdc..65c2c3d38 100644
--- a/feature_engine/timeseries/forecasting/lag_features.py
+++ b/feature_engine/timeseries/forecasting/lag_features.py
@@ -11,9 +11,9 @@
 )
 from feature_engine._docstrings.init_parameters.all_trasnformers import (
     _drop_original_docstring,
+    _group_by_docstring,
     _missing_values_docstring,
     _variables_numerical_docstring,
-    _group_by_docstring,
 )
 from feature_engine._docstrings.methods import (
     _fit_not_learn_docstring,
diff --git a/feature_engine/timeseries/forecasting/window_features.py b/feature_engine/timeseries/forecasting/window_features.py
index f12a040a9..0e7d316cb 100644
--- a/feature_engine/timeseries/forecasting/window_features.py
+++ b/feature_engine/timeseries/forecasting/window_features.py
@@ -8,9 +8,9 @@
 )
 from feature_engine._docstrings.init_parameters.all_trasnformers import (
     _drop_original_docstring,
+    _group_by_docstring,
     _missing_values_docstring,
     _variables_numerical_docstring,
-    _group_by_docstring,
 )
 from feature_engine._docstrings.methods import (
     _fit_not_learn_docstring,

From 09db782c0e57bfcbdf010d9e55d21755ef94a2f5 Mon Sep 17 00:00:00 2001
From: Ezzaldin97 <kingtal632@yahoo.com>
Date: Tue, 2 Apr 2024 02:38:48 +0200
Subject: [PATCH 24/24] remove white spaces

---
 .../_docstrings/init_parameters/all_trasnformers.py    | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/feature_engine/_docstrings/init_parameters/all_trasnformers.py b/feature_engine/_docstrings/init_parameters/all_trasnformers.py
index ad9ff71e5..510180463 100644
--- a/feature_engine/_docstrings/init_parameters/all_trasnformers.py
+++ b/feature_engine/_docstrings/init_parameters/all_trasnformers.py
@@ -23,9 +23,9 @@
         learning parameters or performing the transformation.
         """.rstrip()
 
-_group_by_docstring = """str, int, or list of strings or integers, default=None.
-        A group_by operation involves some combination of splitting the object, 
-        applying a function, and combining the results. 
-        This can be used to group large amounts of data and 
+_group_by_docstring = """group_by: str, int, or list of str or int, default=None
+        Variable or list of variables to group the data by. The data is split
+        into these groups, the features are created within each group, and the
+        results are combined. This can be used to group large amounts of data and
         compute operations on these groups.
-        """.rstrip()
\ No newline at end of file
+        """.rstrip()