Merge pull request #175 from scikit-learn-contrib/feature/tdqm

JulienRoussel77 · web-flow · commit 4491c7a36717 · 2025-08-30T19:46:34.000+02:00
Long EM/RPCA operations wrapped with tqdm
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -2,7 +2,11 @@
 History
 =======
 
-0.1.8 (2024-08-29)
+0.1.10 (2024-??-??)
+-------------------
+* Long EM and RPCA operations wrapped with tqdm progress bars
+
+0.1.9 (2024-08-29)
 ------------------
 * Tutorials reproducibility improved with random_state parameters
 * RPCA now accepts random_state parameters
diff --git a/README.rst b/README.rst
@@ -70,17 +70,18 @@ With just these few lines of code, you can see how easy it is to
   from qolmat.utils import data
 
   # load and prepare csv data
+
   df_data = data.get_data("Beijing")
   columns = ["TEMP", "PRES", "WSPM"]
   df_data = df_data[columns]
   df_with_nan = data.add_holes(df_data, ratio_masked=0.2, mean_size=120)
 
   # impute and compare
-  imputer_mean = imputers.ImputerSimple(strategy="mean", groups=("station",))
+  imputer_median = imputers.ImputerSimple(groups=("station",))
   imputer_interpol = imputers.ImputerInterpolation(method="linear", groups=("station",))
   imputer_var1 = imputers.ImputerEM(model="VAR", groups=("station",), method="mle", max_iter_em=50, n_iter_ou=15, dt=1e-3, p=1)
   dict_imputers = {
-        "mean": imputer_mean,
+        "median": imputer_median,
         "interpolation": imputer_interpol,
         "VAR(1) process": imputer_var1
     }
diff --git a/pyproject.toml b/pyproject.toml
@@ -45,6 +45,7 @@ statsmodels = ">= 0.14.0"
 typed-ast = { version = "*", optional = true }
 category-encoders = "^2.6.3"
 dcor = ">= 0.6"
+tqdm = "*"
 
 [tool.poetry.group.torch.dependencies]
 torch = "< 2.5"
diff --git a/qolmat/imputations/em_sampler.py b/qolmat/imputations/em_sampler.py
@@ -11,6 +11,7 @@
 from scipy import optimize as spo
 from sklearn import utils as sku
 from sklearn.base import BaseEstimator, TransformerMixin
+from tqdm import tqdm
 
 from qolmat.utils import utils
 from qolmat.utils.utils import RandomSetting
@@ -433,7 +434,11 @@ def fit_X(self, X: NDArray) -> None:
 
         X = self._maximize_likelihood(X_imp, mask_na)
 
-        for iter_em in range(self.max_iter_em):
+        for iter_em in tqdm(
+            range(self.max_iter_em),
+            desc="EM parameters estimation",
+            disable=not self.verbose,
+        ):
             X = self._sample_ou(X, mask_na)
 
             self.combine_parameters()
@@ -474,6 +479,7 @@ def fit(self, X: NDArray) -> "EM":
         if hasattr(self, "p_to_fit") and self.p_to_fit:
             aics: List[float] = []
             for p in range(self.max_lagp + 1):
+                print("p=", p)
                 self.p = p
                 self.fit_X(X)
                 n1, n2 = self.X.shape
diff --git a/qolmat/imputations/rpca/rpca_noisy.py b/qolmat/imputations/rpca/rpca_noisy.py
@@ -11,6 +11,7 @@
 from scipy.sparse import dok_matrix, identity
 from scipy.sparse.linalg import spsolve
 from sklearn import utils as sku
+from tqdm import tqdm
 
 from qolmat.imputations.rpca import rpca_utils
 from qolmat.imputations.rpca.rpca import RPCA
@@ -200,6 +201,7 @@ def decompose_with_basis(
             max_iterations=self.max_iterations,
             tolerance=self.tolerance,
             norm=self.norm,
+            verbose=self.verbose,
         )
 
         self._check_cost_function_minimized(D, M, A, Omega, tau, lam)
@@ -219,6 +221,7 @@ def minimise_loss(
         max_iterations: int = 10000,
         tolerance: float = 1e-6,
         norm: str = "L2",
+        verbose: bool = False,
     ) -> Tuple:
         """Compute the noisy RPCA with a L2 time penalisation.
 
@@ -255,6 +258,9 @@ def minimise_loss(
             consecutive iterations. Defaults to 1e-6.
         norm : str, optional
             Error norm, can be "L1" or "L2". Defaults to "L2".
+        verbose : bool, optional
+            Verbosity level, if False the warnings are silenced and the
+            progress bar is disabled. Defaults to False.
 
         Returns
         -------
@@ -311,7 +317,11 @@ def minimise_loss(
         Ir = np.eye(rank)
         In = identity(n_rows)
 
-        for _ in range(max_iterations):
+        for _ in tqdm(
+            range(max_iterations),
+            desc="Noisy RPCA loss minimization",
+            disable=not verbose,
+        ):
             M_temp = M.copy()
             A_temp = A.copy()
             L_temp = L.copy()
diff --git a/qolmat/imputations/rpca/rpca_pcp.py b/qolmat/imputations/rpca/rpca_pcp.py
@@ -8,6 +8,7 @@
 import numpy as np
 from numpy.typing import NDArray
 from sklearn import utils as sku
+from tqdm import tqdm
 
 from qolmat.imputations.rpca import rpca_utils
 from qolmat.imputations.rpca.rpca import RPCA
@@ -125,7 +126,11 @@ def decompose(self, D: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]:
         errors: NDArray = np.full((self.max_iterations,), fill_value=np.nan)
 
         M: NDArray = D - A
-        for iteration in range(self.max_iterations):
+        for iteration in tqdm(
+            range(self.max_iterations),
+            desc="RPCA PCP decomposition",
+            disable=not self.verbose,
+        ):
             M = rpca_utils.svd_thresholding(D - A + Y / mu, 1 / mu)
             A = rpca_utils.soft_thresholding(D - M + Y / mu, lam / mu)
             A[~Omega] = (D - M)[~Omega]
diff --git a/qolmat/imputations/softimpute.py b/qolmat/imputations/softimpute.py
@@ -10,6 +10,7 @@
 from numpy.typing import NDArray
 from sklearn import utils as sku
 from sklearn.base import BaseEstimator, TransformerMixin
+from tqdm import tqdm
 
 from qolmat.imputations.rpca import rpca_utils
 from qolmat.utils import utils
@@ -146,7 +147,11 @@ def decompose(self, X: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]:
         B = V * D
         M = A @ B.T
         cost_start = SoftImpute.cost_function(X, M, A, Omega, tau)
-        for iter_ in range(self.max_iterations):
+        for iter_ in tqdm(
+            range(self.max_iterations),
+            desc="Soft Impute decomposition",
+            disable=not self.verbose,
+        ):
             U_old = U
             V_old = V
             D_old = D