Skip to content

Commit 4491c7a

Browse files
Merge pull request #175 from scikit-learn-contrib/feature/tdqm
Long EM/RPCA operations wrapped with tqdm
2 parents ac31408 + 7fb0c56 commit 4491c7a

File tree

7 files changed

+39
-7
lines changed

7 files changed

+39
-7
lines changed

HISTORY.rst

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,11 @@
22
History
33
=======
44

5-
0.1.8 (2024-08-29)
5+
0.1.10 (2024-??-??)
6+
-------------------
7+
* Long EM and RPCA operations wrapped with tqdm progress bars
8+
9+
0.1.9 (2024-08-29)
610
------------------
711
* Tutorials reproducibility improved with random_state parameters
812
* RPCA now accepts random_state parameters

README.rst

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -70,17 +70,18 @@ With just these few lines of code, you can see how easy it is to
7070
from qolmat.utils import data
7171
7272
# load and prepare csv data
73+
7374
df_data = data.get_data("Beijing")
7475
columns = ["TEMP", "PRES", "WSPM"]
7576
df_data = df_data[columns]
7677
df_with_nan = data.add_holes(df_data, ratio_masked=0.2, mean_size=120)
7778
7879
# impute and compare
79-
imputer_mean = imputers.ImputerSimple(strategy="mean", groups=("station",))
80+
imputer_median = imputers.ImputerSimple(groups=("station",))
8081
imputer_interpol = imputers.ImputerInterpolation(method="linear", groups=("station",))
8182
imputer_var1 = imputers.ImputerEM(model="VAR", groups=("station",), method="mle", max_iter_em=50, n_iter_ou=15, dt=1e-3, p=1)
8283
dict_imputers = {
83-
"mean": imputer_mean,
84+
"median": imputer_median,
8485
"interpolation": imputer_interpol,
8586
"VAR(1) process": imputer_var1
8687
}

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ statsmodels = ">= 0.14.0"
4545
typed-ast = { version = "*", optional = true }
4646
category-encoders = "^2.6.3"
4747
dcor = ">= 0.6"
48+
tqdm = "*"
4849

4950
[tool.poetry.group.torch.dependencies]
5051
torch = "< 2.5"

qolmat/imputations/em_sampler.py

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from scipy import optimize as spo
1212
from sklearn import utils as sku
1313
from sklearn.base import BaseEstimator, TransformerMixin
14+
from tqdm import tqdm
1415

1516
from qolmat.utils import utils
1617
from qolmat.utils.utils import RandomSetting
@@ -433,7 +434,11 @@ def fit_X(self, X: NDArray) -> None:
433434

434435
X = self._maximize_likelihood(X_imp, mask_na)
435436

436-
for iter_em in range(self.max_iter_em):
437+
for iter_em in tqdm(
438+
range(self.max_iter_em),
439+
desc="EM parameters estimation",
440+
disable=not self.verbose,
441+
):
437442
X = self._sample_ou(X, mask_na)
438443

439444
self.combine_parameters()
@@ -474,6 +479,7 @@ def fit(self, X: NDArray) -> "EM":
474479
if hasattr(self, "p_to_fit") and self.p_to_fit:
475480
aics: List[float] = []
476481
for p in range(self.max_lagp + 1):
482+
print("p=", p)
477483
self.p = p
478484
self.fit_X(X)
479485
n1, n2 = self.X.shape

qolmat/imputations/rpca/rpca_noisy.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from scipy.sparse import dok_matrix, identity
1212
from scipy.sparse.linalg import spsolve
1313
from sklearn import utils as sku
14+
from tqdm import tqdm
1415

1516
from qolmat.imputations.rpca import rpca_utils
1617
from qolmat.imputations.rpca.rpca import RPCA
@@ -200,6 +201,7 @@ def decompose_with_basis(
200201
max_iterations=self.max_iterations,
201202
tolerance=self.tolerance,
202203
norm=self.norm,
204+
verbose=self.verbose,
203205
)
204206

205207
self._check_cost_function_minimized(D, M, A, Omega, tau, lam)
@@ -219,6 +221,7 @@ def minimise_loss(
219221
max_iterations: int = 10000,
220222
tolerance: float = 1e-6,
221223
norm: str = "L2",
224+
verbose: bool = False,
222225
) -> Tuple:
223226
"""Compute the noisy RPCA with a L2 time penalisation.
224227
@@ -255,6 +258,9 @@ def minimise_loss(
255258
consecutive iterations. Defaults to 1e-6.
256259
norm : str, optional
257260
Error norm, can be "L1" or "L2". Defaults to "L2".
261+
verbose : bool, optional
262+
Verbosity level, if True a progress bar is displayed. Defaults to
263+
False.
258264
259265
Returns
260266
-------
@@ -311,7 +317,11 @@ def minimise_loss(
311317
Ir = np.eye(rank)
312318
In = identity(n_rows)
313319

314-
for _ in range(max_iterations):
320+
for _ in tqdm(
321+
range(max_iterations),
322+
desc="Noisy RPCA loss minimization",
323+
disable=not verbose,
324+
):
315325
M_temp = M.copy()
316326
A_temp = A.copy()
317327
L_temp = L.copy()

qolmat/imputations/rpca/rpca_pcp.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import numpy as np
99
from numpy.typing import NDArray
1010
from sklearn import utils as sku
11+
from tqdm import tqdm
1112

1213
from qolmat.imputations.rpca import rpca_utils
1314
from qolmat.imputations.rpca.rpca import RPCA
@@ -125,7 +126,11 @@ def decompose(self, D: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]:
125126
errors: NDArray = np.full((self.max_iterations,), fill_value=np.nan)
126127

127128
M: NDArray = D - A
128-
for iteration in range(self.max_iterations):
129+
for iteration in tqdm(
130+
range(self.max_iterations),
131+
desc="RPCA PCP decomposition",
132+
disable=not self.verbose,
133+
):
129134
M = rpca_utils.svd_thresholding(D - A + Y / mu, 1 / mu)
130135
A = rpca_utils.soft_thresholding(D - M + Y / mu, lam / mu)
131136
A[~Omega] = (D - M)[~Omega]

qolmat/imputations/softimpute.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
from numpy.typing import NDArray
1111
from sklearn import utils as sku
1212
from sklearn.base import BaseEstimator, TransformerMixin
13+
from tqdm import tqdm
1314

1415
from qolmat.imputations.rpca import rpca_utils
1516
from qolmat.utils import utils
@@ -146,7 +147,11 @@ def decompose(self, X: NDArray, Omega: NDArray) -> Tuple[NDArray, NDArray]:
146147
B = V * D
147148
M = A @ B.T
148149
cost_start = SoftImpute.cost_function(X, M, A, Omega, tau)
149-
for iter_ in range(self.max_iterations):
150+
for iter_ in tqdm(
151+
range(self.max_iterations),
152+
desc="Soft Impute decomposition",
153+
disable=not self.verbose,
154+
):
150155
U_old = U
151156
V_old = V
152157
D_old = D

0 commit comments

Comments
 (0)