From 5e0fa46052adfe119b1e1ce1e6e05d38efd3bb89 Mon Sep 17 00:00:00 2001
From: enricofallacara
Date: Tue, 15 Dec 2020 15:08:12 +0100
Subject: [PATCH 1/3] Parallelize the time series k-means barycenter update
 procedure

---
 tslearn/barycenters.py | 185 +++++++-------------------
 tslearn/clustering.py  | 101 ++++++++------
 tslearn/metrics.py     | 296 ++---------------------------------------
 3 files changed, 121 insertions(+), 461 deletions(-)

diff --git a/tslearn/barycenters.py b/tslearn/barycenters.py
index edf0d6bf8..162ae9f14 100644
--- a/tslearn/barycenters.py
+++ b/tslearn/barycenters.py
@@ -1,16 +1,12 @@
 """
 The :mod:`tslearn.barycenters` module gathers algorithms for time series
 barycenter computation.
-
 A barycenter (or *Fréchet mean*) is a time series :math:`b` which minimizes
 the sum of squared distances to the time series of a given data set :math:`x`:
-
 .. math:: \\min \\sum_i d( b, x_i )^2
-
 Only the methods :func:`dtw_barycenter_averaging` and
 :func:`softdtw_barycenter` can operate on variable-length time-series (see
 :ref:`here`).
-
 See the :ref:`barycenter examples` for an overview.
 """

@@ -23,6 +19,8 @@
 from sklearn.exceptions import ConvergenceWarning
 import warnings
 from sklearn.utils import check_random_state
+from joblib import Parallel, delayed

 from tslearn.utils import to_time_series_dataset, check_equal_size, \
     to_time_series, ts_size
@@ -44,25 +42,20 @@ def _set_weights(w, n):

 def euclidean_barycenter(X, weights=None):
     """Standard Euclidean barycenter computed from a set of time series.
-
     Parameters
     ----------
     X : array-like, shape=(n_ts, sz, d)
         Time series dataset.
-
     weights: None or array
         Weights of each X[i]. Must be the same size as len(X).
         If None, uniform weights are used.
-
     Returns
     -------
     numpy.array of shape (sz, d)
         Barycenter of the provided time series dataset.
-
     Notes
     -----
     This method requires a dataset of equal-sized time series
-
     Examples
     --------
     >>> time_series = [[1, 2, 3, 4], [1, 2, 4, 5]]
@@ -132,52 +125,41 @@ def dtw_barycenter_averaging_petitjean(X, barycenter_size=None,
                                        metric_params=None, verbose=False):
     """DTW Barycenter Averaging (DBA) method.
-
     DBA was originally presented in [1]_.
     This implementation is not the one documented in the API, but is kept
     in the codebase to check the documented one for non-regression.
-
     Parameters
     ----------
     X : array-like, shape=(n_ts, sz, d)
         Time series dataset.
-
     barycenter_size : int or None (default: None)
         Size of the barycenter to generate. If None, the size of the
         barycenter is that of the data provided at fit
         time or that of the initial barycenter if specified.
-
     init_barycenter : array or None (default: None)
         Initial barycenter to start from for the optimization process.
-
     max_iter : int (default: 30)
         Number of iterations of the Expectation-Maximization optimization
         procedure.
-
     tol : float (default: 1e-5)
         Tolerance to use for early stopping: if the decrease in cost is lower
         than this value, the Expectation-Maximization procedure stops.
-
     weights: None or array
         Weights of each X[i]. Must be the same size as len(X).
         If None, uniform weights are used.
-
     metric_params: dict or None (default: None)
         DTW constraint parameters to be used.
         See :ref:`tslearn.metrics.dtw_path ` for a list of
         accepted parameters
         If None, no constraint is used for DTW computations.
-
     verbose : boolean (default: False)
         Whether to print information about the cost at each iteration or not.
-
     Returns
     -------
     numpy.array of shape (barycenter_size, d) or (sz, d) if barycenter_size \
             is None
        DBA barycenter of the provided time series dataset.
-
     Examples
     --------
     >>> time_series = [[1, 2, 3, 4], [1, 2, 4, 5]]
@@ -210,14 +192,14 @@ def dtw_barycenter_averaging_petitjean(X, barycenter_size=None,
     array([[2.5],
            [2.5],
            [2.5]])
-
     References
     ----------
     .. [1] F. Petitjean, A. Ketterlin & P. Gancarski. A global averaging method
        for dynamic time warping, with applications to clustering. Pattern
        Recognition, Elsevier, 2011, Vol. 44, Num. 3, pp. 678-693
     """
-    X_ = to_time_series_dataset(X)
+    # Dataset conversion is intentionally skipped here: callers are expected
+    # to pass an already-formatted dataset, which avoids a redundant copy.
+    X_ = X
     if barycenter_size is None:
         barycenter_size = X_.shape[1]
     weights = _set_weights(weights, X_.shape[0])
@@ -248,29 +230,23 @@ def dtw_barycenter_averaging_petitjean(X, barycenter_size=None,

 def _mm_assignment(X, barycenter, weights, metric_params=None):
     """Compute item assignment based on DTW alignments and return cost as a
     bonus.
-
     Parameters
     ----------
     X : numpy.array of shape (n, sz, d)
         Time-series to be averaged
-
     barycenter : numpy.array of shape (barycenter_size, d)
         Barycenter as computed at the current step of the algorithm.
-
     weights: array
         Weights of each X[i]. Must be the same size as len(X).
-
     metric_params: dict or None (default: None)
         DTW constraint parameters to be used.
         See :ref:`tslearn.metrics.dtw_path ` for a list of
         accepted parameters
         If None, no constraint is used for DTW computations.
-
     Returns
     -------
     list of index pairs
         Warping paths
-
     float
         Current alignment cost
     """
@@ -279,92 +255,85 @@ def _mm_assignment(X, barycenter, weights, metric_params=None):
     n = X.shape[0]
-    cost = 0.
-    list_p_k = []
-    for i in range(n):
-        path, dist_i = dtw_path(barycenter, X[i], **metric_params)
-        cost += dist_i ** 2 * weights[i]
-        list_p_k.append(path)
+    # All DTW alignments are independent, so compute them in parallel.
+    # Threads are enough since the numba core of dtw_path releases the GIL;
+    # metric_params is forwarded so user-supplied constraints are honoured.
+    res = Parallel(backend="threading", require="sharedmem", n_jobs=-1)(
+        delayed(dtw_path)(barycenter, X[i], **metric_params)
+        for i in range(n)
+    )
+    list_p_k, dists = map(list, zip(*res))
+    cost = numpy.sum(numpy.power(dists, 2) * weights)
     cost /= weights.sum()
     return list_p_k, cost


 def _subgradient_valence_warping(list_p_k, barycenter_size, weights):
     """Compute Valence and Warping matrices from paths.
-
     Valence matrices are denoted :math:`V^{(k)}` and Warping matrices are
     :math:`W^{(k)}` in [1]_.
-
     This function returns a list of :math:`V^{(k)}` diagonals (as a vector)
     and a list of :math:`W^{(k)}` matrices.
-
     Parameters
     ----------
     list_p_k : list of index pairs
         Warping paths
-
     barycenter_size : int
         Size of the barycenter to generate.
-
     weights: array
         Weights of each X[i]. Must be the same size as len(X).
-
     Returns
     -------
     list of numpy.array of shape (barycenter_size, )
         list of weighted :math:`V^{(k)}` diagonals (as a vector)
-
     list of numpy.array of shape (barycenter_size, sz_k)
         list of weighted :math:`W^{(k)}` matrices
-
     References
     ----------
-
     .. [1] D. Schultz and B. Jain. Nonsmooth Analysis and Subgradient Methods
        for Averaging in Dynamic Time Warping Spaces.
        Pattern Recognition, 74, 340-358.
""" - list_v_k = [] - list_w_k = [] - for k, p_k in enumerate(list_p_k): - sz_k = p_k[-1][1] + 1 - w_k = numpy.zeros((barycenter_size, sz_k)) + def create_w_k(p_k, barycenter_size): + w_k = numpy.zeros((barycenter_size, p_k[-1][1] + 1)) for i, j in p_k: w_k[i, j] = 1. - list_w_k.append(w_k * weights[k]) - list_v_k.append(w_k.sum(axis=1) * weights[k]) + return w_k + w_k_ones = Parallel(backend='threading',prefer='threads',require='sharedmem', n_jobs=15, verbose = True)(delayed(create_w_k)(p_k,barycenter_size) for p_k in list_p_k) + def mult_w_k_weights(w_k, weight): + return w_k * weight + list_w_k = Parallel(backend='threading',prefer='threads',require='sharedmem', n_jobs=15, verbose = True)(delayed(mult_w_k_weights)(w_k,weights[k]) for k,w_k in enumerate(w_k_ones)) + def mult_w_k_sum_wieghts(w_k, weight): + return w_k.sum(axis=1) * weight + list_v_k = Parallel(backend='threading',prefer='threads',require='sharedmem', n_jobs=15, verbose = True)(delayed(mult_w_k_sum_wieghts)(w_k,weight) for (w_k, weight) in zip(w_k_ones, weights)) return list_v_k, list_w_k def _mm_valence_warping(list_p_k, barycenter_size, weights): """Compute Valence and Warping matrices from paths. - Valence matrices are denoted :math:`V^{(k)}` and Warping matrices are :math:`W^{(k)}` in [1]_. - This function returns the sum of :math:`V^{(k)}` diagonals (as a vector) and a list of :math:`W^{(k)}` matrices. - Parameters ---------- list_p_k : list of index pairs Warping paths - barycenter_size : int Size of the barycenter to generate. - weights: array Weights of each X[i]. Must be the same size as len(X). - Returns ------- numpy.array of shape (barycenter_size, ) sum of weighted :math:`V^{(k)}` diagonals (as a vector) - list of numpy.array of shape (barycenter_size, sz_k) list of weighted :math:`W^{(k)}` matrices - References ---------- - .. [1] D. Schultz and B. Jain. Nonsmooth Analysis and Subgradient Methods for Averaging in Dynamic Time Warping Spaces. Pattern Recognition, 74, 340-358. @@ -373,79 +342,71 @@ def _mm_valence_warping(list_p_k, barycenter_size, weights): list_p_k=list_p_k, barycenter_size=barycenter_size, weights=weights) - diag_sum_v_k = numpy.zeros(list_v_k[0].shape) - for v_k in list_v_k: - diag_sum_v_k += v_k + #diag_sum_v_k = numpy.zeros(list_v_k[0].shape) + #for v_k in list_v_k: + # diag_sum_v_k += v_k + diag_sum_v_k = Parallel(backend='threading',prefer='threads',require='sharedmem', n_jobs=15, verbose = True)(delayed(numpy.sum)(x) for x in zip(*numpy.array(list_v_k))) + diag_sum_v_k = numpy.array(diag_sum_v_k) return diag_sum_v_k, list_w_k def _mm_update_barycenter(X, diag_sum_v_k, list_w_k): """Update barycenters using the formula from Algorithm 2 in [1]_. - Parameters ---------- X : numpy.array of shape (n, sz, d) Time-series to be averaged - diag_sum_v_k : numpy.array of shape (barycenter_size, ) sum of weighted :math:`V^{(k)}` diagonals (as a vector) - list_w_k : list of numpy.array of shape (barycenter_size, sz_k) list of weighted :math:`W^{(k)}` matrices - Returns ------- numpy.array of shape (barycenter_size, d) Updated barycenter - References ---------- - .. [1] D. Schultz and B. Jain. Nonsmooth Analysis and Subgradient Methods for Averaging in Dynamic Time Warping Spaces. Pattern Recognition, 74, 340-358. """ - d = X.shape[2] - barycenter_size = diag_sum_v_k.shape[0] - sum_w_x = numpy.zeros((barycenter_size, d)) - for k, (w_k, x_k) in enumerate(zip(list_w_k, X)): - sum_w_x += w_k.dot(x_k[:ts_size(x_k)]) - barycenter = numpy.diag(1. 
/ diag_sum_v_k).dot(sum_w_x) + #d = X.shape[2] + #barycenter_size = diag_sum_v_k.shape[0] + #sum_w_x = numpy.zeros((barycenter_size, d)) + #for k, (w_k, x_k) in enumerate(zip(list_w_k, X)): + # sum_w_x += w_k.dot(x_k[:ts_size(x_k)]) + sum_w_x = Parallel(backend='threading',prefer='threads',require='sharedmem', n_jobs=15, verbose = True)(delayed(numpy.dot)(w_k,x_k[:ts_size(x_k)]) for (w_k, x_k) in zip(list_w_k, X)) + sum_w_x = Parallel(backend='threading',prefer='threads',require='sharedmem',n_jobs=15, verbose = True)(delayed(numpy.sum)(x) for x in zip(*numpy.array(sum_w_x))) + sum_w_x = numpy.array(sum_w_x).reshape((len(sum_w_x), 1)) + inverse_diag_sum_v_k = numpy.reciprocal(diag_sum_v_k) + inverse_diag_sum_v_k = numpy.diag(inverse_diag_sum_v_k) + barycenter = inverse_diag_sum_v_k.dot(sum_w_x) return barycenter def _subgradient_update_barycenter(X, list_diag_v_k, list_w_k, weights_sum, barycenter, eta): """Update barycenters using the formula from Algorithm 1 in [1]_. - Parameters ---------- X : numpy.array of shape (n, sz, d) Time-series to be averaged - list_diag_v_k : list of numpy.array of shape (barycenter_size, ) list of weighted :math:`V^{(k)}` diagonals (as vectors) - list_w_k : list of numpy.array of shape (barycenter_size, sz_k) list of weighted :math:`W^{(k)}` matrices - weights_sum : float sum of weights applied to matrices :math:`V^{(k)}` and :math:`W^{(k)}` - barycenter : numpy.array of shape (barycenter_size, d) Barycenter as computed at the previous iteration of the algorithm - eta : float Step-size for the subgradient descent algorithm - Returns ------- numpy.array of shape (barycenter_size, d) Updated barycenter - References ---------- - .. [1] D. Schultz and B. Jain. Nonsmooth Analysis and Subgradient Methods for Averaging in Dynamic Time Warping Spaces. Pattern Recognition, 74, 340-358. @@ -466,57 +427,45 @@ def dtw_barycenter_averaging(X, barycenter_size=None, init_barycenter=None, verbose=False, n_init=1): """DTW Barycenter Averaging (DBA) method estimated through Expectation-Maximization algorithm. - DBA was originally presented in [1]_. This implementation is based on a idea from [2]_ (Majorize-Minimize Mean Algorithm). - Parameters ---------- X : array-like, shape=(n_ts, sz, d) Time series dataset. - barycenter_size : int or None (default: None) Size of the barycenter to generate. If None, the size of the barycenter is that of the data provided at fit time or that of the initial barycenter if specified. - init_barycenter : array or None (default: None) Initial barycenter to start from for the optimization process. - max_iter : int (default: 30) Number of iterations of the Expectation-Maximization optimization procedure. - tol : float (default: 1e-5) Tolerance to use for early stopping: if the decrease in cost is lower than this value, the Expectation-Maximization procedure stops. - weights: None or array Weights of each X[i]. Must be the same size as len(X). If None, uniform weights are used. - metric_params: dict or None (default: None) DTW constraint parameters to be used. See :ref:`tslearn.metrics.dtw_path ` for a list of accepted parameters If None, no constraint is used for DTW computations. - verbose : boolean (default: False) Whether to print information about the cost at each iteration or not. 
- n_init : int (default: 1) Number of different initializations to be tried (useful only is init_barycenter is set to None, otherwise, all trials will reach the same performance) - Returns ------- numpy.array of shape (barycenter_size, d) or (sz, d) if barycenter_size \ is None DBA barycenter of the provided time series dataset. - Examples -------- >>> time_series = [[1, 2, 3, 4], [1, 2, 4, 5]] @@ -548,13 +497,11 @@ def dtw_barycenter_averaging(X, barycenter_size=None, init_barycenter=None, array([[2.5], [2.5], [2.5]]) - References ---------- .. [1] F. Petitjean, A. Ketterlin & P. Gancarski. A global averaging method for dynamic time warping, with applications to clustering. Pattern Recognition, Elsevier, 2011, Vol. 44, Num. 3, pp. 678-693 - .. [2] D. Schultz and B. Jain. Nonsmooth Analysis and Subgradient Methods for Averaging in Dynamic Time Warping Spaces. Pattern Recognition, 74, 340-358. @@ -587,46 +534,36 @@ def dtw_barycenter_averaging_one_init(X, barycenter_size=None, verbose=False): """DTW Barycenter Averaging (DBA) method estimated through Expectation-Maximization algorithm. - DBA was originally presented in [1]_. This implementation is based on a idea from [2]_ (Majorize-Minimize Mean Algorithm). - Parameters ---------- X : array-like, shape=(n_ts, sz, d) Time series dataset. - barycenter_size : int or None (default: None) Size of the barycenter to generate. If None, the size of the barycenter is that of the data provided at fit time or that of the initial barycenter if specified. - init_barycenter : array or None (default: None) Initial barycenter to start from for the optimization process. - max_iter : int (default: 30) Number of iterations of the Expectation-Maximization optimization procedure. - tol : float (default: 1e-5) Tolerance to use for early stopping: if the decrease in cost is lower than this value, the Expectation-Maximization procedure stops. - weights: None or array Weights of each X[i]. Must be the same size as len(X). If None, uniform weights are used. - metric_params: dict or None (default: None) DTW constraint parameters to be used. See :ref:`tslearn.metrics.dtw_path ` for a list of accepted parameters If None, no constraint is used for DTW computations. - verbose : boolean (default: False) Whether to print information about the cost at each iteration or not. - Returns ------- numpy.array of shape (barycenter_size, d) or (sz, d) if barycenter_size \ @@ -634,34 +571,32 @@ def dtw_barycenter_averaging_one_init(X, barycenter_size=None, DBA barycenter of the provided time series dataset. float Associated inertia - References ---------- .. [1] F. Petitjean, A. Ketterlin & P. Gancarski. A global averaging method for dynamic time warping, with applications to clustering. Pattern Recognition, Elsevier, 2011, Vol. 44, Num. 3, pp. 678-693 - .. [2] D. Schultz and B. Jain. Nonsmooth Analysis and Subgradient Methods for Averaging in Dynamic Time Warping Spaces. Pattern Recognition, 74, 340-358. 
""" - X_ = to_time_series_dataset(X) + #X_ = to_time_series_dataset(X) if barycenter_size is None: - barycenter_size = X_.shape[1] - weights = _set_weights(weights, X_.shape[0]) + barycenter_size = X.shape[1] + weights = _set_weights(weights, X.shape[0]) if init_barycenter is None: - barycenter = _init_avg(X_, barycenter_size) + barycenter = _init_avg(X, barycenter_size) else: barycenter_size = init_barycenter.shape[0] barycenter = init_barycenter cost_prev, cost = numpy.inf, numpy.inf for it in range(max_iter): - list_p_k, cost = _mm_assignment(X_, barycenter, weights, metric_params) + list_p_k, cost = _mm_assignment(X, barycenter, weights, metric_params) diag_sum_v_k, list_w_k = _mm_valence_warping(list_p_k, barycenter_size, weights) if verbose: print("[DBA] epoch %d, cost: %.3f" % (it + 1, cost)) - barycenter = _mm_update_barycenter(X_, diag_sum_v_k, list_w_k) + barycenter = _mm_update_barycenter(X, diag_sum_v_k, list_w_k) if abs(cost_prev - cost) < tol: break elif cost_prev < cost: @@ -682,66 +617,52 @@ def dtw_barycenter_averaging_subgradient(X, barycenter_size=None, metric_params=None, verbose=False): """DTW Barycenter Averaging (DBA) method estimated through subgradient descent algorithm. - DBA was originally presented in [1]_. This implementation is based on a idea from [2]_ (Stochastic Subgradient Mean Algorithm). - Parameters ---------- X : array-like, shape=(n_ts, sz, d) Time series dataset. - barycenter_size : int or None (default: None) Size of the barycenter to generate. If None, the size of the barycenter is that of the data provided at fit time or that of the initial barycenter if specified. - init_barycenter : array or None (default: None) Initial barycenter to start from for the optimization process. - max_iter : int (default: 30) Number of iterations of the Expectation-Maximization optimization procedure. - initial_step_size : float (default: 0.05) Initial step size for the subgradient descent algorithm. Default value is the one suggested in [2]_. - final_step_size : float (default: 0.005) Final step size for the subgradient descent algorithm. Default value is the one suggested in [2]_. - tol : float (default: 1e-5) Tolerance to use for early stopping: if the decrease in cost is lower than this value, the Expectation-Maximization procedure stops. - random_state : int, RandomState instance or None, optional (default=None) If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by `np.random`. - weights: None or array Weights of each X[i]. Must be the same size as len(X). If None, uniform weights are used. - metric_params: dict or None (default: None) DTW constraint parameters to be used. See :ref:`tslearn.metrics.dtw_path ` for a list of accepted parameters If None, no constraint is used for DTW computations. - verbose : boolean (default: False) Whether to print information about the cost at each iteration or not. - Returns ------- numpy.array of shape (barycenter_size, d) or (sz, d) if barycenter_size \ is None DBA barycenter of the provided time series dataset. - Examples -------- >>> time_series = [[1, 2, 3, 4], [1, 2, 4, 5]] @@ -754,20 +675,19 @@ def dtw_barycenter_averaging_subgradient(X, barycenter_size=None, [2. ], [3.5...], [4.5...]]) - References ---------- .. [1] F. Petitjean, A. Ketterlin & P. Gancarski. A global averaging method for dynamic time warping, with applications to clustering. 
Pattern Recognition, Elsevier, 2011, Vol. 44, Num. 3, pp. 678-693 - .. [2] D. Schultz and B. Jain. Nonsmooth Analysis and Subgradient Methods for Averaging in Dynamic Time Warping Spaces. Pattern Recognition, 74, 340-358. """ rng = check_random_state(random_state) - X_ = to_time_series_dataset(X) + #X_ = to_time_series_dataset(X) + X_ = X if barycenter_size is None: barycenter_size = X_.shape[1] weights = _set_weights(weights, X_.shape[0]) @@ -832,9 +752,7 @@ def softdtw_barycenter(X, gamma=1.0, weights=None, method="L-BFGS-B", tol=1e-3, max_iter=50, init=None): """Compute barycenter (time series averaging) under the soft-DTW [1] geometry. - Soft-DTW was originally presented in [1]_. - Parameters ---------- X : array-like, shape=(n_ts, sz, d) @@ -855,13 +773,11 @@ def softdtw_barycenter(X, gamma=1.0, weights=None, method="L-BFGS-B", tol=1e-3, init: array or None (default: None) Initial barycenter to start from for the optimization process. If `None`, euclidean barycenter is used as a starting point. - Returns ------- numpy.array of shape (bsz, d) where `bsz` is the size of the `init` array \ if provided or `sz` otherwise Soft-DTW barycenter of the provided time series dataset. - Examples -------- >>> time_series = [[1, 2, 3, 4], [1, 2, 4, 5]] @@ -877,7 +793,6 @@ def softdtw_barycenter(X, gamma=1.0, weights=None, method="L-BFGS-B", tol=1e-3, [2.67573269], [3.51057026], [4.33645802]]) - References ---------- .. [1] M. Cuturi, M. Blondel "Soft-DTW: a Differentiable Loss Function for diff --git a/tslearn/clustering.py b/tslearn/clustering.py index e94238b07..e249e311f 100644 --- a/tslearn/clustering.py +++ b/tslearn/clustering.py @@ -2,7 +2,7 @@ The :mod:`tslearn.clustering` module gathers time series specific clustering algorithms. -**User guide:** See the :ref:`Clustering ` section for further +**User guide:** See the :ref:`Clustering ` section for further details. """ @@ -22,6 +22,7 @@ from scipy.spatial.distance import cdist import numpy import warnings +from joblib import parallel_backend, Parallel, delayed from tslearn.metrics import cdist_gak, cdist_dtw, cdist_soft_dtw, \ cdist_soft_dtw_normalized, sigma_gak @@ -181,14 +182,14 @@ def _compute_inertia(distances, assignments, squared=True): def silhouette_score(X, labels, metric=None, sample_size=None, - metric_params=None, n_jobs=None, verbose=0, + metric_params=None, n_jobs=None, verbose=0, random_state=None, **kwds): """Compute the mean Silhouette Coefficient of all samples (cf. [1]_ and [2]_). Read more in the `scikit-learn documentation `_. + # silhouette-coefficient>`_. Parameters ---------- @@ -228,7 +229,7 @@ def silhouette_score(X, labels, metric=None, sample_size=None, verbose : int (default: 0) If nonzero, print information about the inertia while learning - the model and joblib progress messages are printed. + the model and joblib progress messages are printed. random_state : int, RandomState instance or None, optional (default: None) The generator used to randomly select a subset of samples. 
If int, @@ -287,7 +288,7 @@ def silhouette_score(X, labels, metric=None, sample_size=None, if metric == "precomputed": sklearn_X = X elif metric == "dtw" or metric is None: - sklearn_X = cdist_dtw(X, n_jobs=n_jobs, verbose=verbose, + sklearn_X = cdist_dtw(X, n_jobs=n_jobs, verbose=verbose, global_constraint='sakoe_chiba', **metric_params_) elif metric == "softdtw": sklearn_X = cdist_soft_dtw_normalized(X, **metric_params_) @@ -328,7 +329,7 @@ class KernelKMeans(ClusterMixin, BaseModelPackage, TimeSeriesBaseEstimator): ---------- n_clusters : int (default: 3) Number of clusters to form. - + kernel : string, or callable (default: "gak") The kernel should either be "gak", in which case the Global Alignment Kernel from [2]_ is used or a value that is accepted as a metric @@ -349,12 +350,12 @@ class KernelKMeans(ClusterMixin, BaseModelPackage, TimeSeriesBaseEstimator): Number of time the k-means algorithm will be run with different centroid seeds. The final results will be the best output of n_init consecutive runs in terms of inertia. - + kernel_params : dict or None (default: None) Kernel parameters to be passed to the kernel function. None means no kernel parameter is set. - For Global Alignment Kernel, the only parameter of interest is `sigma`. - If set to 'auto', it is computed based on a sampling of the training + For Global Alignment Kernel, the only parameter of interest is `sigma`. + If set to 'auto', it is computed based on a sampling of the training set (cf :ref:`tslearn.metrics.sigma_gak `). If no specific value is set for `sigma`, its defaults to 1. @@ -363,10 +364,10 @@ class KernelKMeans(ClusterMixin, BaseModelPackage, TimeSeriesBaseEstimator): Bandwidth parameter for the Global Alignment kernel. If set to 'auto', it is computed based on a sampling of the training set (cf :ref:`tslearn.metrics.sigma_gak `) - + .. deprecated:: 0.4 - Setting `sigma` directly as a parameter for KernelKMeans and - GlobalAlignmentKernelKMeans is deprecated in version 0.4 and will + Setting `sigma` directly as a parameter for KernelKMeans and + GlobalAlignmentKernelKMeans is deprecated in version 0.4 and will be removed in 0.6. Use `kernel_params` instead. n_jobs : int or None, optional (default=None) @@ -378,7 +379,7 @@ class KernelKMeans(ClusterMixin, BaseModelPackage, TimeSeriesBaseEstimator): for more details. verbose : int (default: 0) - If nonzero, joblib progress messages are printed. + If nonzero, joblib progress messages are printed. random_state : integer or numpy.RandomState, optional Generator used to initialize the centers. If an integer is given, it @@ -658,6 +659,7 @@ def __init__(self, **kwargs): class TimeSeriesCentroidBasedClusteringMixin: """Mixin class for centroid-based clustering of time series.""" + def _post_fit(self, X_fitted, centroids, inertia): if numpy.isfinite(inertia) and (centroids is not None): self.cluster_centers_ = centroids @@ -722,7 +724,7 @@ class TimeSeriesKMeans(TransformerMixin, ClusterMixin, verbose : int (default: 0) If nonzero, print information about the inertia while learning - the model and joblib progress messages are printed. + the model and joblib progress messages are printed. random_state : integer or numpy.RandomState, optional Generator used to initialize the centers. 
        If an integer is given, it
@@ -836,7 +838,7 @@ def _fit_one_init(self, X, x_squared_norms, rs):
         if self.metric == "dtw":
             def metric_fun(x, y):
+                # Default to a Sakoe-Chiba band for speed, while letting any
+                # user-supplied constraint in metric_params take precedence.
+                metric_params.setdefault("global_constraint", "sakoe_chiba")
                 return cdist_dtw(x, y, n_jobs=self.n_jobs,
                                  verbose=self.verbose, **metric_params)

         elif self.metric == "softdtw":
             def metric_fun(x, y):
@@ -857,32 +859,30 @@ def metric_fun(x, y):
                             "is invalid" % self.init)
         self.cluster_centers_ = _check_full_length(self.cluster_centers_)
         old_inertia = numpy.inf
-
         for it in range(self.max_iter):
             self._assign(X)
             if self.verbose:
                 print("%.3f" % self.inertia_, end=" --> ")
             self._update_centroids(X)
-
             if numpy.abs(old_inertia - self.inertia_) < self.tol:
                 break
             old_inertia = self.inertia_
-
         if self.verbose:
             print("")
         self._iter = it + 1
-
         return self

     def _transform(self, X):
         metric_params = self._get_metric_params()
         if self.metric == "euclidean":
             return cdist(X.reshape((X.shape[0], -1)),
                          self.cluster_centers_.reshape((self.n_clusters, -1)),
                          metric="euclidean")
         elif self.metric == "dtw":
+            metric_params.setdefault("global_constraint", "sakoe_chiba")
             return cdist_dtw(X, self.cluster_centers_, n_jobs=self.n_jobs,
                              verbose=self.verbose, **metric_params)
         elif self.metric == "softdtw":
             return cdist_soft_dtw(X, self.cluster_centers_, **metric_params)
         else:
@@ -897,7 +897,7 @@ def _assign(self, X, update_class_attributes=True):
         _check_no_empty_cluster(self.labels_, self.n_clusters)
         if self.dtw_inertia and self.metric != "dtw":
             inertia_dists = cdist_dtw(X, self.cluster_centers_,
-                                      n_jobs=self.n_jobs,
+                                      n_jobs=self.n_jobs,
+                                      global_constraint="sakoe_chiba",
                                       verbose=self.verbose)
         else:
             inertia_dists = dists
@@ -908,23 +908,37 @@ def _assign(self, X, update_class_attributes=True):

     def _update_centroids(self, X):
         metric_params = self._get_metric_params()
+        if self.metric == "dtw":
+            # Every DBA center update is independent of the others, so the
+            # n_clusters barycenter computations are dispatched in parallel.
+            X_list = [X[self.labels_ == k] for k in range(self.n_clusters)]
+            cluster_centers = Parallel(backend="threading",
+                                       require="sharedmem",
+                                       n_jobs=self.n_clusters)(
+                delayed(dtw_barycenter_averaging)(
+                    X=X_list[k],
+                    barycenter_size=None,
+                    init_barycenter=self.cluster_centers_[k],
+                    metric_params=metric_params,
+                    max_iter=self.max_iter_barycenter,
+                    verbose=False)
+                for k in range(self.n_clusters))
+            for k in range(self.n_clusters):
+                self.cluster_centers_[k] = cluster_centers[k]
+            return
         for k in range(self.n_clusters):
-            if self.metric == "dtw":
-                self.cluster_centers_[k] = dtw_barycenter_averaging(
-                    X=X[self.labels_ == k],
-                    barycenter_size=None,
-                    init_barycenter=self.cluster_centers_[k],
-                    metric_params=metric_params,
-                    verbose=False)
-            elif self.metric == "softdtw":
+            if self.metric == "softdtw":
                 self.cluster_centers_[k] = softdtw_barycenter(
                     X=X[self.labels_ == k],
                     max_iter=self.max_iter_barycenter,
                     init=self.cluster_centers_[k],
                     **metric_params)
             else:
                 self.cluster_centers_[k] = euclidean_barycenter(
                     X=X[self.labels_ == k])

     def fit(self, X, y=None):
         """Compute k-means clustering.
@@ -945,7 +959,6 @@ def fit(self, X, y=None):
             extend=True,
             check_n_features_only=(self.metric != "euclidean")
         )
-
         self.labels_ = None
         self.inertia_ = numpy.inf
         self.cluster_centers_ = None
@@ -956,7 +969,7 @@ def fit(self, X, y=None):

         max_attempts = max(self.n_init, 10)

-        X_ = to_time_series_dataset(X)
+        # The dataset has already been validated by check_array/check_dims
+        # above, so the redundant (and costly) conversion is skipped.
+        X_ = X

         rs = check_random_state(self.random_state)

         if self.init == "k-means++" and self.metric == "euclidean":
@@ -1034,7 +1047,7 @@ def predict(self, X):

     def transform(self, X):
         """Transform X to a cluster-distance space.
-
         In the new space, each dimension is the distance to the cluster
         centers.
diff --git a/tslearn/metrics.py b/tslearn/metrics.py
index 78d32947f..e87d603fe 100644
--- a/tslearn/metrics.py
+++ b/tslearn/metrics.py
@@ -1,7 +1,6 @@
 """
 The :mod:`tslearn.metrics` module delivers time-series specific metrics to be
 used at the core of machine learning algorithms.
-
 **User guide:** See the :ref:`Dynamic Time Warping (DTW) ` section for
 further details.
 """
@@ -40,23 +40,18 @@ def _local_squared_dist(x, y):
 @njit()
 def njit_accumulated_matrix(s1, s2, mask):
     """Compute the accumulated cost matrix score between two time series.
-
     Parameters
     ----------
     s1 : array, shape = (sz1,)
         First time series.
-
     s2 : array, shape = (sz2,)
         Second time series
-
     mask : array, shape = (sz1, sz2)
         Mask. Unconsidered cells must have infinite values.
-
     Returns
     -------
     mat : array, shape = (sz1, sz2)
         Accumulated cost matrix.
-
     """
     l1 = s1.shape[0]
     l2 = s2.shape[0]
@@ -76,23 +71,18 @@ def njit_accumulated_matrix(s1, s2, mask):
 @njit(nogil=True)
 def njit_dtw(s1, s2, mask):
     """Compute the dynamic time warping score between two time series.
-
     Parameters
     ----------
     s1 : array, shape = (sz1,)
         First time series.
-
     s2 : array, shape = (sz2,)
         Second time series
-
     mask : array, shape = (sz1, sz2)
         Mask. Unconsidered cells must have infinite values.
-
     Returns
     -------
     dtw_score : float
         Dynamic Time Warping score between both time series.
-
     """
     cum_sum = njit_accumulated_matrix(s1, s2, mask)
     return numpy.sqrt(cum_sum[-1, -1])
@@ -127,30 +117,22 @@ def dtw_path(s1, s2, global_constraint=None, sakoe_chiba_radius=None,
     r"""Compute Dynamic Time Warping (DTW) similarity measure between
     (possibly multidimensional) time series and return both the path and the
     similarity.
-
     DTW is computed as the Euclidean distance between aligned time series,
     i.e., if :math:`\pi` is the alignment path:
-
     .. math::
-
         DTW(X, Y) = \sqrt{\sum_{(i, j) \in \pi} (X_{i} - Y_{j})^2}
-
     It is not required that both time series share the same size, but they
     must be the same dimension. DTW was originally presented in [1]_ and is
     discussed in more detail in our :ref:`dedicated user-guide page `.
-
     Parameters
     ----------
     s1
         A time series.
-
     s2
         Another time series. If not given, self-similarity of dataset1 is
         returned.
-
     global_constraint : {"itakura", "sakoe_chiba"} or None (default: None)
         Global constraint to restrict admissible paths for DTW.
-
     sakoe_chiba_radius : int or None (default: None)
         Radius to be used for Sakoe-Chiba band global constraint.
If None and `global_constraint` is set to "sakoe_chiba", a radius of @@ -160,7 +142,6 @@ def dtw_path(s1, s2, global_constraint=None, sakoe_chiba_radius=None, two. In this case, if `global_constraint` corresponds to no global constraint, a `RuntimeWarning` is raised and no global constraint is used. - itakura_max_slope : float or None (default: None) Maximum slope for the Itakura parallelogram constraint. If None and `global_constraint` is set to "itakura", a maximum slope @@ -170,16 +151,13 @@ def dtw_path(s1, s2, global_constraint=None, sakoe_chiba_radius=None, two. In this case, if `global_constraint` corresponds to no global constraint, a `RuntimeWarning` is raised and no global constraint is used. - Returns ------- list of integer pairs Matching path represented as a list of index pairs. In each pair, the first index corresponds to s1 and the second one corresponds to s2 - float Similarity score - Examples -------- >>> path, dist = dtw_path([1, 2, 3], [1., 2., 2., 3.]) @@ -189,19 +167,16 @@ def dtw_path(s1, s2, global_constraint=None, sakoe_chiba_radius=None, 0.0 >>> dtw_path([1, 2, 3], [1., 2., 2., 3., 4.])[1] 1.0 - See Also -------- dtw : Get only the similarity score for DTW cdist_dtw : Cross similarity matrix between time series datasets dtw_path_from_metric : Compute a DTW using a user-defined distance metric - References ---------- .. [1] H. Sakoe, S. Chiba, "Dynamic programming algorithm optimization for spoken word recognition," IEEE Transactions on Acoustics, Speech and Signal Processing, vol. 26(1), pp. 43--49, 1978. - """ s1 = to_time_series(s1, remove_nans=True) s2 = to_time_series(s2, remove_nans=True) @@ -219,20 +194,16 @@ def dtw_path(s1, s2, global_constraint=None, sakoe_chiba_radius=None, def njit_accumulated_matrix_from_dist_matrix(dist_matrix, mask): """Compute the accumulated cost matrix score between two time series using a precomputed distance matrix. - Parameters ---------- dist_matrix : array, shape = (sz1, sz2) Array containing the pairwise distances. - mask : array, shape = (sz1, sz2) Mask. Unconsidered cells must have infinite values. - Returns ------- mat : array, shape = (sz1, sz2) Accumulated cost matrix. - """ l1, l2 = dist_matrix.shape cum_sum = numpy.full((l1 + 1, l2 + 1), numpy.inf) @@ -254,44 +225,33 @@ def dtw_path_from_metric(s1, s2=None, metric="euclidean", r"""Compute Dynamic Time Warping (DTW) similarity measure between (possibly multidimensional) time series using a distance metric defined by the user and return both the path and the similarity. - Similarity is computed as the cumulative cost along the aligned time series. - It is not required that both time series share the same size, but they must be the same dimension. DTW was originally presented in [1]_. - Valid values for metric are the same as for scikit-learn `pairwise_distances`_ function i.e. a string (e.g. "euclidean", "sqeuclidean", "hamming") or a function that is used to compute the pairwise distances. See `scikit`_ and `scipy`_ documentations for more information about the available metrics. - Parameters ---------- s1 : array, shape = (sz1, d) if metric!="precomputed", (sz1, sz2) otherwise A time series or an array of pairwise distances between samples. - s2 : array, shape = (sz2, d), optional (default: None) A second time series, only allowed if metric != "precomputed". - metric : string or callable (default: "euclidean") Function used to compute the pairwise distances between each points of `s1` and `s2`. - If metric is "precomputed", `s1` is assumed to be a distance matrix. 
- If metric is an other string, it must be one of the options compatible with sklearn.metrics.pairwise_distances. - Alternatively, if metric is a callable function, it is called on pairs of rows of `s1` and `s2`. The callable should take two 1 dimensional arrays as input and return a value indicating the distance between them. - global_constraint : {"itakura", "sakoe_chiba"} or None (default: None) Global constraint to restrict admissible paths for DTW. - sakoe_chiba_radius : int or None (default: None) Radius to be used for Sakoe-Chiba band global constraint. If None and `global_constraint` is set to "sakoe_chiba", a radius of @@ -301,7 +261,6 @@ def dtw_path_from_metric(s1, s2=None, metric="euclidean", two. In this case, if `global_constraint` corresponds to no global constraint, a `RuntimeWarning` is raised and no global constraint is used. - itakura_max_slope : float or None (default: None) Maximum slope for the Itakura parallelogram constraint. If None and `global_constraint` is set to "itakura", a maximum slope @@ -311,71 +270,53 @@ def dtw_path_from_metric(s1, s2=None, metric="euclidean", two. In this case, if `global_constraint` corresponds to no global constraint, a `RuntimeWarning` is raised and no global constraint is used. - **kwds Additional arguments to pass to sklearn pairwise_distances to compute the pairwise distances. - Returns ------- list of integer pairs Matching path represented as a list of index pairs. In each pair, the first index corresponds to s1 and the second one corresponds to s2. - float Similarity score (sum of metric along the wrapped time series). - Examples -------- Lets create 2 numpy arrays to wrap: - >>> import numpy as np >>> rng = np.random.RandomState(0) >>> s1, s2 = rng.rand(5, 2), rng.rand(6, 2) - The wrapping can be done by passing a string indicating the metric to pass to scikit-learn pairwise_distances: - >>> dtw_path_from_metric(s1, s2, ... metric="sqeuclidean") # doctest: +ELLIPSIS ([(0, 0), (0, 1), (1, 2), (2, 3), (3, 4), (4, 5)], 1.117...) - Or by defining a custom distance function: - >>> sqeuclidean = lambda x, y: np.sum((x-y)**2) >>> dtw_path_from_metric(s1, s2, metric=sqeuclidean) # doctest: +ELLIPSIS ([(0, 0), (0, 1), (1, 2), (2, 3), (3, 4), (4, 5)], 1.117...) - Or by using a precomputed distance matrix as input: - >>> from sklearn.metrics.pairwise import pairwise_distances >>> dist_matrix = pairwise_distances(s1, s2, metric="sqeuclidean") >>> dtw_path_from_metric(dist_matrix, ... metric="precomputed") # doctest: +ELLIPSIS ([(0, 0), (0, 1), (1, 2), (2, 3), (3, 4), (4, 5)], 1.117...) - Notes -------- By using a squared euclidean distance metric as shown above, the output path is the same as the one obtained by using dtw_path but the similarity score is the sum of squared distances instead of the euclidean distance. - See Also -------- dtw_path : Get both the matching path and the similarity score for DTW - References ---------- .. [1] H. Sakoe, S. Chiba, "Dynamic programming algorithm optimization for spoken word recognition," IEEE Transactions on Acoustics, Speech and Signal Processing, vol. 26(1), pp. 43--49, 1978. - .. _pairwise_distances: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise_distances.html - .. _scikit: https://scikit-learn.org/stable/modules/metrics.html - .. 
_scipy: https://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.distance.pdist.html - """ # noqa: E501 if metric == "precomputed": # Pairwise distance given as input sz1, sz2 = s1.shape @@ -402,31 +343,22 @@ def dtw(s1, s2, global_constraint=None, sakoe_chiba_radius=None, itakura_max_slope=None): r"""Compute Dynamic Time Warping (DTW) similarity measure between (possibly multidimensional) time series and return it. - DTW is computed as the Euclidean distance between aligned time series, i.e., if :math:`\pi` is the optimal alignment path: - .. math:: - DTW(X, Y) = \sqrt{\sum_{(i, j) \in \pi} \|X_{i} - Y_{j}\|^2} - Note that this formula is still valid for the multivariate case. - It is not required that both time series share the same size, but they must be the same dimension. DTW was originally presented in [1]_ and is discussed in more details in our :ref:`dedicated user-guide page `. - Parameters ---------- s1 A time series. - s2 Another time series. - global_constraint : {"itakura", "sakoe_chiba"} or None (default: None) Global constraint to restrict admissible paths for DTW. - sakoe_chiba_radius : int or None (default: None) Radius to be used for Sakoe-Chiba band global constraint. If None and `global_constraint` is set to "sakoe_chiba", a radius of @@ -436,7 +368,6 @@ def dtw(s1, s2, global_constraint=None, sakoe_chiba_radius=None, two. In this case, if `global_constraint` corresponds to no global constraint, a `RuntimeWarning` is raised and no global constraint is used. - itakura_max_slope : float or None (default: None) Maximum slope for the Itakura parallelogram constraint. If None and `global_constraint` is set to "itakura", a maximum slope @@ -446,30 +377,25 @@ def dtw(s1, s2, global_constraint=None, sakoe_chiba_radius=None, two. In this case, if `global_constraint` corresponds to no global constraint, a `RuntimeWarning` is raised and no global constraint is used. - Returns ------- float Similarity score - Examples -------- >>> dtw([1, 2, 3], [1., 2., 2., 3.]) 0.0 >>> dtw([1, 2, 3], [1., 2., 2., 3., 4.]) 1.0 - See Also -------- dtw_path : Get both the matching path and the similarity score for DTW cdist_dtw : Cross similarity matrix between time series datasets - References ---------- .. [1] H. Sakoe, S. Chiba, "Dynamic programming algorithm optimization for spoken word recognition," IEEE Transactions on Acoustics, Speech and Signal Processing, vol. 26(1), pp. 43--49, 1978. - """ s1 = to_time_series(s1, remove_nans=True) s2 = to_time_series(s2, remove_nans=True) @@ -485,24 +411,18 @@ def dtw(s1, s2, global_constraint=None, sakoe_chiba_radius=None, def _max_steps(i, j, max_length, length_1, length_2): """Maximum number of steps required in a L-DTW process to reach a given cell. - Parameters ---------- i : int Cell row index - j : int Cell column index - max_length : int Maximum allowed length - length_1 : int Length of the first time series - length_2 : int Length of the second time series - Returns ------- int @@ -515,19 +435,15 @@ def _max_steps(i, j, max_length, length_1, length_2): def _limited_warping_length_cost(s1, s2, max_length): r"""Compute accumulated scores necessary fo L-DTW. - Parameters ---------- s1 A time series. - s2 Another time series. - max_length : int Maximum allowed warping path length. Should be an integer between XXX and YYY. 
# TODO - Returns ------- dict @@ -567,55 +483,43 @@ def dtw_limited_warping_length(s1, s2, max_length): r"""Compute Dynamic Time Warping (DTW) similarity measure between (possibly multidimensional) time series under an upper bound constraint on the resulting path length and return the similarity cost. - DTW is computed as the Euclidean distance between aligned time series, i.e., if :math:`\pi` is the optimal alignment path: - .. math:: - DTW(X, Y) = \sqrt{\sum_{(i, j) \in \pi} \|X_{i} - Y_{j}\|^2} - Note that this formula is still valid for the multivariate case. - It is not required that both time series share the same size, but they must be the same dimension. DTW was originally presented in [1]_. This constrained-length variant was introduced in [2]_. Both bariants are discussed in more details in our :ref:`dedicated user-guide page ` - Parameters ---------- s1 A time series. - s2 Another time series. - max_length : int Maximum allowed warping path length. If greater than len(s1) + len(s2), then it is equivalent to unconstrained DTW. If lower than max(len(s1), len(s2)), no path can be found and a ValueError is raised. - Returns ------- float Similarity score - Examples -------- >>> dtw_limited_warping_length([1, 2, 3], [1., 2., 2., 3.], 5) 0.0 >>> dtw_limited_warping_length([1, 2, 3], [1., 2., 2., 3., 4.], 5) 1.0 - See Also -------- dtw : Get the similarity score for DTW dtw_path_limited_warping_length : Get both the warping path and the similarity score for DTW with limited warping path length - References ---------- .. [1] H. Sakoe, S. Chiba, "Dynamic programming algorithm optimization for @@ -670,45 +574,34 @@ def dtw_path_limited_warping_length(s1, s2, max_length): (possibly multidimensional) time series under an upper bound constraint on the resulting path length and return the path as well as the similarity cost. - DTW is computed as the Euclidean distance between aligned time series, i.e., if :math:`\pi` is the optimal alignment path: - .. math:: - DTW(X, Y) = \sqrt{\sum_{(i, j) \in \pi} \|X_{i} - Y_{j}\|^2} - Note that this formula is still valid for the multivariate case. - It is not required that both time series share the same size, but they must be the same dimension. DTW was originally presented in [1]_. This constrained-length variant was introduced in [2]_. Both variants are discussed in more details in our :ref:`dedicated user-guide page ` - Parameters ---------- s1 A time series. - s2 Another time series. - max_length : int Maximum allowed warping path length. If greater than len(s1) + len(s2), then it is equivalent to unconstrained DTW. If lower than max(len(s1), len(s2)), no path can be found and a ValueError is raised. - Returns ------- list of integer pairs Optimal path - float Similarity score - Examples -------- >>> path, cost = dtw_path_limited_warping_length([1, 2, 3], @@ -723,13 +616,11 @@ def dtw_path_limited_warping_length(s1, s2, max_length): 1.0 >>> path [(0, 0), (1, 1), (1, 2), (2, 3), (2, 4)] - See Also -------- dtw_limited_warping_length : Get the similarity score for DTW with limited warping path length dtw_path : Get both the matching path and the similarity score for DTW - References ---------- .. [1] H. Sakoe, S. Chiba, "Dynamic programming algorithm optimization for @@ -779,15 +670,12 @@ def _subsequence_cost_matrix(subseq, longseq): def subsequence_cost_matrix(subseq, longseq): """Compute the accumulated cost matrix score between a subsequence and a reference time series. 
- Parameters ---------- subseq : array, shape = (sz1, d) Subsequence time series. - longseq : array, shape = (sz2, d) Reference time series - Returns ------- mat : array, shape = (sz1, sz2) @@ -823,7 +711,6 @@ def _subsequence_path(acc_cost_mat, idx_path_end): def subsequence_path(acc_cost_mat, idx_path_end): r"""Compute the optimal path through a accumulated cost matrix given the endpoint of the sequence. - Parameters ---------- acc_cost_mat: array, shape = (sz1, sz2) @@ -831,7 +718,6 @@ def subsequence_path(acc_cost_mat, idx_path_end): sequence. idx_path_end: int The end position of the matched subsequence in the longer sequence. - Returns ------- path: list of tuples of integer pairs @@ -839,10 +725,8 @@ def subsequence_path(acc_cost_mat, idx_path_end): first index corresponds to `subseq` and the second one corresponds to `longseq`. The startpoint of the Path is :math:`P_0 = (0, ?)` and it ends at :math:`P_L = (len(subseq)-1, idx\_path\_end)` - Examples -------- - >>> acc_cost_mat = numpy.array([[1., 0., 0., 1., 4.], ... [5., 1., 1., 0., 1.]]) >>> # calculate the globally optimal path @@ -850,12 +734,10 @@ def subsequence_path(acc_cost_mat, idx_path_end): >>> path = subsequence_path(acc_cost_mat, optimal_end_point) >>> path [(0, 2), (1, 3)] - See Also -------- dtw_subsequence_path : Get the similarity score for DTW subsequence_cost_matrix: Calculate the required cost matrix - """ return _subsequence_path(acc_cost_mat, idx_path_end) @@ -864,30 +746,23 @@ def dtw_subsequence_path(subseq, longseq): r"""Compute sub-sequence Dynamic Time Warping (DTW) similarity measure between a (possibly multidimensional) query and a long time series and return both the path and the similarity. - DTW is computed as the Euclidean distance between aligned time series, i.e., if :math:`\pi` is the alignment path: - .. math:: - DTW(X, Y) = \sqrt{\sum_{(i, j) \in \pi} \|X_{i} - Y_{j}\|^2} - Compared to traditional DTW, here, border constraints on admissible paths :math:`\pi` are relaxed such that :math:`\pi_0 = (0, ?)` and :math:`\pi_L = (N-1, ?)` where :math:`L` is the length of the considered path and :math:`N` is the length of the subsequence time series. - It is not required that both time series share the same size, but they must be the same dimension. This implementation finds the best matching starting and ending positions for `subseq` inside `longseq`. - Parameters ---------- subseq : array, shape = (sz1, d) A query time series. longseq : array, shape = (sz2, d) A reference (supposed to be longer than `subseq`) time series. - Returns ------- list of integer pairs @@ -896,7 +771,6 @@ def dtw_subsequence_path(subseq, longseq): `longseq`. float Similarity score - Examples -------- >>> path, dist = dtw_subsequence_path([2., 3.], [1., 2., 2., 3., 4.]) @@ -904,7 +778,6 @@ def dtw_subsequence_path(subseq, longseq): [(0, 2), (1, 3)] >>> dist 0.0 - See Also -------- dtw : Get the similarity score for DTW @@ -923,23 +796,18 @@ def dtw_subsequence_path(subseq, longseq): @njit() def sakoe_chiba_mask(sz1, sz2, radius=1): """Compute the Sakoe-Chiba mask. - Parameters ---------- sz1 : int The size of the first time series - sz2 : int The size of the second time series. - radius : int The radius of the band. - Returns ------- mask : array, shape = (sz1, sz2) Sakoe-Chiba mask. - Examples -------- >>> sakoe_chiba_mask(4, 4, 1) @@ -976,18 +844,14 @@ def sakoe_chiba_mask(sz1, sz2, radius=1): def _njit_itakura_mask(sz1, sz2, max_slope=2.): """Compute the Itakura mask without checking that the constraints are feasible. 
In most cases, you should use itakura_mask instead. - Parameters ---------- sz1 : int The size of the first time series - sz2 : int The size of the second time series. - max_slope : float (default = 2) The maximum slope of the parallelogram. - Returns ------- mask : array, shape = (sz1, sz2) @@ -1025,23 +889,18 @@ def _njit_itakura_mask(sz1, sz2, max_slope=2.): def itakura_mask(sz1, sz2, max_slope=2.): """Compute the Itakura mask. - Parameters ---------- sz1 : int The size of the first time series - sz2 : int The size of the second time series. - max_slope : float (default = 2) The maximum slope of the parallelogram. - Returns ------- mask : array, shape = (sz1, sz2) Itakura mask. - Examples -------- >>> itakura_mask(6, 6) @@ -1077,21 +936,17 @@ def itakura_mask(sz1, sz2, max_slope=2.): def compute_mask(s1, s2, global_constraint=0, sakoe_chiba_radius=None, itakura_max_slope=None): """Compute the mask (region constraint). - Parameters ---------- s1 : array A time series or integer. - s2: array Another time series or integer. - global_constraint : {0, 1, 2} (default: 0) Global constraint to restrict admissible paths for DTW: - "itakura" if 1 - "sakoe_chiba" if 2 - no constraint otherwise - sakoe_chiba_radius : int or None (default: None) Radius to be used for Sakoe-Chiba band global constraint. If None and `global_constraint` is set to 2 (sakoe-chiba), a radius of @@ -1101,7 +956,6 @@ def compute_mask(s1, s2, global_constraint=0, two. In this case, if `global_constraint` corresponds to no global constraint, a `RuntimeWarning` is raised and no global constraint is used. - itakura_max_slope : float or None (default: None) Maximum slope for the Itakura parallelogram constraint. If None and `global_constraint` is set to 1 (itakura), a maximum slope @@ -1111,7 +965,6 @@ def compute_mask(s1, s2, global_constraint=0, two. In this case, if `global_constraint` corresponds to no global constraint, a `RuntimeWarning` is raised and no global constraint is used. - Returns ------- mask : array @@ -1148,26 +1001,21 @@ def _cdist_generic(dist_fun, dataset1, dataset2, n_jobs, verbose, compute_diagonal=True, dtype=numpy.float, *args, **kwargs): """Compute cross-similarity matrix with joblib parallelization for a given similarity function. - Parameters ---------- dist_fun : function Similarity function to be used - dataset1 : array-like A dataset of time series - dataset2 : array-like (default: None) Another dataset of time series. If `None`, self-similarity of `dataset1` is returned. - n_jobs : int or None, optional (default=None) The number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See scikit-learns' `Glossary `__ for more details. - verbose : int, optional (default=0) The verbosity level: if non zero, progress messages are printed. Above 50, the output is sent to stdout. @@ -1175,21 +1023,17 @@ def _cdist_generic(dist_fun, dataset1, dataset2, n_jobs, verbose, If it more than 10, all iterations are reported. `Glossary `__ for more details. - compute_diagonal : bool (default: True) Whether diagonal terms should be computed or assumed to be 0 in the self-similarity case. Used only if `dataset2` is `None`. - *args and **kwargs : Optional additional parameters to be passed to the similarity function. 
-
-
     Returns
     -------
     cdist : numpy.ndarray
         Cross-similarity matrix
     """ # noqa: E501
-    dataset1 = to_time_series_dataset(dataset1, dtype=dtype)
+    # Conversion is skipped on purpose: callers are expected to provide an
+    # already-formatted dataset so that no extra copy is made here.

     if dataset2 is None:
         # Inspired from code by @GillesVandewiele:
         # https://github.com/rtavenar/tslearn/pull/128#discussion_r314978479
         matrix = numpy.zeros((len(dataset1), len(dataset1)))
         indices = numpy.triu_indices(len(dataset1),
                                      k=0 if compute_diagonal else 1,
                                      m=len(dataset1))
         matrix[indices] = Parallel(n_jobs=n_jobs,
-                                   prefer="threads",
+                                   prefer="processes",
                                    verbose=verbose)(
             delayed(dist_fun)(
                 dataset1[i], dataset1[j],
@@ -1213,14 +1057,15 @@ def _cdist_generic(dist_fun, dataset1, dataset2, n_jobs, verbose,
         matrix[indices] = matrix.T[indices]
         return matrix
     else:
-        dataset2 = to_time_series_dataset(dataset2, dtype=dtype)
-        matrix = Parallel(n_jobs=n_jobs, prefer="threads", verbose=verbose)(
+        # dataset2 is likewise assumed to be formatted already. A single
+        # backend hint is enough for joblib to pick a process-based backend.
+        matrix = Parallel(n_jobs=n_jobs, prefer="processes",
+                          verbose=verbose)(
             delayed(dist_fun)(
                 dataset1[i], dataset2[j],
                 *args, **kwargs
             )
             for i in range(len(dataset1)) for j in range(len(dataset2))
         )
         return numpy.array(matrix).reshape((len(dataset1), -1))
@@ -1229,29 +1074,21 @@ def cdist_dtw(dataset1, dataset2=None, global_constraint=None,
               verbose=0):
     r"""Compute cross-similarity matrix using Dynamic Time Warping (DTW)
     similarity measure.
-
     DTW is computed as the Euclidean distance between aligned time series,
     i.e., if :math:`\pi` is the alignment path:
-
     .. math::
-
         DTW(X, Y) = \sqrt{\sum_{(i, j) \in \pi} (X_{i} - Y_{j})^2}
-
     DTW was originally presented in [1]_ and is discussed in more detail in
     our :ref:`dedicated user-guide page `.
-
     Parameters
     ----------
     dataset1 : array-like
         A dataset of time series
-
     dataset2 : array-like (default: None)
         Another dataset of time series. If `None`, self-similarity of
         `dataset1` is returned.
-
     global_constraint : {"itakura", "sakoe_chiba"} or None (default: None)
         Global constraint to restrict admissible paths for DTW.
-
     sakoe_chiba_radius : int or None (default: None)
         Radius to be used for Sakoe-Chiba band global constraint.
         If None and `global_constraint` is set to "sakoe_chiba", a radius of
         1 is used.
         If both `sakoe_chiba_radius` and `itakura_max_slope` are set,
         `global_constraint` is used to infer which constraint to use among the
         two. In this case, if `global_constraint` corresponds to no global
         constraint, a `RuntimeWarning` is raised and no global constraint is
         used.
-
     itakura_max_slope : float or None (default: None)
         Maximum slope for the Itakura parallelogram constraint.
         If None and `global_constraint` is set to "itakura", a maximum slope
         of 2. is used.
         If both `sakoe_chiba_radius` and `itakura_max_slope` are set,
         `global_constraint` is used to infer which constraint to use among the
         two. In this case, if `global_constraint` corresponds to no global
         constraint, a `RuntimeWarning` is raised and no global constraint is
         used.
-
     n_jobs : int or None, optional (default=None)
         The number of jobs to run in parallel.
         ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.
         ``-1`` means using all processors. See scikit-learns'
         `Glossary `__
         for more details.
-
     verbose : int, optional (default=0)
         The verbosity level: if non zero, progress messages are printed.
         Above 50, the output is sent to stdout.
         The frequency of the messages increases with the verbosity level.
         If it is more than 10, all iterations are reported.
         `Glossary `__
         for more details.
- Returns ------- cdist : numpy.ndarray Cross-similarity matrix - Examples -------- >>> cdist_dtw([[1, 2, 2, 3], [1., 2., 3., 4.]]) array([[0. , 1. ], [1. , 0. ]]) >>> cdist_dtw([[1, 2, 2, 3], [1., 2., 3., 4.]], [[1, 2, 3], [2, 3, 4, 5]]) array([[0. , 2.44948974], [1. , 1.41421356]]) - See Also -------- dtw : Get DTW similarity score - References ---------- .. [1] H. Sakoe, S. Chiba, "Dynamic programming algorithm optimization for @@ -1345,12 +1175,10 @@ def _gak_gram(s1, s2, sigma=1.): def unnormalized_gak(s1, s2, sigma=1.): r"""Compute Global Alignment Kernel (GAK) between (possibly multidimensional) time series and return it. - It is not required that both time series share the same size, but they must have the same dimension. GAK was originally presented in [1]_. This is an unnormalized version. - Parameters ---------- s1 A time series s2 Another time series sigma : float (default 1.) Bandwidth of the internal Gaussian kernel used for GAK - Returns ------- float Kernel value - Examples -------- >>> unnormalized_gak([1, 2, 3], ... [1., 2., 2., 3.], ... sigma=2.) # doctest: +ELLIPSIS 15.358... >>> unnormalized_gak([1, 2, 3], ... [1., 2., 2., 3., 4.]) # doctest: +ELLIPSIS 3.166... - See Also -------- gak : normalized version of GAK that ensures that k(x,x) = 1 cdist_gak : Compute cross-similarity matrix using Global Alignment kernel - References ---------- .. [1] M. Cuturi, "Fast global alignment kernels," ICML 2011. @@ -1396,13 +1220,11 @@ def unnormalized_gak(s1, s2, sigma=1.): def gak(s1, s2, sigma=1.): # TODO: better doc (formula for the kernel) r"""Compute Global Alignment Kernel (GAK) between (possibly multidimensional) time series and return it. - It is not required that both time series share the same size, but they must have the same dimension. GAK was originally presented in [1]_. This is a normalized version that ensures that :math:`k(x,x)=1` for all :math:`x` and :math:`k(x,y) \in [0, 1]` for all :math:`x, y`. - Parameters ---------- s1 A time series s2 Another time series sigma : float (default 1.) Bandwidth of the internal Gaussian kernel used for GAK - Returns ------- float Kernel value - Examples -------- >>> gak([1, 2, 3], [1., 2., 2., 3.], sigma=2.) # doctest: +ELLIPSIS 0.839... >>> gak([1, 2, 3], [1., 2., 2., 3., 4.]) # doctest: +ELLIPSIS 0.273... - See Also -------- cdist_gak : Compute cross-similarity matrix using Global Alignment kernel - References ---------- .. [1] M. Cuturi, "Fast global alignment kernels," ICML 2011. @@ -1439,9 +1257,7 @@ def gak(s1, s2, sigma=1.): # TODO: better doc (formula for the kernel) def cdist_gak(dataset1, dataset2=None, sigma=1., n_jobs=None, verbose=0): r"""Compute cross-similarity matrix using Global Alignment kernel (GAK). - GAK was originally presented in [1]_. - Parameters ---------- dataset1 A dataset of time series dataset2 Another dataset of time series sigma : float (default 1.) Bandwidth of the internal Gaussian kernel used for GAK n_jobs : int or None, optional (default=None) The number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See scikit-learn's `Glossary `__ for more details. verbose : int, optional (default=0) The verbosity level: if non-zero, progress messages are printed. Above 50, the output is sent to stdout. If it is more than 10, all iterations are reported. `Glossary `__ for more details. - Returns ------- numpy.ndarray Cross-similarity matrix - Examples -------- >>> cdist_gak([[1, 2, 2, 3], [1., 2., 3., 4.]], sigma=2.) array([[1. , 0.65629661], [0.65629661, 1. ]]) >>> cdist_gak([[1, 2, 2, 3], [1., 2., 3., 4.]], ... [[1, 2, 2, 3], [1., 2., 3., 4.], [1, 2, 2, 3]], ... sigma=2.) array([[0.71059484, 0.29722877, 0.71059484], [0.65629661, 1. 
, 0.65629661]]) - See Also -------- gak : Compute Global Alignment kernel - References ---------- .. [1] M. Cuturi, "Fast global alignment kernels," ICML 2011. @@ -1502,13 +1314,13 @@ def cdist_gak(dataset1, dataset2=None, sigma=1., n_jobs=None, verbose=0): else: dataset2 = to_time_series_dataset(dataset2) diagonal_left = Parallel(n_jobs=n_jobs, - prefer="threads", + prefer="processes", verbose=verbose)( delayed(unnormalized_gak)(dataset1[i], dataset1[i], sigma=sigma) for i in range(len(dataset1)) ) diagonal_right = Parallel(n_jobs=n_jobs, - prefer="threads", + prefer="processes", verbose=verbose)( delayed(unnormalized_gak)(dataset2[j], dataset2[j], sigma=sigma) for j in range(len(dataset2)) @@ -1520,9 +1332,7 @@ def cdist_gak(dataset1, dataset2=None, sigma=1., n_jobs=None, verbose=0): def sigma_gak(dataset, n_samples=100, random_state=None): r"""Compute sigma value to be used for GAK. - This method was originally presented in [1]_. - Parameters ---------- dataset A dataset of time series n_samples : int (default: 100) Number of samples on which median distance should be estimated random_state : integer or numpy.RandomState or None (default: None) The generator used to draw the samples. If an integer is given, it fixes the seed. Defaults to the global numpy random number generator. - Returns ------- float Suggested bandwidth (:math:`\sigma`) for the Global Alignment kernel - Examples -------- >>> dataset = [[1, 2, 2, 3], [1., 2., 3., 4.]] >>> sigma_gak(dataset=dataset, ... n_samples=200, ... random_state=0) # doctest: +ELLIPSIS 2.0... - See Also -------- gak : Compute Global Alignment kernel cdist_gak : Compute cross-similarity matrix using Global Alignment kernel - References ---------- .. [1] M. Cuturi, "Fast global alignment kernels," ICML 2011. @@ -1574,9 +1380,7 @@ def sigma_gak(dataset, n_samples=100, random_state=None): def gamma_soft_dtw(dataset, n_samples=100, random_state=None): r"""Compute gamma value to be used for GAK/Soft-DTW. - This method was originally presented in [1]_. - Parameters ---------- dataset A dataset of time series n_samples : int (default: 100) Number of samples on which median distance should be estimated random_state : integer or numpy.RandomState or None (default: None) The generator used to draw the samples. If an integer is given, it fixes the seed. Defaults to the global numpy random number generator. - Returns ------- float Suggested :math:`\gamma` parameter for the Soft-DTW - Examples -------- >>> dataset = [[1, 2, 2, 3], [1., 2., 3., 4.]] >>> gamma_soft_dtw(dataset=dataset, ... n_samples=200, ... random_state=0) # doctest: +ELLIPSIS 8.0... - See Also -------- sigma_gak : Compute sigma parameter for Global Alignment kernel - References ---------- .. [1] M. Cuturi, "Fast global alignment kernels," ICML 2011. @@ -1619,30 +1419,24 @@ def cdist_sax(dataset1, breakpoints_avg, size_fitted, dataset2=None, as presented in [1]_. Note that this function expects the time series in dataset1 and dataset2 to be normalized to each have zero mean and unit variance. - Parameters ---------- dataset1 : array-like A dataset of time series - breakpoints_avg : array-like The breakpoints used to assign the alphabet symbols. - size_fitted : int The original number of timesteps in the time series, before discretizing through SAX. - dataset2 : array-like (default: None) Another dataset of time series. If `None`, self-similarity of `dataset1` is returned. - n_jobs : int or None, optional (default=None) The number of jobs to run in parallel. 
``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See scikit-learn's `Glossary `__ for more details. - verbose : int, optional (default=0) The verbosity level: if non-zero, progress messages are printed. Above 50, the output is sent to stdout. @@ -1650,18 +1444,15 @@ def cdist_sax(dataset1, breakpoints_avg, size_fitted, dataset2=None, If it is more than 10, all iterations are reported. `Glossary `__ for more details. - Returns ------- cdist : numpy.ndarray Cross-similarity matrix - References ---------- .. [1] Lin, Jessica, et al. "Experiencing SAX: a novel symbolic representation of time series." Data Mining and Knowledge Discovery 15.2 (2007): 107-144. - """ # noqa: E501 return _cdist_generic(cydist_sax, dataset1, dataset2, n_jobs, verbose, False, int, breakpoints_avg, size_fitted) @@ -1669,9 +1460,7 @@ def cdist_sax(dataset1, breakpoints_avg, size_fitted, dataset2=None, def lb_keogh(ts_query, ts_candidate=None, radius=1, envelope_candidate=None): r"""Compute LB_Keogh. - LB_Keogh was originally presented in [1]_. - Parameters ---------- ts_query : array-like Query time-series to compare to the envelope of the candidate. ts_candidate : array-like or None (default: None) Candidate time-series. None means the envelope is provided via `envelope_candidate` parameter and hence does not need to be computed again. radius : int (default: 1) Radius to be used for the envelope generation (the envelope at time index i will be generated based on all observations from the time series at indices comprised between i-radius and i+radius). Not used if `ts_candidate` is None. envelope_candidate: pair of array-like (envelope_down, envelope_up) or None (default: None) Pre-computed envelope of the candidate time series. If set to None, it is computed based on `ts_candidate`. - Notes ----- This method requires a `ts_query` and `ts_candidate` (or `envelope_candidate`, depending on the call) to be of equal size. - Returns ------- float Distance between the query time series and the envelope of the candidate time series. - Examples -------- >>> ts1 = [1, 2, 3, 2, 1] @@ -1714,11 +1500,9 @@ def lb_keogh(ts_query, ts_candidate=None, radius=1, envelope_candidate=None): ... ts_candidate=ts1, ... radius=1) # doctest: +ELLIPSIS 2.8284... - See also -------- lb_envelope : Compute LB_Keogh-related envelope - References ---------- .. [1] Keogh, E. Exact indexing of dynamic time warping. In International @@ -1764,9 +1548,7 @@ def njit_lb_envelope(time_series, radius): def lb_envelope(ts, radius=1): r"""Compute time-series envelope as required by LB_Keogh. - LB_Keogh was originally presented in [1]_. - Parameters ---------- ts : array-like Time series for which the envelope should be computed. radius : int (default: 1) Radius to be used for the envelope generation (the envelope at time index i will be generated based on all observations from the time series at indices comprised between i-radius and i+radius). - Returns ------- array-like Lower-side of the envelope. array-like Upper-side of the envelope. - Examples -------- >>> ts1 = [1, 2, 3, 2, 1] @@ -1800,11 +1580,9 @@ def lb_envelope(ts, radius=1): [3.], [3.], [2.]]) - See also -------- lb_keogh : Compute LB_Keogh similarity - References ---------- .. [1] Keogh, E. Exact indexing of dynamic time warping. In International @@ -1815,25 +1593,18 @@ def lb_envelope(ts, radius=1): def soft_dtw(ts1, ts2, gamma=1.): r"""Compute Soft-DTW metric between two time series. - Soft-DTW was originally presented in [1]_ and is discussed in more details in our :ref:`user-guide page on DTW and its variants`. - Soft-DTW is computed as: - .. math:: - \text{soft-DTW}_{\gamma}(X, Y) = \min_{\pi}{}^\gamma \sum_{(i, j) \in \pi} \|X_i, Y_j\|^2 - where :math:`\min^\gamma` is the soft-min operator of parameter :math:`\gamma`. - In the limit case :math:`\gamma = 0`, :math:`\min^\gamma` reduces to a hard-min operator and soft-DTW is defined as the square of the DTW similarity measure. - Parameters ---------- ts1 A time series ts2 Another time series gamma : float (default 1.) 
Gamma parameter for Soft-DTW - Returns ------- float Similarity - Examples -------- >>> soft_dtw([1, 2, 2, 3], ... [1., 2., 2.1, 3.2], ... gamma=1.) # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS -0.89... >>> soft_dtw([1, 2, 3, 3], ... [1., 2., 2.1, 3.2], ... gamma=0.01) # doctest: +NORMALIZE_WHITESPACE +ELLIPSIS 0.089... - See Also -------- cdist_soft_dtw : Cross similarity matrix between time series datasets - References ---------- .. [1] M. Cuturi, M. Blondel "Soft-DTW: a Differentiable Loss Function for @@ -1876,25 +1643,18 @@ def soft_dtw(ts1, ts2, gamma=1.): def cdist_soft_dtw(dataset1, dataset2=None, gamma=1.): r"""Compute cross-similarity matrix using Soft-DTW metric. - Soft-DTW was originally presented in [1]_ and is discussed in more details in our :ref:`user-guide page on DTW and its variants`. - Soft-DTW is computed as: - .. math:: - \text{soft-DTW}_{\gamma}(X, Y) = \min_{\pi}{}^\gamma \sum_{(i, j) \in \pi} \|X_i, Y_j\|^2 - where :math:`\min^\gamma` is the soft-min operator of parameter :math:`\gamma`. - In the limit case :math:`\gamma = 0`, :math:`\min^\gamma` reduces to a hard-min operator and soft-DTW is defined as the square of the DTW similarity measure. - Parameters ---------- dataset1 A dataset of time series dataset2 Another dataset of time series gamma : float (default 1.) Gamma parameter for Soft-DTW - Returns ------- numpy.ndarray Cross-similarity matrix - Examples -------- >>> cdist_soft_dtw([[1, 2, 2, 3], [1., 2., 3., 4.]], gamma=.01) array([[-0.01098612, 1. ], [ 1. , 0. ]]) >>> cdist_soft_dtw([[1, 2, 2, 3], [1., 2., 3., 4.]], ... [[1, 2, 2, 3], [1., 2., 3., 4.]], gamma=.01) array([[-0.01098612, 1. ], [ 1. , 0. ]]) - See Also -------- soft_dtw : Compute Soft-DTW cdist_soft_dtw_normalized : Cross similarity matrix between time series datasets using a normalized version of Soft-DTW - References ---------- .. [1] M. Cuturi, M. Blondel "Soft-DTW: a Differentiable Loss Function for @@ -1961,65 +1717,48 @@ def cdist_soft_dtw(dataset1, dataset2=None, gamma=1.): def cdist_soft_dtw_normalized(dataset1, dataset2=None, gamma=1.): r"""Compute cross-similarity matrix using a normalized version of the Soft-DTW metric. - Soft-DTW was originally presented in [1]_ and is discussed in more details in our :ref:`user-guide page on DTW and its variants`. - Soft-DTW is computed as: - .. math:: - \text{soft-DTW}_{\gamma}(X, Y) = \min_{\pi}{}^\gamma \sum_{(i, j) \in \pi} \|X_i, Y_j\|^2 - where :math:`\min^\gamma` is the soft-min operator of parameter :math:`\gamma`. - In the limit case :math:`\gamma = 0`, :math:`\min^\gamma` reduces to a hard-min operator and soft-DTW is defined as the square of the DTW similarity measure. - This normalized version is defined as: - .. math:: - \text{norm-soft-DTW}_{\gamma}(X, Y) = \text{soft-DTW}_{\gamma}(X, Y) - \frac{1}{2} \left(\text{soft-DTW}_{\gamma}(X, X) + \text{soft-DTW}_{\gamma}(Y, Y)\right) - and ensures that all returned values are positive and that :math:`\text{norm-soft-DTW}_{\gamma}(X, X) = 0`. - Parameters ---------- dataset1 A dataset of time series - dataset2 Another dataset of time series - gamma : float (default 1.) Gamma parameter for Soft-DTW - Returns ------- numpy.ndarray Cross-similarity matrix - Examples -------- >>> time_series = numpy.random.randn(10, 15, 1) >>> numpy.alltrue(cdist_soft_dtw_normalized(time_series) >= 0.) True - See Also -------- soft_dtw : Compute Soft-DTW cdist_soft_dtw : Cross similarity matrix between time series datasets using the unnormalized version of Soft-DTW - References ---------- .. [1] M. Cuturi, M. 
Blondel "Soft-DTW: a Differentiable Loss Function for @@ -2039,7 +1778,6 @@ def __init__(self, D, gamma=1.): gamma: float Regularization parameter. Lower is less smoothed (closer to true DTW). - Attributes ---------- self.R_: array, shape = [m + 2, n + 2] @@ -2062,7 +1800,6 @@ def __init__(self, D, gamma=1.): def compute(self): """Compute soft-DTW by dynamic programming. - Returns ------- sdtw: float @@ -2078,7 +1815,6 @@ def compute(self): def grad(self): """Compute gradient of soft-DTW w.r.t. D by dynamic programming. - Returns ------- grad: array, shape = [m, n] @@ -2113,7 +1849,6 @@ def __init__(self, X, Y): First time series. Y: array, shape = [n, d] Second time series. - Examples -------- >>> SquaredEuclidean([1, 2, 2, 3], [1, 2, 3, 4]).compute() @@ -2127,7 +1862,6 @@ def __init__(self, X, Y): def compute(self): """Compute distance matrix. - Returns ------- D: array, shape = [m, n] @@ -2138,12 +1872,10 @@ def compute(self): def jacobian_product(self, E): """Compute the product between the Jacobian (a linear map from m x d to m x n) and a matrix E. - Parameters ---------- E: array, shape = [m, n] Matrix to be multiplied by the Jacobian. - Returns ------- G: array, shape = [m, d] @@ -2154,4 +1886,4 @@ def jacobian_product(self, E): _jacobian_product_sq_euc(self.X, self.Y, E.astype(numpy.float64), G) - return G + return G \ No newline at end of file From f7bb66b9263d3cb4c4e96f338a7caa838b2daa2b Mon Sep 17 00:00:00 2001 From: enricofallacara Date: Wed, 16 Dec 2020 10:58:48 +0100 Subject: [PATCH 2/3] fixed number of threads parameter --- tslearn/barycenters.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/tslearn/barycenters.py b/tslearn/barycenters.py index 162ae9f14..b92db80ee 100644 --- a/tslearn/barycenters.py +++ b/tslearn/barycenters.py @@ -20,7 +20,6 @@ import warnings from sklearn.utils import check_random_state from joblib import parallel_backend, Parallel, delayed -import psutil from tslearn.utils import to_time_series_dataset, check_equal_size, \ to_time_series, ts_size @@ -227,7 +226,7 @@ def dtw_barycenter_averaging_petitjean(X, barycenter_size=None, return barycenter -def _mm_assignment(X, barycenter, weights, metric_params=None): +def _mm_assignment(X, barycenter, weights, metric_params=None, n_threads=15): """Computes item assignment based on DTW alignments and returns the cost as a bonus. Parameters ---------- X : numpy.array of shape (n, sz, d) Time-series to be averaged @@ -256,7 +255,7 @@ def _mm_assignment(X, barycenter, weights, metric_params=None): cost = 0. list_p_k = [] #with parallel_backend('threading'): - res = Parallel(backend='threading',prefer="threads",require='sharedmem',n_jobs=15,verbose=10)(delayed(dtw_path)(barycenter, X[i], global_constraint="sakoe_chiba",sakoe_chiba_radius=1) for i in range(n)) + res = Parallel(backend='threading',prefer="threads",require='sharedmem',n_jobs=n_threads,verbose=10)(delayed(dtw_path)(barycenter, X[i], global_constraint="sakoe_chiba",sakoe_chiba_radius=1) for i in range(n)) paths, dists = zip(*res) paths = list(paths) @@ -271,7 +270,7 @@ def _mm_assignment(X, barycenter, weights, metric_params=None): return list_p_k, cost -def _subgradient_valence_warping(list_p_k, barycenter_size, weights): +def _subgradient_valence_warping(list_p_k, barycenter_size, weights, n_threads): """Compute Valence and Warping matrices from paths. Valence matrices are denoted :math:`V^{(k)}` and Warping matrices are :math:`W^{(k)}` in [1]_. @@ -302,17 +301,17 @@ def create_w_k(p_k, barycenter_size): for i, j in p_k: w_k[i, j] = 1. 
return w_k - w_k_ones = Parallel(backend='threading',prefer='threads',require='sharedmem', n_jobs=15, verbose = True)(delayed(create_w_k)(p_k,barycenter_size) for p_k in list_p_k) + w_k_ones = Parallel(backend='threading',prefer='threads',require='sharedmem', n_jobs=n_threads, verbose = True)(delayed(create_w_k)(p_k,barycenter_size) for p_k in list_p_k) def mult_w_k_weights(w_k, weight): return w_k * weight - list_w_k = Parallel(backend='threading',prefer='threads',require='sharedmem', n_jobs=15, verbose = True)(delayed(mult_w_k_weights)(w_k,weights[k]) for k,w_k in enumerate(w_k_ones)) + list_w_k = Parallel(backend='threading',prefer='threads',require='sharedmem', n_jobs=n_threads, verbose = True)(delayed(mult_w_k_weights)(w_k,weights[k]) for k,w_k in enumerate(w_k_ones)) def mult_w_k_sum_weights(w_k, weight): return w_k.sum(axis=1) * weight - list_v_k = Parallel(backend='threading',prefer='threads',require='sharedmem', n_jobs=15, verbose = True)(delayed(mult_w_k_sum_weights)(w_k,weight) for (w_k, weight) in zip(w_k_ones, weights)) + list_v_k = Parallel(backend='threading',prefer='threads',require='sharedmem', n_jobs=n_threads, verbose = True)(delayed(mult_w_k_sum_weights)(w_k,weight) for (w_k, weight) in zip(w_k_ones, weights)) return list_v_k, list_w_k -def _mm_valence_warping(list_p_k, barycenter_size, weights): +def _mm_valence_warping(list_p_k, barycenter_size, weights, n_threads): """Compute Valence and Warping matrices from paths. Valence matrices are denoted :math:`V^{(k)}` and Warping matrices are :math:`W^{(k)}` in [1]_. @@ -345,12 +344,12 @@ def _mm_valence_warping(list_p_k, barycenter_size, weights): #diag_sum_v_k = numpy.zeros(list_v_k[0].shape) #for v_k in list_v_k: # diag_sum_v_k += v_k - diag_sum_v_k = Parallel(backend='threading',prefer='threads',require='sharedmem', n_jobs=15, verbose = True)(delayed(numpy.sum)(x) for x in zip(*numpy.array(list_v_k))) + diag_sum_v_k = Parallel(backend='threading',prefer='threads',require='sharedmem', n_jobs=n_threads, verbose = True)(delayed(numpy.sum)(x) for x in zip(*numpy.array(list_v_k))) diag_sum_v_k = numpy.array(diag_sum_v_k) return diag_sum_v_k, list_w_k -def _mm_update_barycenter(X, diag_sum_v_k, list_w_k): +def _mm_update_barycenter(X, diag_sum_v_k, list_w_k, n_threads): """Update barycenters using the formula from Algorithm 2 in [1]_. 
Parameters ---------- @@ -375,8 +374,8 @@ def _mm_update_barycenter(X, diag_sum_v_k, list_w_k): #sum_w_x = numpy.zeros((barycenter_size, d)) #for k, (w_k, x_k) in enumerate(zip(list_w_k, X)): # sum_w_x += w_k.dot(x_k[:ts_size(x_k)]) - sum_w_x = Parallel(backend='threading',prefer='threads',require='sharedmem', n_jobs=15, verbose = True)(delayed(numpy.dot)(w_k,x_k[:ts_size(x_k)]) for (w_k, x_k) in zip(list_w_k, X)) - sum_w_x = Parallel(backend='threading',prefer='threads',require='sharedmem',n_jobs=15, verbose = True)(delayed(numpy.sum)(x) for x in zip(*numpy.array(sum_w_x))) + sum_w_x = Parallel(backend='threading',prefer='threads',require='sharedmem', n_jobs=n_threads, verbose = True)(delayed(numpy.dot)(w_k,x_k[:ts_size(x_k)]) for (w_k, x_k) in zip(list_w_k, X)) + sum_w_x = Parallel(backend='threading',prefer='threads',require='sharedmem',n_jobs=n_threads, verbose = True)(delayed(numpy.sum)(x) for x in zip(*numpy.array(sum_w_x))) sum_w_x = numpy.array(sum_w_x).reshape((len(sum_w_x), 1)) inverse_diag_sum_v_k = numpy.reciprocal(diag_sum_v_k) inverse_diag_sum_v_k = numpy.diag(inverse_diag_sum_v_k) @@ -422,7 +421,7 @@ def _subgradient_update_barycenter(X, list_diag_v_k, list_w_k, weights_sum, def dtw_barycenter_averaging(X, barycenter_size=None, init_barycenter=None, - max_iter=30, tol=1e-5, weights=None, + max_iter=30, tol=1e-5, weights=None, n_threads=15, metric_params=None, verbose=False, n_init=1): """DTW Barycenter Averaging (DBA) method estimated through @@ -530,6 +529,7 @@ def dtw_barycenter_averaging(X, barycenter_size=None, init_barycenter=None, def dtw_barycenter_averaging_one_init(X, barycenter_size=None, init_barycenter=None, max_iter=30, tol=1e-5, weights=None, + n_threads=15, metric_params=None, verbose=False): """DTW Barycenter Averaging (DBA) method estimated through @@ -591,12 +591,12 @@ def dtw_barycenter_averaging_one_init(X, barycenter_size=None, barycenter = init_barycenter cost_prev, cost = numpy.inf, numpy.inf for it in range(max_iter): - list_p_k, cost = _mm_assignment(X, barycenter, weights, metric_params) + list_p_k, cost = _mm_assignment(X, barycenter, weights, metric_params, n_threads) diag_sum_v_k, list_w_k = _mm_valence_warping(list_p_k, barycenter_size, - weights) + weights, n_threads) if verbose: print("[DBA] epoch %d, cost: %.3f" % (it + 1, cost)) - barycenter = _mm_update_barycenter(X, diag_sum_v_k, list_w_k) + barycenter = _mm_update_barycenter(X, diag_sum_v_k, list_w_k, n_threads) if abs(cost_prev - cost) < tol: break elif cost_prev < cost: From 6e2ce6b1515683331e376f48f57ba62e5b41ce6d Mon Sep 17 00:00:00 2001 From: Romain Tavenard Date: Tue, 5 Jan 2021 12:09:05 +0100 Subject: [PATCH 3/3] Merge branch 'master' into parallel_time_series_k_means # Conflicts: # tslearn/barycenters/dba.py # tslearn/clustering/kmeans.py # tslearn/metrics/dtw_variants.py Co-authored-by: enricofallacara --- tslearn/metrics/utils.py | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/tslearn/metrics/utils.py b/tslearn/metrics/utils.py index ece61c435..630d68be6 100644 --- a/tslearn/metrics/utils.py +++ b/tslearn/metrics/utils.py @@ -1,5 +1,5 @@ import numpy -from joblib import Parallel, delayed +from joblib import Parallel, delayed, parallel_backend from tslearn.utils import to_time_series_dataset __author__ = 'Romain Tavenard romain.tavenard[at]univ-rennes2.fr' @@ -9,26 +9,21 @@ def _cdist_generic(dist_fun, dataset1, dataset2, n_jobs, verbose, compute_diagonal=True, dtype=numpy.float, *args, **kwargs): """Compute cross-similarity matrix with 
joblib parallelization for a given similarity function. - Parameters ---------- dist_fun : function Similarity function to be used - dataset1 : array-like A dataset of time series - dataset2 : array-like (default: None) Another dataset of time series. If `None`, self-similarity of `dataset1` is returned. - n_jobs : int or None, optional (default=None) The number of jobs to run in parallel. ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. ``-1`` means using all processors. See scikit-learn's `Glossary `__ for more details. - verbose : int, optional (default=0) The verbosity level: if non-zero, progress messages are printed. Above 50, the output is sent to stdout. @@ -36,21 +31,17 @@ def _cdist_generic(dist_fun, dataset1, dataset2, n_jobs, verbose, If it is more than 10, all iterations are reported. `Glossary `__ for more details. - compute_diagonal : bool (default: True) Whether diagonal terms should be computed or assumed to be 0 in the self-similarity case. Used only if `dataset2` is `None`. - *args and **kwargs : Optional additional parameters to be passed to the similarity function. - - Returns ------- cdist : numpy.ndarray Cross-similarity matrix """ # noqa: E501 - dataset1 = to_time_series_dataset(dataset1, dtype=dtype) + #dataset1 = to_time_series_dataset(dataset1, dtype=dtype) if dataset2 is None: # Inspired from code by @GillesVandewiele: @@ -60,7 +51,7 @@ def _cdist_generic(dist_fun, dataset1, dataset2, n_jobs, verbose, k=0 if compute_diagonal else 1, m=len(dataset1)) matrix[indices] = Parallel(n_jobs=n_jobs, - prefer="threads", + prefer="processes", verbose=verbose)( delayed(dist_fun)( dataset1[i], dataset1[j], @@ -74,12 +65,13 @@ def _cdist_generic(dist_fun, dataset1, dataset2, n_jobs, verbose, matrix[indices] = matrix.T[indices] return matrix else: - dataset2 = to_time_series_dataset(dataset2, dtype=dtype) - matrix = Parallel(n_jobs=n_jobs, prefer="threads", verbose=verbose)( - delayed(dist_fun)( - dataset1[i], dataset2[j], - *args, **kwargs + #dataset2 = to_time_series_dataset(dataset2, dtype=dtype) + with parallel_backend('loky'): + matrix = Parallel(backend='loky', n_jobs=n_jobs, prefer="processes", verbose=verbose)( + delayed(dist_fun)( + dataset1[i], dataset2[j], + *args, **kwargs + ) + for i in range(len(dataset1)) for j in range(len(dataset2)) ) - for i in range(len(dataset1)) for j in range(len(dataset2)) - ) return numpy.array(matrix).reshape((len(dataset1), -1))
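To make the threads-versus-processes switch in `_cdist_generic` concrete, here is a minimal sketch (not part of the patch itself) contrasting the two joblib execution modes on pairwise DTW. It uses only the public `tslearn.metrics.dtw` and `joblib` APIs; the stated performance trade-off is an assumption to be benchmarked on real data, not a guarantee.

import numpy
from joblib import Parallel, delayed
from tslearn.metrics import dtw

dataset = numpy.random.randn(8, 40, 1)  # 8 univariate series of length 40
pairs = [(i, j) for i in range(len(dataset)) for j in range(len(dataset))]

# Thread-based execution: workers share memory, so the series are never
# serialized, but concurrency is limited by how much of dtw runs outside
# the GIL (e.g. in compiled sections).
d_threads = Parallel(n_jobs=4, prefer="threads")(
    delayed(dtw)(dataset[i], dataset[j]) for i, j in pairs)

# Process-based execution via loky, as _cdist_generic now prefers: true
# parallelism across cores, at the cost of pickling inputs to each worker.
d_processes = Parallel(n_jobs=4, backend="loky")(
    delayed(dtw)(dataset[i], dataset[j]) for i, j in pairs)

# Both backends compute the same distances; only the scheduling differs.
assert numpy.allclose(d_threads, d_processes)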
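On the barycenter side, a sketch of how the parallelized DBA entry point could be invoked once this branch is applied. The `n_threads` keyword (spelled `n_treads` in the original commits, corrected above) exists only in this patch series and is not part of upstream tslearn.

import numpy
from tslearn.barycenters import dtw_barycenter_averaging

# 20 univariate series of length 50, already shaped (n_ts, sz, d).
X = numpy.random.randn(20, 50, 1)

# n_threads caps the size of the shared-memory threading pools used in
# the assignment and barycenter-update steps of each DBA iteration.
barycenter = dtw_barycenter_averaging(X, max_iter=10, tol=1e-5,
                                      n_threads=4, verbose=True)
print(barycenter.shape)  # (50, 1)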
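Finally, note that both versions of `_cdist_generic` above comment out the `to_time_series_dataset` conversion, so inputs are implicitly assumed to already be `(n_ts, sz, d)` float arrays. The following sketch shows the conversion that would then have to happen on the caller's side; this requirement is inferred from the commented-out lines, not stated explicitly by the patch.

import numpy
from tslearn.utils import to_time_series_dataset
from tslearn.metrics import cdist_dtw

# Ragged Python lists are no longer normalized inside _cdist_generic,
# so convert them up front; shorter series are padded with NaN.
X = to_time_series_dataset([[1, 2, 3], [2, 3, 4, 5]])
print(X.shape)  # (2, 4, 1)

D = cdist_dtw(X)  # X already satisfies the (n_ts, sz, d) assumption
print(D.shape)  # (2, 2)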