77:organization: ETS
88"""
99import numpy as np
10+ import warnings
11+ from scipy .linalg import cholesky
1012
1113
12- def cov (x ):
14+ def inv_chol (x , logdet = False ):
15+ """
16+ Calculate inverse using cholesky.
17+ Optionally, calculate the log determinant
18+ of the cholesky.
19+
20+ Parameters
21+ ----------
22+ x : array-like
23+ The matrix to invert.
24+ logdet : bool, optional
25+ Whether to calculate the
26+ log determinant, instead of
27+ the inverse.
28+ Defaults to False.
29+
30+ Returns
31+ -------
32+ chol_inv : array-like
33+ The inverted matrix
34+ chol_logdet : array-like or None
35+ The log determinant, if `logdet=True`;
36+ otherwise, None.
37+ """
38+ chol = cholesky (x , lower = True )
39+
40+ chol_inv = np .linalg .inv (chol )
41+ chol_inv = np .dot (chol_inv .T , chol_inv )
42+ chol_logdet = None
43+
44+ if logdet :
45+ chol_diag = np .diag (chol )
46+ chol_logdet = np .sum (np .log (chol_diag * chol_diag ))
47+
48+ return chol_inv , chol_logdet
49+
50+
51+ def cov (x , ddof = 0 ):
1352 """
1453 Calculate the covariance matrix.
1554
@@ -19,13 +58,17 @@ def cov(x):
1958 A 1-D or 2-D array containing multiple variables
2059 and observations. Each column of x represents a variable,
2160 and each row a single observation of all those variables.
61+ ddof : int, optional
62+ Means Delta Degrees of Freedom. The divisor used in calculations
63+ is N - ddof, where N represents the number of elements.
64+ Defaults to 0.
2265
2366 Returns
2467 -------
2568 r : numpy array
2669 The covariance matrix of the variables.
2770 """
28- r = np .cov (x , rowvar = False , ddof = 0 )
71+ r = np .cov (x , rowvar = False , ddof = ddof )
2972 return r
3073
3174
@@ -185,20 +228,30 @@ def partial_correlations(x):
185228 variables.
186229 """
187230 numrows , numcols = x .shape
188- x_cov = cov (x )
231+ x_cov = cov (x , ddof = 1 )
189232 # create empty array for when we cannot compute the
190233 # matrix inversion
191234 empty_array = np .empty ((numcols , numcols ))
192235 empty_array [:] = np .nan
193236 if numcols > numrows :
194237 icvx = empty_array
195238 else :
196- # we also return nans if there is singularity in the data
197- # (e.g. all human scores are the same)
239+ # if the determinant is less than the lowest representable
240+ # 32 bit integer, then we use the pseudo-inverse;
241+ # otherwise, use the inverse; if a linear algebra error
242+ # occurs, then we just set the matrix to empty
198243 try :
244+ assert np .linalg .det (x_cov ) > np .finfo (np .float32 ).eps
199245 icvx = np .linalg .inv (x_cov )
246+ except AssertionError :
247+ icvx = np .linalg .pinv (x_cov )
248+ warnings .warn ('The inverse of the variance-covariance matrix '
249+ 'was calculated using the Moore-Penrose generalized '
250+ 'matrix inversion, due to its determinant being at '
251+ 'or very close to zero.' )
200252 except np .linalg .LinAlgError :
201253 icvx = empty_array
254+
202255 pcor = - 1 * covariance_to_correlation (icvx )
203256 np .fill_diagonal (pcor , 1.0 )
204257 return pcor
0 commit comments