Merge pull request #43 from EducationalTestingService/some-minor-updates

jbiggsets · web-flow · commit d039266de9df · 2019-10-16T13:42:44.000-04:00
Some minor updates
diff --git a/.circleci/config.yml b/.circleci/config.yml
@@ -10,9 +10,6 @@ jobs:
     working_directory: ~/repo
     steps:
       - checkout
-      - restore_cache:
-          keys:
-          - deps
 
       - run: rm -rf ~/repo/artifacts
       - run: mkdir ~/repo/artifacts
@@ -22,29 +19,18 @@ jobs:
             wget https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
             chmod +x miniconda.sh
             ./miniconda.sh -b -f
-            ~/miniconda3/bin/conda install --file conda_requirements.txt --yes
-            ~/miniconda3/bin/pip install nose-cov python-coveralls
-
-      - save_cache:
-          paths:
-            - "~/miniconda3/pkgs"
-          key: deps
-
-      # install factor analyzer
-      - run:
-          name: Install factor analyzer
-          command: |
-            ~/miniconda3/bin/pip install -e .
+            export PATH=~/miniconda3/bin:$PATH
+            conda install -c anaconda --yes setuptools
+            conda install python=3.7 --file requirements.txt --yes
+            pip install nose nose-cov python-coveralls
+            pip install -e .
 
       # run all of the tests
       - run:
-          name: Run tests
-          command: ~/miniconda3/bin/nosetests -v tests --with-coverage --cover-package=factor_analyzer --cov-config .coveragerc
-
-      # change to factor analyzer directory and run coveralls
-      - run:
-          name: Run coveralls
-          command: cd ~/repo/factor_analyzer && ~/miniconda3/bin/coveralls
+          name: Run tests and coveralls
+          command: |
+            ~/miniconda3/bin/nosetests -v tests --with-coverage --cover-package=factor_analyzer --cov-config .coveragerc
+            cd ~/repo/factor_analyzer && ~/miniconda3/bin/coveralls
 
       - store_artifacts:
           path:  ~/repo/artifacts
diff --git a/conda_requirements.txt b/conda_requirements.txt
diff --git a/factor_analyzer/rotator.py b/factor_analyzer/rotator.py
@@ -191,9 +191,9 @@ def _oblimin_obj(self, loadings):
                 The value of the criterion for the objective.
         """
         X = np.dot(loadings**2, np.eye(loadings.shape[1]) != 1)
-        if (0 != self.gamma):
+        if (self.gamma != 0):
             p = loadings.shape[0]
-            X = np.diag(1, p) - np.dot(np.zeros((p, p)), X)
+            X = np.diag(np.full(1, p)) - np.dot(np.zeros((p, p)), X)
         gradient = loadings * X
         criterion = np.sum(loadings**2 * X) / 4
         return {'grad': gradient, 'criterion': criterion}
diff --git a/factor_analyzer/test_utils.py b/factor_analyzer/test_utils.py
@@ -340,7 +340,8 @@ def check_rotation(test_name,
                    method,
                    rotation,
                    rel_tol=0,
-                   abs_tol=0.1):
+                   abs_tol=0.1,
+                   **kwargs):
     """
     Check the rotation results.
 
@@ -373,8 +374,8 @@ def check_rotation(test_name,
     r_loading = r_input['loading']
     r_loading = normalize(r_loading, absolute=False)
 
-    rotator = Rotator(method=rotation)
-    rotated_loading = rotator.fit_transform(r_loading, rotation)
+    rotator = Rotator(method=rotation, **kwargs)
+    rotated_loading = rotator.fit_transform(r_loading)
 
     r_output = collect_r_output(test_name, factors, method, rotation,
                                 output_types=['loading'])
diff --git a/factor_analyzer/utils.py b/factor_analyzer/utils.py
@@ -7,9 +7,48 @@
 :organization: ETS
 """
 import numpy as np
+import warnings
+from scipy.linalg import cholesky
 
 
-def cov(x):
+def inv_chol(x, logdet=False):
+    """
+    Calculate inverse using cholesky.
+    Optionally, calculate the log determinant
+    of `x` from its cholesky factor.
+
+    Parameters
+    ----------
+    x : array-like
+        The matrix to invert.
+    logdet : bool, optional
+        Whether to also calculate the
+        log determinant, in addition to
+        the inverse.
+        Defaults to False.
+
+    Returns
+    -------
+    chol_inv : array-like
+        The inverted matrix
+    chol_logdet : float or None
+        The log determinant, if `logdet=True`;
+        otherwise, None.
+    """
+    chol = cholesky(x, lower=True)
+
+    chol_inv = np.linalg.inv(chol)
+    chol_inv = np.dot(chol_inv.T, chol_inv)
+    chol_logdet = None
+
+    if logdet:
+        chol_diag = np.diag(chol)
+        chol_logdet = np.sum(np.log(chol_diag * chol_diag))
+
+    return chol_inv, chol_logdet
+
+
+def cov(x, ddof=0):
     """
     Calculate the covariance matrix.
 
@@ -19,13 +58,17 @@ def cov(x):
         A 1-D or 2-D array containing multiple variables
         and observations. Each column of x represents a variable,
         and each row a single observation of all those variables.
+    ddof : int, optional
+        Means Delta Degrees of Freedom. The divisor used in calculations
+        is N - ddof, where N represents the number of observations.
+        Defaults to 0.
 
     Returns
     -------
     r : numpy array
         The covariance matrix of the variables.
     """
-    r = np.cov(x, rowvar=False, ddof=0)
+    r = np.cov(x, rowvar=False, ddof=ddof)
     return r
 
 
@@ -185,20 +228,30 @@ def partial_correlations(x):
         variables.
     """
     numrows, numcols = x.shape
-    x_cov = cov(x)
+    x_cov = cov(x, ddof=1)
     # create empty array for when we cannot compute the
     # matrix inversion
     empty_array = np.empty((numcols, numcols))
     empty_array[:] = np.nan
     if numcols > numrows:
         icvx = empty_array
     else:
-        # we also return nans if there is singularity in the data
-        # (e.g. all human scores are the same)
+        # if the determinant is not greater than the machine epsilon
+        # for 32-bit floats, then we use the pseudo-inverse;
+        # otherwise, use the inverse; if a linear algebra error
+        # occurs, then we just fall back to the all-NaN matrix
         try:
+            assert np.linalg.det(x_cov) > np.finfo(np.float32).eps
             icvx = np.linalg.inv(x_cov)
+        except AssertionError:
+            icvx = np.linalg.pinv(x_cov)
+            warnings.warn('The inverse of the variance-covariance matrix '
+                          'was calculated using the Moore-Penrose generalized '
+                          'matrix inversion, due to its determinant being at '
+                          'or very close to zero.')
         except np.linalg.LinAlgError:
             icvx = empty_array
+
     pcor = -1 * covariance_to_correlation(icvx)
     np.fill_diagonal(pcor, 1.0)
     return pcor
diff --git a/tests/expected/test07_gamma/loading_uls_none_2_test07_gamma.csv b/tests/expected/test07_gamma/loading_uls_none_2_test07_gamma.csv
@@ -0,0 +1,24 @@
+"","ULS1","ULS2"
+"q01",0.556214446794607,0.12038813487999
+"q02",-0.281822776327459,0.387645428358021
+"q03",-0.60542332172341,0.253798600475957
+"q04",0.606804469688735,0.0934257765516741
+"q05",0.523025792652978,0.0534918898887911
+"q06",0.529725212374218,0.0417559566308573
+"q07",0.660720452123604,-0.00325533411175857
+"q08",0.531132789779371,0.397312113320854
+"q09",-0.266230283279582,0.469334924366135
+"q10",0.404307105368452,0.00582165019885599
+"q11",0.633075237129907,0.262235950578387
+"q12",0.643286181854888,-0.0807440059731107
+"q13",0.647738977613556,0.0396589226602089
+"q14",0.628659441568136,-0.0173462348178553
+"q15",0.561403964204197,0.000270516482592699
+"q16",0.654041340932595,-0.00697912977316184
+"q17",0.628165284655674,0.339069156172524
+"q18",0.678697773243279,-0.0132586489358537
+"q19",-0.398762512423482,0.282518905874988
+"q20",0.40464340977971,-0.152208016747372
+"q21",0.630863031076314,-0.0722141114458285
+"q22",-0.277948597756047,0.2903121776673
+"q23",-0.130382489433764,0.192236993346541
diff --git a/tests/expected/test07_gamma/loading_uls_oblimin_2_test07_gamma.csv b/tests/expected/test07_gamma/loading_uls_oblimin_2_test07_gamma.csv
@@ -0,0 +1,24 @@
+,ULS1,ULS2
+q01,0.5562144467946071,0.12038813487998999
+q02,-0.281822776327459,0.387645428358021
+q03,-0.6054233217234101,0.253798600475957
+q04,0.606804469688735,0.0934257765516741
+q05,0.523025792652978,0.0534918898887911
+q06,0.529725212374218,0.0417559566308573
+q07,0.660720452123604,-0.0032553341117585698
+q08,0.5311327897793711,0.39731211332085403
+q09,-0.266230283279582,0.469334924366135
+q10,0.404307105368452,0.00582165019885599
+q11,0.633075237129907,0.262235950578387
+q12,0.643286181854888,-0.0807440059731107
+q13,0.647738977613556,0.0396589226602089
+q14,0.628659441568136,-0.0173462348178553
+q15,0.561403964204197,0.00027051648259269903
+q16,0.654041340932595,-0.00697912977316184
+q17,0.628165284655674,0.33906915617252403
+q18,0.6786977732432788,-0.0132586489358537
+q19,-0.3987625124234821,0.282518905874988
+q20,0.40464340977971003,-0.15220801674737197
+q21,0.6308630310763139,-0.0722141114458285
+q22,-0.27794859775604697,0.2903121776673
+q23,-0.13038248943376402,0.192236993346541
diff --git a/tests/test_expected_rotator.py b/tests/test_expected_rotator.py
@@ -175,3 +175,15 @@ def test_07_equamax_minres_2_factors():
 
     check = check_rotation(test_name, factors, method, rotation)
     assert check > THRESHOLD
+
+
+def test_07_oblimin_minres_2_factors_gamma():
+
+    test_name = 'test07_gamma'
+    factors = 2
+    method = 'uls'
+    rotation = 'oblimin'
+    gamma = 0.5
+
+    check = check_rotation(test_name, factors, method, rotation, gamma=gamma)
+    assert check > THRESHOLD
diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -233,7 +233,7 @@ def test_partial_correlations_num_columns_greater():
     assert_almost_equal(result, expected.values)
 
 
-def test_partial_correlations_catch_linalgerror():
+def test_partial_correlations_with_zero_det():
 
     # Covariance matrix that will be singular
     data = pd.DataFrame([[10, 10, 10, 10],
@@ -242,13 +242,14 @@ def test_partial_correlations_catch_linalgerror():
                          [20, 20, 20, 20],
                          [11, 11, 11, 11]])
 
-    empty_array = np.empty((4, 4))
-    empty_array[:] = np.nan
-    np.fill_diagonal(empty_array, 1.0)
-
-    expected = pd.DataFrame(empty_array,
-                            columns=[0, 1, 2, 3],
-                            index=[0, 1, 2, 3])
+    expected = [[1.0,
+                 -0.9999999999999998,
+                 -0.9999999999999998,
+                 -0.9999999999999998],
+                [-1.0000000000000004, 1.0, -1.0, -1.0],
+                [-1.0000000000000004, -1.0, 1.0, -1.0],
+                [-1.0000000000000004, -1.0, -1.0, 1.0]]
+    expected = pd.DataFrame(expected)
 
     result = partial_correlations(data)
     assert_almost_equal(result, expected.values)