@@ -10,12 +10,16 @@ struct CCA{T<:Real} <: RegressionModel
1010 xproj:: Matrix{T} # projection matrix for X, of size (dx, p)
1111 yproj:: Matrix{T} # projection matrix for Y, of size (dy, p)
1212 corrs:: Vector{T} # correlations, of length p
13+ eigs:: Vector{T} # eigenvalues
14+ nobs:: Int64 # number of observations
1315
1416 function CCA (xm:: Vector{T} ,
1517 ym:: Vector{T} ,
1618 xp:: Matrix{T} ,
1719 yp:: Matrix{T} ,
18- crs:: Vector{T} ) where T<: Real
20+ crs:: Vector{T} ,
21+ eigs:: Vector{T} ,
22+ nobs:: Int ) where T<: Real
1923
2024 dx, px = size (xp)
2125 dy, py = size (yp)
@@ -32,7 +36,7 @@ struct CCA{T<:Real} <: RegressionModel
3236 length (crs) == px ||
3337 throw (DimensionMismatch (" Incorrect length of corrs." ))
3438
35- new {T} (xm, ym, xp, yp, crs)
39+ new {T} (xm, ym, xp, yp, crs, eigs, nobs )
3640 end
3741end
3842
@@ -177,7 +181,7 @@ function _ccacov(Cxx, Cyy, Cxy, xmean, ymean, p::Int)
177181 G = cholesky (Cyy) \ Cxy'
178182 Ex = eigen (Symmetric (Cxy * G), Symmetric (Cxx))
179183 ord = sortperm (Ex. values; rev= true )
180- vx , Px = extract_kv (Ex, ord, p)
184+ eigs , Px = extract_kv (Ex, ord, p)
181185 Py = qnormalize! (G * Px, Cyy)
182186 else
183187 # solve Py: (Cyx * inv(Cxx) * Cxy) Py = λ Cyy Py
@@ -186,7 +190,7 @@ function _ccacov(Cxx, Cyy, Cxy, xmean, ymean, p::Int)
186190 H = cholesky (Cxx) \ Cxy
187191 Ey = eigen (Symmetric (Cxy' H), Symmetric (Cyy))
188192 ord = sortperm (Ey. values; rev= true )
189- vy , Py = extract_kv (Ey, ord, p)
193+ eigs , Py = extract_kv (Ey, ord, p)
190194 Px = qnormalize! (H * Py, Cxx)
191195 end
192196
@@ -196,7 +200,7 @@ function _ccacov(Cxx, Cyy, Cxy, xmean, ymean, p::Int)
196200 crs = coldot (Px, Cxy * Py)
197201
198202 # construct CCA model
199- CCA (xmean, ymean, Px, Py, crs)
203+ CCA (xmean, ymean, Px, Py, crs, sqrt .(eigs), - 1 )
200204end
201205
202206"""
@@ -275,7 +279,7 @@ function _ccasvd(Zx::DenseMatrix{T}, Zy::DenseMatrix{T}, xmean::Vector{T}, ymean
275279 crs = rmul! (coldot (Zx' Px, Zy' Py), one (T)/ (n- 1 ))
276280
277281 # construct CCA model
278- CCA (xmean, ymean, Px, Py, crs)
282+ CCA (xmean, ymean, Px, Py, crs, S . S[si], n )
279283end
280284
281285# # interface functions
@@ -336,3 +340,112 @@ function fit(::Type{CCA}, X::AbstractMatrix{T}, Y::AbstractMatrix{T};
336340
337341 return M:: CCA
338342end
343+
"""
    MultivariateTest

Abstract supertype of the F-approximation tests defined in this file
(`WilksLambdaTest`, `LawleyHotellingTest`, `PillaiTraceTest`). The generic
`pvalue` and `dof` methods below rely on subtypes having the fields `fstat`,
`df1` and `df2`.
"""
abstract type MultivariateTest <: HypothesisTest end
345+
"""
    WilksLambdaTest

Result of a Wilks' Lambda test.

# Fields
- `stat`: the Wilks' Lambda statistic.
- `fstat`: the F statistic derived from `stat`.
- `df1`: first degrees-of-freedom parameter of the reference F distribution.
- `df2`: second degrees-of-freedom parameter of the reference F distribution.
"""
struct WilksLambdaTest <: MultivariateTest
    stat::Float64
    fstat::Float64
    df1::Float64
    df2::Float64
end
352+
"""
    LawleyHotellingTest

Result of a Lawley-Hotelling test.

# Fields
- `stat`: the Lawley-Hotelling statistic.
- `fstat`: the F statistic derived from `stat`.
- `df1`: first degrees-of-freedom parameter of the reference F distribution.
- `df2`: second degrees-of-freedom parameter of the reference F distribution.
"""
struct LawleyHotellingTest <: MultivariateTest
    stat::Float64
    fstat::Float64
    df1::Float64
    df2::Float64
end
359+
"""
    PillaiTraceTest

Result of a Pillai's trace test.

# Fields
- `stat`: the Pillai trace statistic.
- `fstat`: the F statistic derived from `stat`.
- `df1`: first degrees-of-freedom parameter of the reference F distribution.
- `df2`: second degrees-of-freedom parameter of the reference F distribution.
"""
struct PillaiTraceTest <: MultivariateTest
    stat::Float64
    fstat::Float64
    df1::Float64
    df2::Float64
end
366+
"""
    pvalue(ct::MultivariateTest)

Return the upper-tail probability of `ct.fstat` under an F distribution with
parameters `ct.df1` and `ct.df2`.
"""
function pvalue(ct::MultivariateTest)
    reference = FDist(ct.df1, ct.df2)
    return ccdf(reference, ct.fstat)
end
370+
"""
    dof(ct::MultivariateTest)

Return the tuple `(df1, df2)` of degrees of freedom stored in `ct`.
"""
dof(ct::MultivariateTest) = (ct.df1, ct.df2)
374+
# Shared set-up for the dimension tests on a fitted `CCA`.
#
# Arguments
# - `cca`: the fitted model; `cca.eigs`, `cca.nobs`, `cca.xmean` and `cca.ymean` are read.
# - `n`:   sample size supplied by the caller; `-1` or `nothing` means "not provided".
# - `k`:   index of the first canonical correlation covered by the null hypothesis.
#
# Returns the tuple `(r, s, m, N, n, dx, dy, p, q)` where
# - `r` is `cca.eigs[k:end]`,
# - `dx`, `dy` are the lengths of `cca.xmean` and `cca.ymean`,
# - `p = dx - k + 1`, `q = dy - k + 1`, and `n` is the sample size reduced by `k - 1`,
# - `s = min(p, q)`, `m = (abs(p - q) - 1) / 2`, `N = (n - p - q - 2) / 2`.
#
# Throws `ArgumentError` when no sample size is available (the model stores
# `nobs == -1` and none was supplied) or when a supplied `n` disagrees with the
# sample size stored in the model.
function _testprep(cca::CCA, n, k)
    r = cca.eigs[k:end]
    dx = length(cca.xmean)
    dy = length(cca.ymean)

    # The public constructors default `n` to -1, so that sentinel (and `nothing`)
    # both have to count as "no sample size given".
    provided = !(n === nothing || n == -1)

    if !provided && cca.nobs == -1
        throw(ArgumentError("If CCA was fit using :cov, n must be provided to tests"))
    end
    if provided && cca.nobs != -1 && cca.nobs != n
        throw(ArgumentError("Provided n is different from actual n"))
    end
    n = provided ? n : cca.nobs

    # Drop the first k-1 canonical dimensions from every size.
    p = dx - k + 1
    q = dy - k + 1
    n = n - k + 1

    m = (abs(p - q) - 1) / 2
    N = (n - p - q - 2) / 2
    s = min(p, q)

    return r, s, m, N, n, dx, dy, p, q
end
398+
"""
    WilksLambdaTest(cca; n=-1, k=1)

Test the dimension of a CCA using Wilks' Lambda. Under the null hypothesis,
canonical correlations `k, k+1, ...` are all zero. The sample size `n` has to
be passed when the CCA was fit from a covariance matrix.
"""
function WilksLambdaTest(cca::CCA; n=-1, k=1)
    # Reference: Rencher and Christensen (2012)
    r, _, _, _, nadj, _, _, p, q = _testprep(cca, n, k)

    stat = prod(1 .- r .^ 2)
    w = nadj - (p + q + 3) / 2

    # Exponent of the F approximation; the p*q == 2 case is special-cased to 1.
    if p * q == 2
        t = 1.0
    else
        t = sqrt((p^2 * q^2 - 4) / (p^2 + q^2 - 5))
    end

    df1 = p * q
    df2 = w * t - p * q / 2 + 1

    root = stat^(1 / t)
    fstat = ((1 - root) / root) * (df2 / df1)

    return WilksLambdaTest(stat, fstat, df1, df2)
end
419+
"""
    PillaiTraceTest(cca; n=-1, k=1)

Test the dimension of a CCA using Pillai's trace. Under the null hypothesis,
canonical correlations `k, k+1, ...` are all zero. The sample size `n` has to
be passed when the CCA was fit from a covariance matrix.
"""
function PillaiTraceTest(cca::CCA; n=-1, k=1)
    r, s, m, N, _, _, _, _, _ = _testprep(cca, n, k)

    stat = sum(abs2, r)

    # Factors shared by the F statistic and its degrees of freedom.
    numfac = 2 * N + s + 1
    denfac = 2 * m + s + 1

    fstat = numfac * stat / (denfac * (s - stat))
    df1 = s * denfac
    df2 = s * numfac

    return PillaiTraceTest(stat, fstat, df1, df2)
end
435+
"""
    LawleyHotellingTest(cca; n=-1, k=1)

Test the dimension of a CCA using the Lawley-Hotelling statistic. Under the
null hypothesis, canonical correlations `k, k+1, ...` are all zero. The sample
size `n` has to be passed when the CCA was fit from a covariance matrix.
"""
function LawleyHotellingTest(cca::CCA; n=-1, k=1)
    r, s, m, N, _, _, _, _, _ = _testprep(cca, n, k)

    rsq = r .^ 2
    stat = sum(rsq ./ (1 .- rsq))

    scale = 2 * m + s + 1
    df1 = s * scale
    df2 = 2 * (s * N + 1)
    fstat = df2 * stat / (s^2 * scale)

    return LawleyHotellingTest(stat, fstat, df1, df2)
end
0 commit comments