From 923ca2f2c0a55cd0d71958cdfd736153b13677cc Mon Sep 17 00:00:00 2001 From: Tokazama Date: Sun, 7 Apr 2019 14:38:55 -0400 Subject: [PATCH 01/11] Additional `show` info for PCA This arose from https://discourse.julialang.org/t/pca-output/22687/10. Main contribution is pattern matrix with features x components. This should probably be refined but not sure what to do without DataFrames dependency. --- src/pca.jl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/pca.jl b/src/pca.jl index d3e75ba..ab64136 100644 --- a/src/pca.jl +++ b/src/pca.jl @@ -50,6 +50,18 @@ reconstruct(M::PCA{T}, y::AbstractVecOrMat{T}) where {T<:Real} = decentralize(M. function show(io::IO, M::PCA) print(io, "PCA(indim = $(indim(M)), outdim = $(outdim(M)), principalratio = $(principalratio(M)))") + + ldgs = projection(M) * diagm(0 => sqrt.(M.prinvars)) + rot = diag(ldgs' * ldgs) + ldgs = ldgs[:,sortperm(rot, rev=true)] + ldgs_signs = sign.(sum(ldgs, dims=1)) + ldgs_signs[ldgs_signs .== 0] .= 1 + ldgs = ldgs * diagm(0 => ldgs_signs[:]) + print(io, "\nPattern matrix\n") + display(ldgs) + print(io, "\nLoadings $(principalvars(M))\n") + print(io, "Proportion explained $(principalvars(M) ./ M.tvar)\n") + print(io, "Cumulative proportion $(cumsum(principalvars(M) ./M.tvar))\n") end function dump(io::IO, M::PCA) From 5c9b6ddcdfe238fe3ac982388a8c5a3b5435cb5a Mon Sep 17 00:00:00 2001 From: Tokazama Date: Wed, 4 Sep 2019 06:44:16 -0400 Subject: [PATCH 02/11] Add show method --- src/pca.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/pca.jl b/src/pca.jl index ab64136..5348d7f 100644 --- a/src/pca.jl +++ b/src/pca.jl @@ -47,10 +47,12 @@ transform(M::PCA{T}, x::AbstractVecOrMat{T}) where {T<:Real} = transpose(M.proj) reconstruct(M::PCA{T}, y::AbstractVecOrMat{T}) where {T<:Real} = decentralize(M.proj * y, M.mean) ## show & dump - function show(io::IO, M::PCA) print(io, "PCA(indim = $(indim(M)), outdim = $(outdim(M)), principalratio = $(principalratio(M)))") +end +function show(io::IO, ::MIME"text/plain", M::PCA) + print(io, "PCA(indim = $(indim(M)), outdim = $(outdim(M)), principalratio = $(principalratio(M)))") ldgs = projection(M) * diagm(0 => sqrt.(M.prinvars)) rot = diag(ldgs' * ldgs) ldgs = ldgs[:,sortperm(rot, rev=true)] From 6540e8ffbd6d5acb69b35e3e458b8288569a0080 Mon Sep 17 00:00:00 2001 From: Zachary Christensen Date: Wed, 4 Sep 2019 14:53:28 -0400 Subject: [PATCH 03/11] Added CoefTable for pretty printing. --- src/MultivariateStats.jl | 2 +- src/pca.jl | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/MultivariateStats.jl b/src/MultivariateStats.jl index 7bcb5f7..d7319e8 100644 --- a/src/MultivariateStats.jl +++ b/src/MultivariateStats.jl @@ -3,7 +3,7 @@ module MultivariateStats using StatsBase: SimpleCovariance, CovarianceEstimator import Statistics: mean, var, cov, covm import Base: length, size, show, dump - import StatsBase: fit, predict, ConvergenceException + import StatsBase: fit, predict, ConvergenceException, CoefTable import SparseArrays export diff --git a/src/pca.jl b/src/pca.jl index 5348d7f..7a2a749 100644 --- a/src/pca.jl +++ b/src/pca.jl @@ -59,11 +59,13 @@ function show(io::IO, ::MIME"text/plain", M::PCA) ldgs_signs = sign.(sum(ldgs, dims=1)) ldgs_signs[ldgs_signs .== 0] .= 1 ldgs = ldgs * diagm(0 => ldgs_signs[:]) - print(io, "\nPattern matrix\n") + print(io, "\n\nPattern matrix\n") display(ldgs) - print(io, "\nLoadings $(principalvars(M))\n") - print(io, "Proportion explained $(principalvars(M) ./ M.tvar)\n") - print(io, "Cumulative proportion $(cumsum(principalvars(M) ./M.tvar))\n") + print(io, "\n") + print(io, "Importance of components:\n") + print(io, CoefTable(hcat(principalvars(M), principalvars(M) ./ M.tvar, cumsum(principalvars(M) ./M.tvar)), + string.("PC", 1:length(principalvars(M))), # components in order + ["Loadings", "Proportion explained", "Cumulative proportion"])) # row names end function dump(io::IO, M::PCA) From 78c24ae5fd002509bfcc8c8b4de82073b7ab9bde Mon Sep 17 00:00:00 2001 From: Zachary Christensen Date: Wed, 4 Sep 2019 15:41:11 -0400 Subject: [PATCH 04/11] Fixed row orientation --- src/pca.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pca.jl b/src/pca.jl index 7a2a749..6fe4d69 100644 --- a/src/pca.jl +++ b/src/pca.jl @@ -63,7 +63,7 @@ function show(io::IO, ::MIME"text/plain", M::PCA) display(ldgs) print(io, "\n") print(io, "Importance of components:\n") - print(io, CoefTable(hcat(principalvars(M), principalvars(M) ./ M.tvar, cumsum(principalvars(M) ./M.tvar)), + print(io, CoefTable(vcat(principalvars(M)', (principalvars(M) ./ tvar(M))', (cumsum(principalvars(M) ./tvar(M)))'), string.("PC", 1:length(principalvars(M))), # components in order ["Loadings", "Proportion explained", "Cumulative proportion"])) # row names end From d313dd4d7cb4f00093052b1c04ac3036259a63bb Mon Sep 17 00:00:00 2001 From: Zachary P Christensen Date: Thu, 5 Sep 2019 11:06:41 -0400 Subject: [PATCH 05/11] Update src/pca.jl Co-Authored-By: Milan Bouchet-Valat --- src/pca.jl | 1 + 1 file changed, 1 insertion(+) diff --git a/src/pca.jl b/src/pca.jl index 6fe4d69..901baf1 100644 --- a/src/pca.jl +++ b/src/pca.jl @@ -66,6 +66,7 @@ function show(io::IO, ::MIME"text/plain", M::PCA) print(io, CoefTable(vcat(principalvars(M)', (principalvars(M) ./ tvar(M))', (cumsum(principalvars(M) ./tvar(M)))'), string.("PC", 1:length(principalvars(M))), # components in order ["Loadings", "Proportion explained", "Cumulative proportion"])) # row names + return nothing end function dump(io::IO, M::PCA) From 2cb0251aebc25c379eb1f3602271300d2747cfac Mon Sep 17 00:00:00 2001 From: Zachary P Christensen Date: Thu, 5 Sep 2019 11:07:53 -0400 Subject: [PATCH 06/11] Update src/pca.jl Co-Authored-By: Milan Bouchet-Valat --- src/pca.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pca.jl b/src/pca.jl index 901baf1..5c04f74 100644 --- a/src/pca.jl +++ b/src/pca.jl @@ -57,7 +57,7 @@ function show(io::IO, ::MIME"text/plain", M::PCA) rot = diag(ldgs' * ldgs) ldgs = ldgs[:,sortperm(rot, rev=true)] ldgs_signs = sign.(sum(ldgs, dims=1)) - ldgs_signs[ldgs_signs .== 0] .= 1 + replace!(ldgs_signs, 0=>1) ldgs = ldgs * diagm(0 => ldgs_signs[:]) print(io, "\n\nPattern matrix\n") display(ldgs) From e31458a34b071605212364462ef0cbcb3c57a09c Mon Sep 17 00:00:00 2001 From: Zachary P Christensen Date: Thu, 5 Sep 2019 11:08:03 -0400 Subject: [PATCH 07/11] Update src/pca.jl Co-Authored-By: Milan Bouchet-Valat --- src/pca.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pca.jl b/src/pca.jl index 5c04f74..d4a5b27 100644 --- a/src/pca.jl +++ b/src/pca.jl @@ -60,7 +60,7 @@ function show(io::IO, ::MIME"text/plain", M::PCA) replace!(ldgs_signs, 0=>1) ldgs = ldgs * diagm(0 => ldgs_signs[:]) print(io, "\n\nPattern matrix\n") - display(ldgs) + show(io, ldgs) print(io, "\n") print(io, "Importance of components:\n") print(io, CoefTable(vcat(principalvars(M)', (principalvars(M) ./ tvar(M))', (cumsum(principalvars(M) ./tvar(M)))'), From 07331d6b3a6f3d6b6e1a9b92569f4ce907a215e2 Mon Sep 17 00:00:00 2001 From: Zachary P Christensen Date: Thu, 5 Sep 2019 11:08:11 -0400 Subject: [PATCH 08/11] Update src/pca.jl Co-Authored-By: Milan Bouchet-Valat --- src/pca.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pca.jl b/src/pca.jl index d4a5b27..6264a3d 100644 --- a/src/pca.jl +++ b/src/pca.jl @@ -55,7 +55,7 @@ function show(io::IO, ::MIME"text/plain", M::PCA) print(io, "PCA(indim = $(indim(M)), outdim = $(outdim(M)), principalratio = $(principalratio(M)))") ldgs = projection(M) * diagm(0 => sqrt.(M.prinvars)) rot = diag(ldgs' * ldgs) - ldgs = ldgs[:,sortperm(rot, rev=true)] + ldgs = ldgs[:, sortperm(rot, rev=true)] ldgs_signs = sign.(sum(ldgs, dims=1)) replace!(ldgs_signs, 0=>1) ldgs = ldgs * diagm(0 => ldgs_signs[:]) From 63baa5a413348583032cd819a978a9b0e1dbb6cf Mon Sep 17 00:00:00 2001 From: Art Date: Wed, 16 Feb 2022 21:44:29 -0500 Subject: [PATCH 09/11] Updated output tables --- src/pca.jl | 40 ++++++++++++++++------------------------ 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/src/pca.jl b/src/pca.jl index bf8f900..0bd5206 100644 --- a/src/pca.jl +++ b/src/pca.jl @@ -129,38 +129,30 @@ function show(io::IO, M::PCA) end function show(io::IO, ::MIME"text/plain", M::PCA) - print(io, "PCA(indim = $(indim(M)), outdim = $(outdim(M)), principalratio = $(principalratio(M)))") - ldgs = projection(M) * diagm(0 => sqrt.(M.prinvars)) + idim, odim = size(M) + print(io, "PCA(indim = $idim, outdim = $odim, principalratio = $(r2(M)))") + ldgs = loadings(M) rot = diag(ldgs' * ldgs) ldgs = ldgs[:, sortperm(rot, rev=true)] ldgs_signs = sign.(sum(ldgs, dims=1)) replace!(ldgs_signs, 0=>1) ldgs = ldgs * diagm(0 => ldgs_signs[:]) - print(io, "\n\nPattern matrix\n") - show(io, ldgs) - print(io, "\n") + print(io, "\n\nPattern matrix:\n") + cft = CoefTable(ldgs, string.("PC", 1:odim), string.("", 1:idim)) + print(io, cft) + print(io, "\n\n") print(io, "Importance of components:\n") - print(io, CoefTable(vcat(principalvars(M)', (principalvars(M) ./ tvar(M))', (cumsum(principalvars(M) ./tvar(M)))'), - string.("PC", 1:length(principalvars(M))), # components in order - ["Loadings", "Proportion explained", "Cumulative proportion"])) # row names - return nothing + λ = eigvals(M) + prp = λ ./ var(M) + prpv = λ ./ sum(λ) + names = ["SS Loadings (Eigenvalues)", + "Variance explained", "Cumulative variance", + "Proportion explained","Cumulative proportion"] + cft = CoefTable(vcat(λ', prp', cumsum(prp)', prpv', cumsum(prpv)'), + string.("PC", 1:odim), names) + print(io, cft) end -function dump(io::IO, M::PCA) - show(io, M) - println(io) - print(io, "principal vars: ") - printvecln(io, M.prinvars) - println(io, "total var = $(tvar(M))") - println(io, "total principal var = $(tprincipalvar(M))") - println(io, "total residual var = $(tresidualvar(M))") - println(io, "mean:") - printvecln(io, mean(M)) - println(io, "projection:") - printarrln(io, projection(M)) -end - - #### PCA Training ## auxiliary From 15f96d4b179045060cc3ca71cf5dae2138b8b95a Mon Sep 17 00:00:00 2001 From: Zachary P Christensen Date: Thu, 17 Feb 2022 08:54:24 -0500 Subject: [PATCH 10/11] Update src/pca.jl Co-authored-by: Milan Bouchet-Valat --- src/pca.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pca.jl b/src/pca.jl index d830e66..089df38 100644 --- a/src/pca.jl +++ b/src/pca.jl @@ -149,7 +149,7 @@ function show(io::IO, ::MIME"text/plain", M::PCA) ldgs_signs = sign.(sum(ldgs, dims=1)) replace!(ldgs_signs, 0=>1) ldgs = ldgs * diagm(0 => ldgs_signs[:]) - print(io, "\n\nPattern matrix:\n") + print(io, "\n\nPattern matrix (unstandardized loadings):\n") cft = CoefTable(ldgs, string.("PC", 1:odim), string.("", 1:idim)) print(io, cft) print(io, "\n\n") From 6807a4a3d9287233c352b58178e6cf8edc2d618e Mon Sep 17 00:00:00 2001 From: Zachary P Christensen Date: Thu, 17 Feb 2022 08:54:31 -0500 Subject: [PATCH 11/11] Update src/pca.jl Co-authored-by: Milan Bouchet-Valat --- src/pca.jl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pca.jl b/src/pca.jl index 089df38..eb7e63d 100644 --- a/src/pca.jl +++ b/src/pca.jl @@ -159,7 +159,7 @@ function show(io::IO, ::MIME"text/plain", M::PCA) prpv = λ ./ sum(λ) names = ["SS Loadings (Eigenvalues)", "Variance explained", "Cumulative variance", - "Proportion explained","Cumulative proportion"] + "Proportion explained", "Cumulative proportion"] cft = CoefTable(vcat(λ', prp', cumsum(prp)', prpv', cumsum(prpv)'), string.("PC", 1:odim), names) print(io, cft)