From 9b9bf6dfbea8a1a51826b1e04e6bea7873ebdd2e Mon Sep 17 00:00:00 2001 From: Joel Dierkes Date: Wed, 18 Sep 2024 15:51:48 +0200 Subject: [PATCH] Add the option to ignore `ConvergenceExceptions` (#233) The result of a non-converged model might still be interesting. However, since `ConvergenceExceptions` are thrown, these partially fitted models are discarded. This commit adds an `omit_convergence_exception` argument to all relevant functions that allows the user to omit such exceptions and return the partially trained model. The exception is still thrown in the default case to preserve backwards compatibility. --- src/ica.jl | 14 +++++++++++--- src/mmds.jl | 9 +++++++-- src/ppca.jl | 46 +++++++++++++++++++++++++++++++++++++--------- test/ica.jl | 1 + test/ppca.jl | 2 ++ 5 files changed, 58 insertions(+), 14 deletions(-) diff --git a/src/ica.jl b/src/ica.jl index 123f375..0475232 100644 --- a/src/ica.jl +++ b/src/ica.jl @@ -107,6 +107,7 @@ Invoke the Fast ICA algorithm[^1]. - `fun`: The approximate neg-entropy functor of type [`ICAGDeriv`](@ref). - `maxiter`: Maximum number of iterations. - `tol`: Tolerable change of `W` at convergence. +- `omit_convergence_exception`: Whether to omit an exception if the function did not converge. Returns the updated `W`. 
@@ -116,7 +117,8 @@ function fastica!(W::DenseMatrix{T}, # initialized component matrix, siz X::DenseMatrix{T}, # (whitened) observation sample matrix, size(m, n) fun::ICAGDeriv, # approximate neg-entropy functor maxiter::Int, # maximum number of iterations - tol::Real) where {T<:Real} # convergence tolerance + tol::Real, # convergence tolerance + omit_convergence_exception::Bool) where {T<:Real} # argument checking m = size(W, 1) @@ -173,7 +175,11 @@ function fastica!(W::DenseMatrix{T}, # initialized component matrix, siz @debug "Iteration $t" change=chg tolerance=tol end - converged || throw(ConvergenceException(maxiter, chg, oftype(chg, tol))) + + if !omit_convergence_exception && !converged + throw(ConvergenceException(maxiter, chg, oftype(chg, tol))) + end + return W end @@ -194,6 +200,7 @@ while each column corresponds to an observation (*e.g* all signal value at a par - `do_whiten`: Whether to perform pre-whitening (*default* `true`) - `maxiter`: Maximum number of iterations (*default* `100`) - `tol`: Tolerable change of ``W`` at convergence (*default* `1.0e-6`) +- `omit_convergence_exception`: Whether to omit an exception if the function did not converge (*default* `false`). - `mean`: The mean vector, which can be either of: - `0`: the input data has already been centralized - `nothing`: this function will compute the mean (*default*) @@ -216,6 +223,7 @@ function fit(::Type{ICA}, X::AbstractMatrix{T},# sample matrix, size (m, n) do_whiten::Bool=true, # whether to perform pre-whitening maxiter::Integer=100, # maximum number of iterations tol::Real=1.0e-6, # convergence tolerance + omit_convergence_exception::Bool=false, mean=nothing, # pre-computed mean winit::Matrix{T}=zeros(T,0,0) # init guess of W, size (m, k) ) where {T<:Real} @@ -247,7 +255,7 @@ function fit(::Type{ICA}, X::AbstractMatrix{T},# sample matrix, size (m, n) W = (isempty(winit) ? 
randn(T, size(Z,1), k) : copy(winit)) # invoke core algorithm - fastica!(W, Z, fun, maxiter, tol) + fastica!(W, Z, fun, maxiter, tol, omit_convergence_exception) # construct model if do_whiten diff --git a/src/mmds.jl b/src/mmds.jl index aa6ab37..20386df 100644 --- a/src/mmds.jl +++ b/src/mmds.jl @@ -113,6 +113,7 @@ Let `(d, n) = size(X)` be respectively the input dimension and the number of obs - any two parameter disparity transformation function, where the first parameter is a vector of proximities (i.e. dissimilarities) and the second parameter is a vector of distances, e.g. `(p,d)->b*p` for some `b` is a transformation function for *ratio* MDS. - `tol`: Convergence tolerance (*default* `1.0e-3`) - `maxiter`: Maximum number of iterations (*default* `300`) +- `omit_convergence_exception`: Whether to omit an exception if the function did not converge (*default* `false`). - `initial`: an initial reduced space point configuration - `nothing`: then an initial configuration is randomly generated (*default*) - pre-defined matrix @@ -129,7 +130,8 @@ function fit(::Type{MetricMDS}, X::AbstractMatrix{T}; maxiter::Int = 300, initial::Union{Nothing,AbstractMatrix{<:Real}} = nothing, weights::Union{Nothing,AbstractMatrix{<:Real}} = nothing, - distances::Bool) where {T<:Real} + distances::Bool, + omit_convergence_exception::Bool = false) where {T<:Real} # get distance matrix and space dimension Δ, d = if !distances @@ -204,7 +206,10 @@ function fit(::Type{MetricMDS}, X::AbstractMatrix{T}; σ′ = σ i += 1 end - converged || throw(ConvergenceException(maxiter, chg, oftype(chg, tol))) + + if !omit_convergence_exception && !converged + throw(ConvergenceException(maxiter, chg, oftype(chg, tol))) + end MetricMDS(d, Z, σ′) end diff --git a/src/ppca.jl b/src/ppca.jl index 8400890..3a233ad 100644 --- a/src/ppca.jl +++ b/src/ppca.jl @@ -153,12 +153,14 @@ or an empty vector indicating a zero mean. Returns the resultant [`PPCA`](@ref) model. 
-**Note:** This function accepts three keyword arguments: `maxoutdim`, `tol`, and `maxiter`. +**Note:** This function accepts four keyword arguments: `maxoutdim`, `tol`, +`maxiter` and `omit_convergence_exception`. """ function ppcaem(S::AbstractMatrix{T}, mean::Vector{T}, n::Int; maxoutdim::Int=size(S,1)-1, tol::Real=1.0e-6, # convergence tolerance - maxiter::Integer=1000) where {T<:Real} + maxiter::Integer=1000, + omit_convergence_exception::Bool = false) where {T<:Real} check_pcaparams(size(S,1), mean, maxoutdim, 1.) @@ -197,7 +199,10 @@ function ppcaem(S::AbstractMatrix{T}, mean::Vector{T}, n::Int; L_old = L i += 1 end - converged || throw(ConvergenceException(maxiter, chg, oftype(chg, tol))) + + if !omit_convergence_exception && !converged + throw(ConvergenceException(maxiter, chg, oftype(chg, tol))) + end return PPCA(mean, W, σ²) end @@ -217,14 +222,16 @@ or an empty vector indicating a zero mean. Returns the resultant [`PPCA`](@ref) model. **Notes:** -- This function accepts three keyword arguments: `maxoutdim`, `tol`, and `maxiter`. +- **Note:** This function accepts four keyword arguments: `maxoutdim`, `tol`, +`maxiter` and `omit_convergence_exception`. - Function uses the `maxoutdim` parameter as an upper boundary when it automatically determines the latent space dimensionality. """ function bayespca(S::AbstractMatrix{T}, mean::Vector{T}, n::Int; maxoutdim::Int=size(S,1)-1, tol::Real=1.0e-6, # convergence tolerance - maxiter::Integer=1000) where {T<:Real} + maxiter::Integer=1000, + omit_convergence_exception::Bool = false) where {T<:Real} check_pcaparams(size(S,1), mean, maxoutdim, 1.) 
@@ -271,7 +278,10 @@ function bayespca(S::AbstractMatrix{T}, mean::Vector{T}, n::Int; L_old = L i += 1 end - converged || throw(ConvergenceException(maxiter, chg, oftype(chg, tol))) + + if !omit_convergence_exception && !converged + throw(ConvergenceException(maxiter, chg, oftype(chg, tol))) + end return PPCA(mean, W[:,wnorm .> 0.], σ²) end @@ -299,6 +309,7 @@ Let `(d, n) = size(X)` be respectively the input dimension and the number of obs - a pre-computed mean vector - `tol`: Convergence tolerance (*default* `1.0e-6`) - `maxiter`: Maximum number of iterations (*default* `1000`) +- `omit_convergence_exception`: Whether to omit an exception if the function did not converge (*default* `false`). **Notes:** This function calls [`ppcaml`](@ref), [`ppcaem`](@ref) or [`bayespca`](@ref) internally, depending on the choice of method. @@ -308,7 +319,8 @@ function fit(::Type{PPCA}, X::AbstractMatrix{T}; maxoutdim::Int=size(X,1)-1, mean=nothing, tol::Real=1.0e-6, # convergence tolerance - maxiter::Integer=1000) where {T<:Real} + maxiter::Integer=1000, + omit_convergence_exception::Bool = false) where {T<:Real} @assert !SparseArrays.issparse(X) "Use Kernel PCA for sparse arrays" @@ -326,9 +338,25 @@ function fit(::Type{PPCA}, X::AbstractMatrix{T}; elseif method == :em || method == :bayes S = covm(X, isempty(mv) ? 
0 : mv, 2) if method == :em - M = ppcaem(S, mv, n, maxoutdim=maxoutdim, tol=tol, maxiter=maxiter) + M = ppcaem( + S, + mv, + n, + maxoutdim=maxoutdim, + tol=tol, + maxiter=maxiter, + omit_convergence_exception=omit_convergence_exception + ) elseif method == :bayes - M = bayespca(S, mv, n, maxoutdim=maxoutdim, tol=tol, maxiter=maxiter) + M = bayespca( + S, + mv, + n, + maxoutdim=maxoutdim, + tol=tol, + maxiter=maxiter, + omit_convergence_exception=omit_convergence_exception + ) end else throw(ArgumentError("Invalid method name $(method)")) diff --git a/test/ica.jl b/test/ica.jl index 78fee24..1200da4 100644 --- a/test/ica.jl +++ b/test/ica.jl @@ -85,6 +85,7 @@ using StatsBase: ConvergenceException @test W'C * W ≈ Matrix(I, k, k) @test_throws ConvergenceException fit(ICA, X, k; do_whiten=true, tol=1e-8, maxiter=2) + _ = fit(ICA, X, k; do_whiten=true, tol=1e-8, maxiter=2, omit_convergence_exception=true) # Use data of different type XX = convert(Matrix{Float32}, X) diff --git a/test/ppca.jl b/test/ppca.jl index e2d9532..6c9587a 100644 --- a/test/ppca.jl +++ b/test/ppca.jl @@ -113,6 +113,7 @@ import StatsBase @test P'P ≈ Matrix(I, 3, 3) @test_throws StatsBase.ConvergenceException fit(PPCA, X; method=:em, maxiter=1) + _ = fit(PPCA, X; method=:em, maxiter=1, omit_convergence_exception=true) # bayespca M0 = fit(PCA, X; mean=mval, maxoutdim = 3) @@ -139,6 +140,7 @@ import StatsBase @test P'P ≈ Matrix(I, 2, 2) @test_throws StatsBase.ConvergenceException fit(PPCA, X; method=:bayes, maxiter=1) + _ = fit(PPCA, X; method=:bayes, maxiter=1, omit_convergence_exception=true) # Different data types # --------------------