From 9b9bf6dfbea8a1a51826b1e04e6bea7873ebdd2e Mon Sep 17 00:00:00 2001
From: Joel Dierkes <dierkesjoel@gmail.com>
Date: Wed, 18 Sep 2024 15:51:48 +0200
Subject: [PATCH] Add the option to ignore `ConvergenceExceptions` (#233)

The result of a non-converged model might still be interesting. However, since a
`ConvergenceException` is thrown, these partially fitted models are discarded.

This commit adds an `omit_convergence_exception` argument to all relevant
functions that allows the user to omit such exceptions and return the partially
trained model. The exception is still thrown in the default case to preserve
backwards compatibility.
---
 src/ica.jl   | 14 +++++++++++---
 src/mmds.jl  |  9 +++++++--
 src/ppca.jl  | 46 +++++++++++++++++++++++++++++++++++++---------
 test/ica.jl  |  1 +
 test/ppca.jl |  2 ++
 5 files changed, 58 insertions(+), 14 deletions(-)

diff --git a/src/ica.jl b/src/ica.jl
index 123f375..0475232 100644
--- a/src/ica.jl
+++ b/src/ica.jl
@@ -107,6 +107,7 @@ Invoke the Fast ICA algorithm[^1].
 - `fun`: The approximate neg-entropy functor of type [`ICAGDeriv`](@ref).
 - `maxiter`: Maximum number of iterations.
 - `tol`: Tolerable change of `W` at convergence.
+- `omit_convergence_exception`: Whether to omit an exception if the function did not converge.
 
 Returns the updated `W`.
 
@@ -116,7 +117,8 @@ function fastica!(W::DenseMatrix{T},         # initialized component matrix, siz
                   X::DenseMatrix{T},         # (whitened) observation sample matrix, size(m, n)
                   fun::ICAGDeriv,            # approximate neg-entropy functor
                   maxiter::Int,              # maximum number of iterations
-                  tol::Real) where {T<:Real} # convergence tolerance
+                  tol::Real,                 # convergence tolerance
+                  omit_convergence_exception::Bool) where {T<:Real}
 
     # argument checking
     m = size(W, 1)
@@ -173,7 +175,11 @@ function fastica!(W::DenseMatrix{T},         # initialized component matrix, siz
 
         @debug "Iteration $t" change=chg tolerance=tol
     end
-    converged || throw(ConvergenceException(maxiter, chg, oftype(chg, tol)))
+
+    if !omit_convergence_exception && !converged
+        throw(ConvergenceException(maxiter, chg, oftype(chg, tol)))
+    end
+
     return W
 end
 
@@ -194,6 +200,7 @@ while each column corresponds to an observation (*e.g* all signal value at a par
 - `do_whiten`: Whether to perform pre-whitening (*default* `true`)
 - `maxiter`: Maximum number of iterations (*default* `100`)
 - `tol`: Tolerable change of ``W`` at convergence (*default* `1.0e-6`)
+- `omit_convergence_exception`: Whether to omit an exception if the function did not converge (*default* `false`).
 - `mean`: The mean vector, which can be either of:
     - `0`: the input data has already been centralized
     - `nothing`: this function will compute the mean (*default*)
@@ -216,6 +223,7 @@ function fit(::Type{ICA}, X::AbstractMatrix{T},# sample matrix, size (m, n)
              do_whiten::Bool=true,             # whether to perform pre-whitening
              maxiter::Integer=100,             # maximum number of iterations
              tol::Real=1.0e-6,                 # convergence tolerance
+             omit_convergence_exception::Bool=false,
              mean=nothing,                     # pre-computed mean
              winit::Matrix{T}=zeros(T,0,0)     # init guess of W, size (m, k)
             ) where {T<:Real}
@@ -247,7 +255,7 @@ function fit(::Type{ICA}, X::AbstractMatrix{T},# sample matrix, size (m, n)
     W = (isempty(winit) ? randn(T, size(Z,1), k) : copy(winit))
 
     # invoke core algorithm
-    fastica!(W, Z, fun, maxiter, tol)
+    fastica!(W, Z, fun, maxiter, tol, omit_convergence_exception)
 
     # construct model
     if do_whiten
diff --git a/src/mmds.jl b/src/mmds.jl
index aa6ab37..20386df 100644
--- a/src/mmds.jl
+++ b/src/mmds.jl
@@ -113,6 +113,7 @@ Let `(d, n) = size(X)` be respectively the input dimension and the number of obs
     - any two parameter disparity transformation function, where the first parameter is a vector of proximities (i.e. dissimilarities) and the second parameter is a vector of distances, e.g. `(p,d)->b*p` for some `b` is a transformation function for *ratio* MDS.
 - `tol`: Convergence tolerance (*default* `1.0e-3`)
 - `maxiter`: Maximum number of iterations (*default* `300`)
+- `omit_convergence_exception`: Whether to omit an exception if the function did not converge (*default* `false`).
 - `initial`: an initial reduced space point configuration
     - `nothing`: then an initial configuration is randomly generated (*default*)
     - pre-defined matrix
@@ -129,7 +130,8 @@ function fit(::Type{MetricMDS}, X::AbstractMatrix{T};
              maxiter::Int = 300,
              initial::Union{Nothing,AbstractMatrix{<:Real}} = nothing,
              weights::Union{Nothing,AbstractMatrix{<:Real}} = nothing,
-             distances::Bool) where {T<:Real}
+             distances::Bool,
+             omit_convergence_exception::Bool = false) where {T<:Real}
 
     # get distance matrix and space dimension
     Δ, d = if !distances
@@ -204,7 +206,10 @@ function fit(::Type{MetricMDS}, X::AbstractMatrix{T};
         σ′ = σ
         i += 1
     end
-    converged || throw(ConvergenceException(maxiter, chg, oftype(chg, tol)))
+
+    if !omit_convergence_exception && !converged
+        throw(ConvergenceException(maxiter, chg, oftype(chg, tol)))
+    end
 
     MetricMDS(d, Z, σ′)
 end
diff --git a/src/ppca.jl b/src/ppca.jl
index 8400890..3a233ad 100644
--- a/src/ppca.jl
+++ b/src/ppca.jl
@@ -153,12 +153,14 @@ or an empty vector indicating a zero mean.
 
 Returns the resultant [`PPCA`](@ref) model.
 
-**Note:** This function accepts three keyword arguments: `maxoutdim`, `tol`, and `maxiter`.
+**Note:** This function accepts four keyword arguments: `maxoutdim`, `tol`,
+`maxiter` and `omit_convergence_exception`.
 """
 function ppcaem(S::AbstractMatrix{T}, mean::Vector{T}, n::Int;
                 maxoutdim::Int=size(S,1)-1,
                 tol::Real=1.0e-6,   # convergence tolerance
-                maxiter::Integer=1000) where {T<:Real}
+                maxiter::Integer=1000,
+                omit_convergence_exception::Bool = false) where {T<:Real}
 
     check_pcaparams(size(S,1), mean, maxoutdim, 1.)
 
@@ -197,7 +199,10 @@ function ppcaem(S::AbstractMatrix{T}, mean::Vector{T}, n::Int;
         L_old = L
         i += 1
     end
-    converged || throw(ConvergenceException(maxiter, chg, oftype(chg, tol)))
+
+    if !omit_convergence_exception && !converged
+        throw(ConvergenceException(maxiter, chg, oftype(chg, tol)))
+    end
 
     return PPCA(mean, W, σ²)
 end
@@ -217,14 +222,16 @@ or an empty vector indicating a zero mean.
 Returns the resultant [`PPCA`](@ref) model.
 
 **Notes:**
-- This function accepts three keyword arguments: `maxoutdim`, `tol`, and `maxiter`.
+- This function accepts four keyword arguments: `maxoutdim`, `tol`,
+`maxiter` and `omit_convergence_exception`.
 - Function uses the `maxoutdim` parameter as an upper boundary when it automatically
 determines the latent space dimensionality.
 """
 function bayespca(S::AbstractMatrix{T}, mean::Vector{T}, n::Int;
                  maxoutdim::Int=size(S,1)-1,
                  tol::Real=1.0e-6,   # convergence tolerance
-                 maxiter::Integer=1000) where {T<:Real}
+                 maxiter::Integer=1000,
+                omit_convergence_exception::Bool = false) where {T<:Real}
 
     check_pcaparams(size(S,1), mean, maxoutdim, 1.)
 
@@ -271,7 +278,10 @@ function bayespca(S::AbstractMatrix{T}, mean::Vector{T}, n::Int;
         L_old = L
         i += 1
     end
-    converged || throw(ConvergenceException(maxiter, chg, oftype(chg, tol)))
+
+    if !omit_convergence_exception && !converged
+        throw(ConvergenceException(maxiter, chg, oftype(chg, tol)))
+    end
 
     return PPCA(mean, W[:,wnorm .> 0.], σ²)
 end
@@ -299,6 +309,7 @@ Let `(d, n) = size(X)` be respectively the input dimension and the number of obs
     - a pre-computed mean vector
 - `tol`: Convergence tolerance (*default* `1.0e-6`)
 - `maxiter`: Maximum number of iterations (*default* `1000`)
+- `omit_convergence_exception`: Whether to omit an exception if the function did not converge (*default* `false`).
 
 **Notes:** This function calls [`ppcaml`](@ref), [`ppcaem`](@ref) or
 [`bayespca`](@ref) internally, depending on the choice of method.
@@ -308,7 +319,8 @@ function fit(::Type{PPCA}, X::AbstractMatrix{T};
              maxoutdim::Int=size(X,1)-1,
              mean=nothing,
              tol::Real=1.0e-6,   # convergence tolerance
-             maxiter::Integer=1000) where {T<:Real}
+             maxiter::Integer=1000,
+                omit_convergence_exception::Bool = false) where {T<:Real}
 
     @assert !SparseArrays.issparse(X) "Use Kernel PCA for sparse arrays"
 
@@ -326,9 +338,25 @@ function fit(::Type{PPCA}, X::AbstractMatrix{T};
     elseif method == :em || method == :bayes
         S = covm(X, isempty(mv) ? 0 : mv, 2)
         if method == :em
-            M = ppcaem(S, mv, n, maxoutdim=maxoutdim, tol=tol, maxiter=maxiter)
+            M = ppcaem(
+                S,
+                mv,
+                n,
+                maxoutdim=maxoutdim,
+                tol=tol,
+                maxiter=maxiter,
+                omit_convergence_exception=omit_convergence_exception
+            )
         elseif method == :bayes
-            M = bayespca(S, mv, n, maxoutdim=maxoutdim, tol=tol, maxiter=maxiter)
+            M = bayespca(
+                S,
+                mv,
+                n,
+                maxoutdim=maxoutdim,
+                tol=tol,
+                maxiter=maxiter,
+                omit_convergence_exception=omit_convergence_exception
+            )
         end
     else
         throw(ArgumentError("Invalid method name $(method)"))
diff --git a/test/ica.jl b/test/ica.jl
index 78fee24..1200da4 100644
--- a/test/ica.jl
+++ b/test/ica.jl
@@ -85,6 +85,7 @@ using StatsBase: ConvergenceException
         @test W'C * W ≈ Matrix(I, k, k)
 
         @test_throws ConvergenceException fit(ICA, X, k; do_whiten=true, tol=1e-8, maxiter=2)
+        _ = fit(ICA, X, k; do_whiten=true, tol=1e-8, maxiter=2, omit_convergence_exception=true)
 
         # Use data of different type
         XX = convert(Matrix{Float32}, X)
diff --git a/test/ppca.jl b/test/ppca.jl
index e2d9532..6c9587a 100644
--- a/test/ppca.jl
+++ b/test/ppca.jl
@@ -113,6 +113,7 @@ import StatsBase
     @test P'P ≈ Matrix(I, 3, 3)
 
     @test_throws StatsBase.ConvergenceException fit(PPCA, X; method=:em, maxiter=1)
+    _ = fit(PPCA, X; method=:em, maxiter=1, omit_convergence_exception=true)
 
     # bayespca
     M0 = fit(PCA, X; mean=mval, maxoutdim = 3)
@@ -139,6 +140,7 @@ import StatsBase
     @test P'P ≈ Matrix(I, 2, 2)
 
     @test_throws StatsBase.ConvergenceException fit(PPCA, X; method=:bayes, maxiter=1)
+    _ = fit(PPCA, X; method=:bayes, maxiter=1, omit_convergence_exception=true)
 
     # Different data types
     # --------------------