Commit abd80ca

Merge pull request #23 from leburgel/patch-1
Record time at start of every LBFGS iteration
2 parents e861492 + 3b8ec9b

File tree: 4 files changed, +41 −21 lines

src/OptimKit.jl (+14 −0)
src/cg.jl (+9 −7)
src/gd.jl (+9 −7)
src/lbfgs.jl (+9 −7)

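All three solver diffs below follow the same pattern: record `told = t` at the start of each iteration, compute `Δt = t - told` after refreshing the cumulative timer `t = time() - t₀`, and report the per-iteration `Δt` (rendered by a new `format_time` helper) in the per-iteration log line instead of the cumulative time. A minimal standalone sketch of that timing pattern, with a hypothetical `work()` standing in for one solver iteration:

```julia
# Sketch of the per-iteration timing pattern introduced by this commit.
# `work()` is a hypothetical stand-in for one optimizer iteration.
work() = sum(abs2, rand(10^6))

function timing_demo(niter)
    t₀ = time()          # start of optimize(), as in the solvers
    t = 0.0
    for numiter in 1:niter
        told = t         # cumulative time at the start of this iteration
        work()
        t = time() - t₀  # cumulative wall time so far
        Δt = t - told    # wall time spent in this iteration alone
        println("iter $numiter: Δt = $Δt s (total $t s)")
    end
end

timing_demo(3)
```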

src/OptimKit.jl

Lines changed: 14 additions & 0 deletions

```diff
@@ -104,6 +104,20 @@ Also see [`GradientDescent`](@ref), [`ConjugateGradient`](@ref), [`LBFGS`](@ref)
 """
 function optimize end
 
+function format_time(t::Float64)
+    if t < 1e-3
+        return @sprintf("%5.1f μs", 1e6*t)
+    elseif t < 1
+        return @sprintf("%5.1f ms", 1e3*t)
+    elseif t < 60
+        return @sprintf("%5.2f s", t)
+    elseif t < 3600
+        return @sprintf("%5.2f m", t / 60)
+    else
+        return @sprintf("%.2f h", t / 3600)
+    end
+end
+
 include("linesearches.jl")
 include("gd.jl")
 include("cg.jl")
```
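
The new helper picks a human-readable unit by magnitude. A quick sketch of its expected output, using a self-contained copy of the definition above (assuming `using Printf`, which OptimKit.jl already relies on for `@sprintf`):

```julia
using Printf

# Copy of the format_time helper added above, for standalone testing.
function format_time(t::Float64)
    if t < 1e-3
        return @sprintf("%5.1f μs", 1e6*t)
    elseif t < 1
        return @sprintf("%5.1f ms", 1e3*t)
    elseif t < 60
        return @sprintf("%5.2f s", t)
    elseif t < 3600
        return @sprintf("%5.2f m", t / 60)
    else
        return @sprintf("%.2f h", t / 3600)
    end
end

format_time(2.5e-4)  # "250.0 μs"
format_time(0.02)    # " 20.0 ms"
format_time(12.3)    # "12.30 s"
format_time(90.0)    # " 1.50 m"  (minutes)
format_time(5000.0)  # "1.39 h"
```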

src/cg.jl

Lines changed: 9 additions & 7 deletions

```diff
@@ -98,9 +98,10 @@ function optimize(fg, x, alg::ConjugateGradient;
 
     numiter = 0
     verbosity >= 2 &&
-        @info @sprintf("CG: initializing with f = %.12f, ‖∇f‖ = %.4e", f, normgrad)
+        @info @sprintf("CG: initializing with f = %.12e, ‖∇f‖ = %.4e", f, normgrad)
     local xprev, gprev, Pgprev, ηprev
     while !(_hasconverged || _shouldstop)
+        told = t
         # compute new search direction
         if precondition === _precondition
             Pg = g
@@ -140,6 +141,7 @@ function optimize(fg, x, alg::ConjugateGradient;
         push!(fhistory, f)
         push!(normgradhistory, normgrad)
         t = time() - t₀
+        Δt = t - told
         _hasconverged = hasconverged(x, f, g, normgrad)
         _shouldstop = shouldstop(x, f, g, numfg, numiter, t)
 
@@ -148,8 +150,8 @@ function optimize(fg, x, alg::ConjugateGradient;
             break
         end
         verbosity >= 3 &&
-            @info @sprintf("CG: iter %4d, time %7.2f s: f = %.12f, ‖∇f‖ = %.4e, α = %.2e, β = %.2e, nfg = %d",
-                numiter, t, f, normgrad, α, β, nfg)
+            @info @sprintf("CG: iter %4d, Δt %s: f = %.12e, ‖∇f‖ = %.4e, α = %.2e, β = %.2e, nfg = %d",
+                numiter, format_time(Δt), f, normgrad, α, β, nfg)
 
         # transport gprev, ηprev and vectors in Hessian approximation to x
         gprev = transport!(gprev, xprev, ηprev, α, x)
@@ -165,12 +167,12 @@ function optimize(fg, x, alg::ConjugateGradient;
     end
     if _hasconverged
         verbosity >= 2 &&
-            @info @sprintf("CG: converged after %d iterations and time %.2f s: f = %.12f, ‖∇f‖ = %.4e",
-                numiter, t, f, normgrad)
+            @info @sprintf("CG: converged after %d iterations and time %s: f = %.12e, ‖∇f‖ = %.4e",
+                numiter, format_time(t), f, normgrad)
     else
         verbosity >= 1 &&
-            @warn @sprintf("CG: not converged to requested tol after %d iterations and time %.2f s: f = %.12f, ‖∇f‖ = %.4e",
-                numiter, t, f, normgrad)
+            @warn @sprintf("CG: not converged to requested tol after %d iterations and time %s: f = %.12e, ‖∇f‖ = %.4e",
+                numiter, format_time(t), f, normgrad)
     end
     history = [fhistory normgradhistory]
     return x, f, g, numfg, history
```

src/gd.jl

Lines changed: 9 additions & 7 deletions

```diff
@@ -76,8 +76,9 @@ function optimize(fg, x, alg::GradientDescent;
 
     numiter = 0
     verbosity >= 2 &&
-        @info @sprintf("GD: initializing with f = %.12f, ‖∇f‖ = %.4e", f, normgrad)
+        @info @sprintf("GD: initializing with f = %.12e, ‖∇f‖ = %.4e", f, normgrad)
     while !(_hasconverged || _shouldstop)
+        told = t
         # compute new search direction
         Pg = precondition(x, deepcopy(g))
         η = scale!(Pg, -1) # we don't need g or Pg anymore, so we can overwrite it
@@ -97,6 +98,7 @@ function optimize(fg, x, alg::GradientDescent;
         push!(fhistory, f)
         push!(normgradhistory, normgrad)
         t = time() - t₀
+        Δt = t - told
         _hasconverged = hasconverged(x, f, g, normgrad)
         _shouldstop = shouldstop(x, f, g, numfg, numiter, t)
 
@@ -105,20 +107,20 @@ function optimize(fg, x, alg::GradientDescent;
             break
         end
         verbosity >= 3 &&
-            @info @sprintf("GD: iter %4d, time %7.2f s: f = %.12f, ‖∇f‖ = %.4e, α = %.2e, nfg = %d",
-                numiter, t, f, normgrad, α, nfg)
+            @info @sprintf("GD: iter %4d, Δt %s: f = %.12e, ‖∇f‖ = %.4e, α = %.2e, nfg = %d",
+                numiter, format_time(Δt), f, normgrad, α, nfg)
 
         # increase α for next step
         α = 2 * α
     end
     if _hasconverged
         verbosity >= 2 &&
-            @info @sprintf("GD: converged after %d iterations and time %.2f s: f = %.12f, ‖∇f‖ = %.4e",
-                numiter, t, f, normgrad)
+            @info @sprintf("GD: converged after %d iterations and time %s: f = %.12e, ‖∇f‖ = %.4e",
+                numiter, format_time(t), f, normgrad)
     else
         verbosity >= 1 &&
-            @warn @sprintf("GD: not converged to requested tol after %d iterations and time %.2f s: f = %.12f, ‖∇f‖ = %.4e",
-                numiter, t, f, normgrad)
+            @warn @sprintf("GD: not converged to requested tol after %d iterations and time %s: f = %.12e, ‖∇f‖ = %.4e",
+                numiter, format_time(t), f, normgrad)
     end
     history = [fhistory normgradhistory]
     return x, f, g, numfg, history
```

src/lbfgs.jl

Lines changed: 9 additions & 7 deletions

```diff
@@ -80,9 +80,10 @@ function optimize(fg, x, alg::LBFGS;
     H = LBFGSInverseHessian(m, TangentType[], TangentType[], ScalarType[])
 
     verbosity >= 2 &&
-        @info @sprintf("LBFGS: initializing with f = %.12f, ‖∇f‖ = %.4e", f, normgrad)
+        @info @sprintf("LBFGS: initializing with f = %.12e, ‖∇f‖ = %.4e", f, normgrad)
 
     while !(_hasconverged || _shouldstop)
+        told = t
         # compute new search direction
         if length(H) > 0
             Hg = let x = x
@@ -117,6 +118,7 @@ function optimize(fg, x, alg::LBFGS;
         push!(fhistory, f)
        push!(normgradhistory, normgrad)
         t = time() - t₀
+        Δt = t - told
         _hasconverged = hasconverged(x, f, g, normgrad)
         _shouldstop = shouldstop(x, f, g, numfg, numiter, t)
 
@@ -125,8 +127,8 @@ function optimize(fg, x, alg::LBFGS;
             break
         end
         verbosity >= 3 &&
-            @info @sprintf("LBFGS: iter %4d, time %7.2f s: f = %.12f, ‖∇f‖ = %.4e, α = %.2e, m = %d, nfg = %d",
-                numiter, t, f, normgrad, α, length(H), nfg)
+            @info @sprintf("LBFGS: iter %4d, Δt %s: f = %.12e, ‖∇f‖ = %.4e, α = %.2e, m = %d, nfg = %d",
+                numiter, format_time(Δt), f, normgrad, α, length(H), nfg)
 
         # transport gprev, ηprev and vectors in Hessian approximation to x
         gprev = transport!(gprev, xprev, ηprev, α, x)
@@ -190,12 +192,12 @@ function optimize(fg, x, alg::LBFGS;
     end
     if _hasconverged
         verbosity >= 2 &&
-            @info @sprintf("LBFGS: converged after %d iterations and time %.2f s: f = %.12f, ‖∇f‖ = %.4e",
-                numiter, t, f, normgrad)
+            @info @sprintf("LBFGS: converged after %d iterations and time %s: f = %.12e, ‖∇f‖ = %.4e",
+                numiter, format_time(t), f, normgrad)
     else
         verbosity >= 1 &&
-            @warn @sprintf("LBFGS: not converged to requested tol after %d iterations and time %.2f s: f = %.12f, ‖∇f‖ = %.4e",
-                numiter, t, f, normgrad)
+            @warn @sprintf("LBFGS: not converged to requested tol after %d iterations and time %s: f = %.12e, ‖∇f‖ = %.4e",
+                numiter, format_time(t), f, normgrad)
     end
     history = [fhistory normgradhistory]
     return x, f, g, numfg, history
```
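
To see the shape of the new verbosity-3 output, the format string from the LBFGS hunk can be rendered with hypothetical iteration values (all numbers below are made up purely for illustration; `format_time` is the helper defined in src/OptimKit.jl above and is assumed to be in scope):

```julia
using Printf
# Assumes the format_time definition from src/OptimKit.jl above.

# Hypothetical iteration values, only to render the new format string:
numiter, Δt, f, normgrad, α, m, nfg = 12, 0.0316, -1.234567890123, 5.6789e-3, 1.0, 8, 2

msg = @sprintf("LBFGS: iter %4d, Δt %s: f = %.12e, ‖∇f‖ = %.4e, α = %.2e, m = %d, nfg = %d",
               numiter, format_time(Δt), f, normgrad, α, m, nfg)
# "LBFGS: iter   12, Δt  31.6 ms: f = -1.234567890123e+00, ‖∇f‖ = 5.6789e-03, α = 1.00e+00, m = 8, nfg = 2"
```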
