From ad8e10f79725d6cee04b802fe3f57fe838bcee5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Miclu=C8=9Ba-C=C3=A2mpeanu?= Date: Wed, 15 Oct 2025 00:03:20 +0300 Subject: [PATCH 01/12] add initial version of OptimizationMadNLP This is a wrapper for https://github.com/MadNLP/MadNLP.jl Co-authored-by: Claude --- lib/OptimizationMadNLP/LICENSE | 21 + lib/OptimizationMadNLP/Project.toml | 37 ++ .../src/OptimizationMadNLP.jl | 380 ++++++++++++++++++ lib/OptimizationMadNLP/test/runtests.jl | 28 ++ 4 files changed, 466 insertions(+) create mode 100644 lib/OptimizationMadNLP/LICENSE create mode 100644 lib/OptimizationMadNLP/Project.toml create mode 100644 lib/OptimizationMadNLP/src/OptimizationMadNLP.jl create mode 100644 lib/OptimizationMadNLP/test/runtests.jl diff --git a/lib/OptimizationMadNLP/LICENSE b/lib/OptimizationMadNLP/LICENSE new file mode 100644 index 000000000..ac2363b14 --- /dev/null +++ b/lib/OptimizationMadNLP/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 Sebastian Micluța-Câmpeanu and contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/lib/OptimizationMadNLP/Project.toml b/lib/OptimizationMadNLP/Project.toml new file mode 100644 index 000000000..8a6f3f7c0 --- /dev/null +++ b/lib/OptimizationMadNLP/Project.toml @@ -0,0 +1,37 @@ +name = "OptimizationMadNLP" +uuid = "5d9c809f-c847-4062-9fba-1793bbfef577" +version = "0.1.0" +authors = ["Sebastian Micluța-Câmpeanu and contributors"] + +[deps] +LinearAlgebra = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" +MadNLP = "2621e9c9-9eb4-46b1-8089-e8c72242dfb6" +NLPModels = "a4795742-8479-5a88-8948-cc11e1c8c1a6" +OptimizationBase = "bca83a33-5cc9-4baa-983d-23429ab6bcbb" +SciMLBase = "0bca4576-84f4-4d90-8ffe-ffa030f20462" +SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" +SymbolicIndexingInterface = "2efcf032-c050-4f8e-a9bb-153293bab1f5" + +[compat] +LinearAlgebra = "1.10.0" +MadNLP = "0.8.12" +ModelingToolkit = "10.23" +NLPModels = "0.21.5" +OptimizationBase = "3" +SciMLBase = "2.90.0" +SparseArrays = "1.10.0" +SymbolicIndexingInterface = "0.3.40" +Zygote = "0.7" +julia = "1.10" + +[extras] +Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78" +Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" +Symbolics = "0c5d862f-8b57-4792-8d23-62f2024744c7" +Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" + +[targets] +test = ["Aqua", "ModelingToolkit", "Random", "ReverseDiff", "Test", "Symbolics", "Zygote"] diff --git a/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl b/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl new file mode 100644 index 000000000..3fa42a0b7 --- /dev/null +++ b/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl @@ -0,0 +1,380 @@ +module OptimizationMadNLP + +using OptimizationBase +using MadNLP +using NLPModels +using SparseArrays + +export MadNLPOptimizer + +struct NLPModelsAdaptor{C, T} <: NLPModels.AbstractNLPModel{T, Vector{T}} + cache::C + meta::NLPModels.NLPModelMeta{T, Vector{T}} + counters::NLPModels.Counters + jac_rows::Vector{Int} + jac_cols::Vector{Int} + jac_buffer::AbstractMatrix{T} + hess_rows::Vector{Int} + hess_cols::Vector{Int} + hess_buffer::AbstractMatrix{T} +end + +function _enumerate_dense_structure(ncon, nvar) + nnz = ncon * nvar + rows = Vector{Int}(undef, nnz) + cols = Vector{Int}(undef, nnz) + idx = 1 + for j in 1:nvar + for i in 1:ncon + rows[idx] = i + cols[idx] = j + idx += 1 + end + end + return rows, cols +end + +function _enumerate_lower_triangle(n) + nnz = div(n * (n + 1), 2) + rows = Vector{Int}(undef, nnz) + cols = Vector{Int}(undef, nnz) + idx = 1 + for j in 1:n + for i in j:n # Only lower triangle + rows[idx] = i + cols[idx] = j + idx += 1 + end + end + return rows, cols +end + +function NLPModelsAdaptor( + cache::C, meta::NLPModels.NLPModelMeta{T, Vector{T}}, counters) where {C, T} + # Extract Jacobian structure once + jac_prototype = cache.f.cons_jac_prototype + + if jac_prototype isa SparseMatrixCSC + jac_rows, jac_cols, _ = findnz(jac_prototype) + jac_buffer = similar(jac_prototype) + elseif jac_prototype isa AbstractMatrix + ncon, nvar = size(jac_prototype) + jac_rows, jac_cols = _enumerate_dense_structure(ncon, nvar) + jac_buffer = similar(jac_prototype) + else + # Fallback: assume dense structure + ncon, nvar = meta.ncon, meta.nvar + jac_rows, jac_cols = _enumerate_dense_structure(ncon, nvar) + jac_buffer = zeros(T, ncon, nvar) + end + + # Extract Hessian structure + hess_proto = something(cache.f.lag_hess_prototype, cache.f.hess_prototype, nothing) + + if 
!isnothing(hess_proto) && hess_proto isa SparseMatrixCSC + I, J, _ = findnz(hess_proto) + # Keep only lower triangle + lower_mask = I .>= J + hess_rows = I[lower_mask] + hess_cols = J[lower_mask] + hess_buffer = similar(hess_proto) + elseif !isnothing(hess_proto) + # Dense Hessian + n = size(hess_proto, 1) + hess_rows, hess_cols = _enumerate_lower_triangle(n) + hess_buffer = similar(hess_proto) + else + # No prototype - create dense structure + n = meta.nvar + hess_rows, hess_cols = _enumerate_lower_triangle(n) + hess_buffer = zeros(T, n, n) + end + + return NLPModelsAdaptor{C, T}(cache, meta, counters, + jac_rows, jac_cols, jac_buffer, + hess_rows, hess_cols, hess_buffer) +end + +function NLPModels.obj(nlp::NLPModelsAdaptor, x::AbstractVector) + nlp.cache.f(x, nlp.cache.p) +end + +function NLPModels.grad!(nlp::NLPModelsAdaptor, x::AbstractVector, g::AbstractVector) + nlp.cache.f.grad(g, x, nlp.cache.p) +end + +function NLPModels.cons!(nlp::NLPModelsAdaptor, x::AbstractVector, c::AbstractVector) + nlp.cache.f.cons(c, x) +end + +function NLPModels.jac_structure!( + nlp::NLPModelsAdaptor, I::AbstractVector{T}, J::AbstractVector{T}) where {T} + copyto!(I, nlp.jac_rows) + copyto!(J, nlp.jac_cols) + return I, J +end + +function NLPModels.jac_coord!( + nlp::NLPModelsAdaptor, x::AbstractVector, vals::AbstractVector) + # Evaluate Jacobian into preallocated buffer + nlp.cache.f.cons_j(nlp.jac_buffer, x) + + # Extract values in COO order + if nlp.jac_buffer isa SparseMatrixCSC + _, _, v = findnz(nlp.jac_buffer) + copyto!(vals, v) + else + # Dense case: extract in column-major order matching structure + for (idx, (i, j)) in enumerate(zip(nlp.jac_rows, nlp.jac_cols)) + vals[idx] = nlp.jac_buffer[i, j] + end + end + + return vals +end + +function NLPModels.hess_structure!( + nlp::NLPModelsAdaptor, I::AbstractVector{T}, J::AbstractVector{T}) where {T} + copyto!(I, nlp.hess_rows) + copyto!(J, nlp.hess_cols) + return I, J +end + +function NLPModels.hess_coord!( + nlp::NLPModelsAdaptor, x, y, H::AbstractVector; obj_weight = 1.0) + if !isnothing(nlp.cache.f.lag_h) + # Use Lagrangian Hessian directly + nlp.cache.f.lag_h(nlp.hess_buffer, x, obj_weight, y) + else + # Manual computation: objective + constraint Hessians + nlp.cache.f.hess(nlp.hess_buffer, x) + nlp.hess_buffer .*= obj_weight + + if !isnothing(nlp.cache.f.cons_h) + # Add weighted constraint Hessians + cons_hessians = [similar(nlp.hess_buffer) for _ in 1:length(y)] + nlp.cache.f.cons_h(cons_hessians, x) + for (λ, H_cons) in zip(y, cons_hessians) + nlp.hess_buffer .+= λ .* H_cons + end + end + end + + # Extract lower triangle values + for (idx, (i, j)) in enumerate(zip(nlp.hess_rows, nlp.hess_cols)) + H[idx] = nlp.hess_buffer[i, j] + end + + return H +end + +@kwdef struct MadNLPOptimizer{T} + # General options + rethrow_error::Bool = true + disable_garbage_collector::Bool = false + blas_num_threads::Int = 1 + + # Output options + output_file::String = "" + file_print_level::MadNLP.LogLevels = MadNLP.INFO + + # Termination options + acceptable_tol::T = 1e-6 + acceptable_iter::Int = 15 + + # NLP options + jacobian_constant::Bool = false + hessian_constant::Bool = false + hessian_approximation::Type = MadNLP.ExactHessian + + # Barrier + mu_init::T = 1e-1 + + # Additional MadNLP options + additional_options::Dict{Symbol, Any} = Dict{Symbol, Any}() +end + +OptimizationBase.supports_opt_cache_interface(opt::MadNLPOptimizer) = true + +function SciMLBase.requiresgradient(opt::MadNLPOptimizer) + true +end +function 
SciMLBase.requireshessian(opt::MadNLPOptimizer) + true +end +function SciMLBase.allowsbounds(opt::MadNLPOptimizer) + true +end +function SciMLBase.allowsconstraints(opt::MadNLPOptimizer) + true +end +function SciMLBase.requiresconsjac(opt::MadNLPOptimizer) + true +end +function SciMLBase.requireslagh(opt::MadNLPOptimizer) + true +end + +function SciMLBase.__init(prob::SciMLBase.OptimizationProblem, opt::MadNLPOptimizer; + kwargs...) + return OptimizationCache(prob, opt; kwargs...) +end + +function map_madnlp_status(status::MadNLP.Status) + if status in [ + MadNLP.SOLVE_SUCCEEDED, + MadNLP.SOLVED_TO_ACCEPTABLE_LEVEL, + MadNLP.USER_REQUESTED_STOP + ] + return SciMLBase.ReturnCode.Success + elseif status in [ + MadNLP.INFEASIBLE_PROBLEM_DETECTED, + MadNLP.SEARCH_DIRECTION_BECOMES_TOO_SMALL, + MadNLP.DIVERGING_ITERATES, + MadNLP.RESTORATION_FAILED, + MadNLP.NOT_ENOUGH_DEGREES_OF_FREEDOM + ] + return SciMLBase.ReturnCode.Infeasible + elseif status == MadNLP.MAXIMUM_ITERATIONS_EXCEEDED + return SciMLBase.ReturnCode.MaxIters + elseif status == MadNLP.MAXIMUM_WALLTIME_EXCEEDED + return SciMLBase.ReturnCode.MaxTime + else + # All error codes and invalid numbers + return SciMLBase.ReturnCode.Failure + end +end + +function _get_nnzh(f) + hess_proto = something(f.lag_hess_prototype, f.hess_prototype, nothing) + + if isnothing(hess_proto) + return 0 # Unknown, let NLPModels compute it + elseif hess_proto isa SparseMatrixCSC + # Only count lower triangle + I, J, _ = findnz(hess_proto) + return count(i >= j for (i, j) in zip(I, J)) + else + # Dense: n(n+1)/2 + n = size(hess_proto, 1) + return div(n * (n + 1), 2) + end +end + +function __map_optimizer_args(cache, + opt::MadNLPOptimizer; + maxiters::Union{Number, Nothing} = nothing, + maxtime::Union{Number, Nothing} = nothing, + abstol::Union{Number, Nothing} = nothing, + reltol::Union{Number, Nothing} = nothing, + verbose = false, + progress::Bool = false, + callback = nothing) + nvar = length(cache.u0) + ncon = length(cache.lcons) + + meta = NLPModels.NLPModelMeta( + nvar; + ncon, + nnzj = nnz(cache.f.cons_jac_prototype), + nnzh = _get_nnzh(cache.f), + x0 = cache.u0, + y0 = zeros(eltype(cache.u0), ncon), + lvar = cache.lb, + uvar = cache.ub, + lcon = cache.lcons, + ucon = cache.ucons, + minimize = cache.sense == MinSense + ) + + nlp = NLPModelsAdaptor(cache, meta, NLPModels.Counters()) + + if verbose isa Bool + print_level = verbose ? MadNLP.INFO : MadNLP.WARN + else + print_level = verbose + end + # use MadNLP defaults + tol = isnothing(reltol) ? 1e-8 : reltol + max_iter = isnothing(maxiters) ? 3000 : maxiters + max_wall_time = isnothing(maxtime) ? 1e6 : maxtime + + MadNLP.MadNLPSolver(nlp; + print_level, tol, max_iter, max_wall_time, + opt.rethrow_error, + opt.disable_garbage_collector, + opt.blas_num_threads, + opt.output_file, + opt.file_print_level, + opt.acceptable_tol, + opt.acceptable_iter, + opt.jacobian_constant, + opt.hessian_constant, + opt.hessian_approximation, + opt.mu_init, + opt.additional_options... 
+ ) +end + +function SciMLBase.__solve(cache::OptimizationBase.OptimizationCache{ + F, + RC, + LB, + UB, + LC, + UC, + S, + O, + D, + P, + C +}) where { + F, + RC, + LB, + UB, + LC, + UC, + S, + O <: MadNLPOptimizer, + D, + P, + C +} + maxiters = OptimizationBase._check_and_convert_maxiters(cache.solver_args.maxiters) + maxtime = OptimizationBase._check_and_convert_maxtime(cache.solver_args.maxtime) + + solver = __map_optimizer_args(cache, + cache.opt; + abstol = cache.solver_args.abstol, + reltol = cache.solver_args.reltol, + maxiters = maxiters, + maxtime = maxtime, + verbose = get(cache.solver_args, :verbose, false), + progress = cache.progress, + callback = cache.callback) + + results = MadNLP.solve!(solver) + + # if cache.progress + # # Set progressbar to 1 to finish it + # Base.@logmsg(Base.LogLevel(-1), "", progress = 1, _id = :OptimizationMadNLP) + # end + + stats = OptimizationBase.OptimizationStats(; time = results.counters.total_time, + iterations = results.iter, + fevals = results.counters.obj_cnt, + gevals = results.counters.obj_grad_cnt) + + retcode = map_madnlp_status(results.status) + + return SciMLBase.build_solution(cache, + cache.opt, + results.solution, + results.objective; + original = results, + retcode, + stats = stats) +end + +end diff --git a/lib/OptimizationMadNLP/test/runtests.jl b/lib/OptimizationMadNLP/test/runtests.jl new file mode 100644 index 000000000..3b641fd89 --- /dev/null +++ b/lib/OptimizationMadNLP/test/runtests.jl @@ -0,0 +1,28 @@ +using OptimizationMadNLP +using OptimizationBase +using Test +import Zygote + +function objective(x, ::Any) + return x[1] * x[4] * (x[1] + x[2] + x[3]) + x[3] +end +function constraints(res, x, ::Any) + res .= [ + x[1] * x[2] * x[3] * x[4], + x[1]^2 + x[2]^2 + x[3]^2 + x[4]^2 + ] +end + +x0 = [1.0, 5.0, 5.0, 1.0] +optfunc = OptimizationFunction(objective, AutoSparse(OptimizationBase.AutoZygote()), cons=constraints) +prob = OptimizationProblem(optfunc, x0; sense = OptimizationBase.MinSense, + lb = [1.0, 1.0, 1.0, 1.0], + ub = [5.0, 5.0, 5.0, 5.0], + lcons = [25.0, 40.0], + ucons = [Inf, 40.0]) + +cache = init(prob, MadNLPOptimizer()) + +sol = OptimizationBase.solve!(cache) + +@test SciMLBase.successful_retcode(sol) From 91512d3b557eda6a458028d7eff677d5554b4ce2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Miclu=C8=9Ba-C=C3=A2mpeanu?= Date: Wed, 15 Oct 2025 00:06:31 +0300 Subject: [PATCH 02/12] add OptimizationMadNLP to CI Co-authored-by: Claude --- .github/workflows/CI.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 6b6610ab4..1a5824453 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -26,6 +26,7 @@ jobs: - OptimizationGCMAES - OptimizationLBFGSB - OptimizationIpopt + - OptimizationMadNLP - OptimizationManopt - OptimizationMetaheuristics - OptimizationMOI @@ -69,7 +70,7 @@ jobs: GROUP: ${{ matrix.group }} - uses: julia-actions/julia-processcoverage@v1 with: - directories: src,lib/OptimizationBBO/src,lib/OptimizationCMAEvolutionStrategy/src,lib/OptimizationEvolutionary/src,lib/OptimizationGCMAES/src,lib/OptimizationIpopt/src,lib/OptimizationManopt/src,lib/OptimizationMOI/src,lib/OptimizationMetaheuristics/src,lib/OptimizationMultistartOptimization/src,lib/OptimizationNLopt/src,lib/OptimizationNOMAD/src,lib/OptimizationOptimJL/src,lib/OptimizationOptimisers/src,lib/OptimizationPolyalgorithms/src,lib/OptimizationQuadDIRECT/src,lib/OptimizationSpeedMapping/src + directories: 
src,lib/OptimizationBBO/src,lib/OptimizationCMAEvolutionStrategy/src,lib/OptimizationEvolutionary/src,lib/OptimizationGCMAES/src,lib/OptimizationIpopt/src,lib/OptimizationMadNLP/src,lib/OptimizationManopt/src,lib/OptimizationMOI/src,lib/OptimizationMetaheuristics/src,lib/OptimizationMultistartOptimization/src,lib/OptimizationNLopt/src,lib/OptimizationNOMAD/src,lib/OptimizationOptimJL/src,lib/OptimizationOptimisers/src,lib/OptimizationPolyalgorithms/src,lib/OptimizationQuadDIRECT/src,lib/OptimizationSpeedMapping/src - uses: codecov/codecov-action@v5 with: file: lcov.info From 534d62557fa3fc31feeb178e208100c3452496b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Miclu=C8=9Ba-C=C3=A2mpeanu?= Date: Wed, 15 Oct 2025 18:53:28 +0300 Subject: [PATCH 03/12] add warning for callback and progress no support in MadNLP :( --- lib/OptimizationMadNLP/src/OptimizationMadNLP.jl | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl b/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl index 3fa42a0b7..7c2917939 100644 --- a/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl +++ b/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl @@ -273,6 +273,11 @@ function __map_optimizer_args(cache, nvar = length(cache.u0) ncon = length(cache.lcons) + if !isnothing(progress) || !isnothing(callback) + @warn("MadNLP doesn't currently support user defined callbacks.") + end + # TODO: add support for user callbacks in MadNLP + meta = NLPModels.NLPModelMeta( nvar; ncon, @@ -352,15 +357,11 @@ function SciMLBase.__solve(cache::OptimizationBase.OptimizationCache{ maxtime = maxtime, verbose = get(cache.solver_args, :verbose, false), progress = cache.progress, - callback = cache.callback) + callback = cache.callback + ) results = MadNLP.solve!(solver) - # if cache.progress - # # Set progressbar to 1 to finish it - # Base.@logmsg(Base.LogLevel(-1), "", progress = 1, _id = :OptimizationMadNLP) - # end - stats = OptimizationBase.OptimizationStats(; time = results.counters.total_time, iterations = results.iter, fevals = results.counters.obj_cnt, @@ -374,7 +375,7 @@ function SciMLBase.__solve(cache::OptimizationBase.OptimizationCache{ results.objective; original = results, retcode, - stats = stats) + stats) end end From 76fd254bd188774e42f1791f7704ab634a3e7012 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Miclu=C8=9Ba-C=C3=A2mpeanu?= Date: Wed, 15 Oct 2025 21:01:26 +0300 Subject: [PATCH 04/12] handle unconstrained and unbounded problems --- .../src/OptimizationMadNLP.jl | 77 +++++++++++++------ 1 file changed, 52 insertions(+), 25 deletions(-) diff --git a/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl b/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl index 7c2917939..91a06e0fb 100644 --- a/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl +++ b/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl @@ -68,8 +68,10 @@ function NLPModelsAdaptor( jac_buffer = zeros(T, ncon, nvar) end + ncon = !isnothing(cache.lcons) ? length(cache.lcons) : 0 + # Extract Hessian structure - hess_proto = something(cache.f.lag_hess_prototype, cache.f.hess_prototype, nothing) + hess_proto = ncon > 0 ? 
cache.f.lag_hess_prototype : cache.f.hess_prototype if !isnothing(hess_proto) && hess_proto isa SparseMatrixCSC I, J, _ = findnz(hess_proto) @@ -104,7 +106,10 @@ function NLPModels.grad!(nlp::NLPModelsAdaptor, x::AbstractVector, g::AbstractVe end function NLPModels.cons!(nlp::NLPModelsAdaptor, x::AbstractVector, c::AbstractVector) - nlp.cache.f.cons(c, x) + if !isempty(c) + nlp.cache.f.cons(c, x) + end + return c end function NLPModels.jac_structure!( @@ -116,17 +121,19 @@ end function NLPModels.jac_coord!( nlp::NLPModelsAdaptor, x::AbstractVector, vals::AbstractVector) - # Evaluate Jacobian into preallocated buffer - nlp.cache.f.cons_j(nlp.jac_buffer, x) - - # Extract values in COO order - if nlp.jac_buffer isa SparseMatrixCSC - _, _, v = findnz(nlp.jac_buffer) - copyto!(vals, v) - else - # Dense case: extract in column-major order matching structure - for (idx, (i, j)) in enumerate(zip(nlp.jac_rows, nlp.jac_cols)) - vals[idx] = nlp.jac_buffer[i, j] + if !isempty(vals) + # Evaluate Jacobian into preallocated buffer + nlp.cache.f.cons_j(nlp.jac_buffer, x) + + # Extract values in COO order + if nlp.jac_buffer isa SparseMatrixCSC + _, _, v = findnz(nlp.jac_buffer) + copyto!(vals, v) + else + # Dense case: extract in column-major order matching structure + for (idx, (i, j)) in enumerate(zip(nlp.jac_rows, nlp.jac_cols)) + vals[idx] = nlp.jac_buffer[i, j] + end end end @@ -150,7 +157,7 @@ function NLPModels.hess_coord!( nlp.cache.f.hess(nlp.hess_buffer, x) nlp.hess_buffer .*= obj_weight - if !isnothing(nlp.cache.f.cons_h) + if !isnothing(nlp.cache.f.cons_h) && !isempty(y) # Add weighted constraint Hessians cons_hessians = [similar(nlp.hess_buffer) for _ in 1:length(y)] nlp.cache.f.cons_h(cons_hessians, x) @@ -160,9 +167,11 @@ function NLPModels.hess_coord!( end end - # Extract lower triangle values - for (idx, (i, j)) in enumerate(zip(nlp.hess_rows, nlp.hess_cols)) - H[idx] = nlp.hess_buffer[i, j] + if !isempty(H) + # Extract lower triangle values + for (idx, (i, j)) in enumerate(zip(nlp.hess_rows, nlp.hess_cols)) + H[idx] = nlp.hess_buffer[i, j] + end end return H @@ -194,7 +203,7 @@ end additional_options::Dict{Symbol, Any} = Dict{Symbol, Any}() end -OptimizationBase.supports_opt_cache_interface(opt::MadNLPOptimizer) = true +SciMLBase.supports_opt_cache_interface(opt::MadNLPOptimizer) = true function SciMLBase.requiresgradient(opt::MadNLPOptimizer) true @@ -245,8 +254,20 @@ function map_madnlp_status(status::MadNLP.Status) end end +function _get_nnzj(f) + jac_prototype = f.cons_jac_prototype + + if isnothing(jac_prototype) + return 0 + elseif jac_prototype isa SparseMatrixCSC + nnz(jac_prototype) + else + prod(size(jac_prototype)) + end +end + function _get_nnzh(f) - hess_proto = something(f.lag_hess_prototype, f.hess_prototype, nothing) + hess_proto = f.lag_hess_prototype if isnothing(hess_proto) return 0 # Unknown, let NLPModels compute it @@ -271,24 +292,30 @@ function __map_optimizer_args(cache, progress::Bool = false, callback = nothing) nvar = length(cache.u0) - ncon = length(cache.lcons) + ncon = !isnothing(cache.lcons) ? 
length(cache.lcons) : 0 if !isnothing(progress) || !isnothing(callback) @warn("MadNLP doesn't currently support user defined callbacks.") end # TODO: add support for user callbacks in MadNLP + T = eltype(cache.u0) + lvar = something(cache.lb, fill(-Inf, nvar)) + uvar = something(cache.ub, fill(Inf, nvar)) + lcon = something(cache.lcons, T[]) + ucon = something(cache.ucons, T[]) + meta = NLPModels.NLPModelMeta( nvar; ncon, - nnzj = nnz(cache.f.cons_jac_prototype), + nnzj = _get_nnzj(cache.f), nnzh = _get_nnzh(cache.f), x0 = cache.u0, y0 = zeros(eltype(cache.u0), ncon), - lvar = cache.lb, - uvar = cache.ub, - lcon = cache.lcons, - ucon = cache.ucons, + lvar, + uvar, + lcon, + ucon, minimize = cache.sense == MinSense ) From 40b6cbcb76d98e9ec71d0bad461047ca259836c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Miclu=C8=9Ba-C=C3=A2mpeanu?= Date: Wed, 15 Oct 2025 22:00:08 +0300 Subject: [PATCH 05/12] assume dense Hessian if no prototype is provided Co-authored-by: Claude --- lib/OptimizationMadNLP/src/OptimizationMadNLP.jl | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl b/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl index 91a06e0fb..9052c82f6 100644 --- a/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl +++ b/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl @@ -103,6 +103,7 @@ end function NLPModels.grad!(nlp::NLPModelsAdaptor, x::AbstractVector, g::AbstractVector) nlp.cache.f.grad(g, x, nlp.cache.p) + return g end function NLPModels.cons!(nlp::NLPModelsAdaptor, x::AbstractVector, c::AbstractVector) @@ -266,11 +267,13 @@ function _get_nnzj(f) end end -function _get_nnzh(f) - hess_proto = f.lag_hess_prototype +function _get_nnzh(f, ncon, nvar) + # For constrained problems, use Lagrangian Hessian; for unconstrained, use objective Hessian + hess_proto = ncon > 0 ? f.lag_hess_prototype : f.hess_prototype if isnothing(hess_proto) - return 0 # Unknown, let NLPModels compute it + # No prototype provided - assume dense Hessian + return div(nvar * (nvar + 1), 2) elseif hess_proto isa SparseMatrixCSC # Only count lower triangle I, J, _ = findnz(hess_proto) @@ -309,7 +312,7 @@ function __map_optimizer_args(cache, nvar; ncon, nnzj = _get_nnzj(cache.f), - nnzh = _get_nnzh(cache.f), + nnzh = _get_nnzh(cache.f, ncon, nvar), x0 = cache.u0, y0 = zeros(eltype(cache.u0), ncon), lvar, From 739b8a695f1e1bb41e0af898641f8ef1330297db Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Miclu=C8=9Ba-C=C3=A2mpeanu?= Date: Wed, 15 Oct 2025 22:12:15 +0300 Subject: [PATCH 06/12] fix callback warning --- lib/OptimizationMadNLP/src/OptimizationMadNLP.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl b/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl index 9052c82f6..4cca3503e 100644 --- a/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl +++ b/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl @@ -293,11 +293,11 @@ function __map_optimizer_args(cache, reltol::Union{Number, Nothing} = nothing, verbose = false, progress::Bool = false, - callback = nothing) + callback = DEFAULT_CALLBACK) nvar = length(cache.u0) ncon = !isnothing(cache.lcons) ? 
length(cache.lcons) : 0 - if !isnothing(progress) || !isnothing(callback) + if callback !== DEFAULT_CALLBACK || progress @warn("MadNLP doesn't currently support user defined callbacks.") end # TODO: add support for user callbacks in MadNLP From 13e1ac54c273c089fc40fb5a21ba31c1f2224579 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Miclu=C8=9Ba-C=C3=A2mpeanu?= Date: Wed, 15 Oct 2025 23:40:00 +0300 Subject: [PATCH 07/12] Fix MadNLP sparse Hessian and constraint Jacobian handling Fixes dimension mismatch errors when using sparse Lagrangian Hessians & corrects constraint Jacobian structure initialization Co-authored-by: Claude --- .../src/OptimizationMadNLP.jl | 40 +++++++++++++------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl b/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl index 4cca3503e..62076b0fe 100644 --- a/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl +++ b/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl @@ -1,13 +1,14 @@ module OptimizationMadNLP using OptimizationBase +using OptimizationBase: MinSense, MaxSense, DEFAULT_CALLBACK using MadNLP using NLPModels using SparseArrays export MadNLPOptimizer -struct NLPModelsAdaptor{C, T} <: NLPModels.AbstractNLPModel{T, Vector{T}} +struct NLPModelsAdaptor{C, T, HB} <: NLPModels.AbstractNLPModel{T, Vector{T}} cache::C meta::NLPModels.NLPModelMeta{T, Vector{T}} counters::NLPModels.Counters @@ -16,7 +17,7 @@ struct NLPModelsAdaptor{C, T} <: NLPModels.AbstractNLPModel{T, Vector{T}} jac_buffer::AbstractMatrix{T} hess_rows::Vector{Int} hess_cols::Vector{Int} - hess_buffer::AbstractMatrix{T} + hess_buffer::HB # Can be Vector{T} or Matrix{T} end function _enumerate_dense_structure(ncon, nvar) @@ -79,11 +80,13 @@ function NLPModelsAdaptor( lower_mask = I .>= J hess_rows = I[lower_mask] hess_cols = J[lower_mask] - hess_buffer = similar(hess_proto) + # Create a values buffer matching the number of lower triangle elements + hess_buffer = zeros(T, sum(lower_mask)) elseif !isnothing(hess_proto) # Dense Hessian n = size(hess_proto, 1) hess_rows, hess_cols = _enumerate_lower_triangle(n) + # For dense, store the full matrix but we'll extract values later hess_buffer = similar(hess_proto) else # No prototype - create dense structure @@ -92,7 +95,7 @@ function NLPModelsAdaptor( hess_buffer = zeros(T, n, n) end - return NLPModelsAdaptor{C, T}(cache, meta, counters, + return NLPModelsAdaptor{C, T, typeof(hess_buffer)}(cache, meta, counters, jac_rows, jac_cols, jac_buffer, hess_rows, hess_cols, hess_buffer) end @@ -152,7 +155,13 @@ function NLPModels.hess_coord!( nlp::NLPModelsAdaptor, x, y, H::AbstractVector; obj_weight = 1.0) if !isnothing(nlp.cache.f.lag_h) # Use Lagrangian Hessian directly - nlp.cache.f.lag_h(nlp.hess_buffer, x, obj_weight, y) + if nlp.hess_buffer isa AbstractVector + # For sparse prototypes, hess_buffer is already a values vector + nlp.cache.f.lag_h(nlp.hess_buffer, x, obj_weight, y) + else + # For dense matrices, we need to pass the full matrix and extract values + nlp.cache.f.lag_h(nlp.hess_buffer, x, obj_weight, y) + end else # Manual computation: objective + constraint Hessians nlp.cache.f.hess(nlp.hess_buffer, x) @@ -169,9 +178,15 @@ function NLPModels.hess_coord!( end if !isempty(H) - # Extract lower triangle values - for (idx, (i, j)) in enumerate(zip(nlp.hess_rows, nlp.hess_cols)) - H[idx] = nlp.hess_buffer[i, j] + # Extract values depending on buffer type + if nlp.hess_buffer isa AbstractVector + # For sparse, hess_buffer already contains just the values 
+ copyto!(H, nlp.hess_buffer) + else + # For dense matrices, extract lower triangle values + for (idx, (i, j)) in enumerate(zip(nlp.hess_rows, nlp.hess_cols)) + H[idx] = nlp.hess_buffer[i, j] + end end end @@ -255,11 +270,12 @@ function map_madnlp_status(status::MadNLP.Status) end end -function _get_nnzj(f) +function _get_nnzj(f, ncon, nvar) jac_prototype = f.cons_jac_prototype if isnothing(jac_prototype) - return 0 + # No prototype - assume dense structure if there are constraints + return ncon > 0 ? ncon * nvar : 0 elseif jac_prototype isa SparseMatrixCSC nnz(jac_prototype) else @@ -311,7 +327,7 @@ function __map_optimizer_args(cache, meta = NLPModels.NLPModelMeta( nvar; ncon, - nnzj = _get_nnzj(cache.f), + nnzj = _get_nnzj(cache.f, ncon, nvar), nnzh = _get_nnzh(cache.f, ncon, nvar), x0 = cache.u0, y0 = zeros(eltype(cache.u0), ncon), @@ -319,7 +335,7 @@ function __map_optimizer_args(cache, uvar, lcon, ucon, - minimize = cache.sense == MinSense + minimize = cache.sense !== MaxSense # Default to minimization when sense is nothing or MinSense ) nlp = NLPModelsAdaptor(cache, meta, NLPModels.Counters()) From 42cd5fd48e81e896dd4aaf78128c36678a7ba040 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Miclu=C8=9Ba-C=C3=A2mpeanu?= Date: Thu, 16 Oct 2025 00:33:58 +0300 Subject: [PATCH 08/12] fix option priority for MadNLP --- lib/OptimizationMadNLP/src/OptimizationMadNLP.jl | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl b/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl index 62076b0fe..84176cb46 100644 --- a/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl +++ b/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl @@ -345,12 +345,14 @@ function __map_optimizer_args(cache, else print_level = verbose end - # use MadNLP defaults - tol = isnothing(reltol) ? 1e-8 : reltol + + !isnothing(reltol) && @warn "reltol not supported by MadNLP." + tol = isnothing(abstol) ? 1e-8 : abstol max_iter = isnothing(maxiters) ? 3000 : maxiters max_wall_time = isnothing(maxtime) ? 1e6 : maxtime MadNLP.MadNLPSolver(nlp; + opt.additional_options..., print_level, tol, max_iter, max_wall_time, opt.rethrow_error, opt.disable_garbage_collector, @@ -363,7 +365,6 @@ function __map_optimizer_args(cache, opt.hessian_constant, opt.hessian_approximation, opt.mu_init, - opt.additional_options... ) end From 735edb971982a8297cfc29d6b5d55a57deeba355 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Miclu=C8=9Ba-C=C3=A2mpeanu?= Date: Thu, 16 Oct 2025 04:55:20 +0300 Subject: [PATCH 09/12] Add support for NLPModels.jtprod!, NLPModels.jprod! 
& LBFGS options Co-Authored-By: Claude --- .../src/OptimizationMadNLP.jl | 41 ++++++++++++++++++- 1 file changed, 40 insertions(+), 1 deletion(-) diff --git a/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl b/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl index 84176cb46..be0611987 100644 --- a/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl +++ b/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl @@ -193,6 +193,22 @@ function NLPModels.hess_coord!( return H end +function NLPModels.jtprod!(nlp::NLPModelsAdaptor, x::AbstractVector, v::AbstractVector, Jtv::AbstractVector) + # Compute J^T * v using the AD-provided VJP (Vector-Jacobian Product) + if !isnothing(nlp.cache.f.cons_vjp) && !isempty(Jtv) + nlp.cache.f.cons_vjp(Jtv, x, v) + end + return Jtv +end + +function NLPModels.jprod!(nlp::NLPModelsAdaptor, x::AbstractVector, v::AbstractVector, Jv::AbstractVector) + # Compute J * v using the AD-provided JVP (Jacobian-Vector Product) + if !isnothing(nlp.cache.f.cons_jvp) && !isempty(Jv) + nlp.cache.f.cons_jvp(Jv, x, v) + end + return Jv +end + @kwdef struct MadNLPOptimizer{T} # General options rethrow_error::Bool = true @@ -215,6 +231,13 @@ end # Barrier mu_init::T = 1e-1 + # Quasi-Newton options (used when hessian_approximation is CompactLBFGS, BFGS, or DampedBFGS) + max_history::Int = 6 # Number of past gradients to store for L-BFGS + init_strategy::MadNLP.BFGSInitStrategy = MadNLP.SCALAR1 # How to initialize Hessian + init_value::T = 1.0 # Initial scaling value + sigma_min::T = 1e-8 # Minimum allowed σ (safeguard) + sigma_max::T = 1e+8 # Maximum allowed σ (safeguard) + # Additional MadNLP options additional_options::Dict{Symbol, Any} = Dict{Symbol, Any}() end @@ -225,7 +248,7 @@ function SciMLBase.requiresgradient(opt::MadNLPOptimizer) true end function SciMLBase.requireshessian(opt::MadNLPOptimizer) - true + opt.hessian_approximation === MadNLP.ExactHessian end function SciMLBase.allowsbounds(opt::MadNLPOptimizer) true @@ -237,6 +260,12 @@ function SciMLBase.requiresconsjac(opt::MadNLPOptimizer) true end function SciMLBase.requireslagh(opt::MadNLPOptimizer) + opt.hessian_approximation === MadNLP.ExactHessian +end +function SciMLBase.allowsconsvjp(opt::MadNLPOptimizer) + true +end +function SciMLBase.allowsconsjvp(opt::MadNLPOptimizer) true end @@ -351,6 +380,15 @@ function __map_optimizer_args(cache, max_iter = isnothing(maxiters) ? 3000 : maxiters max_wall_time = isnothing(maxtime) ? 1e6 : maxtime + # Create QuasiNewtonOptions if using quasi-Newton methods + quasi_newton_options = MadNLP.QuasiNewtonOptions{T}(; + opt.init_strategy, + opt.max_history, + opt.init_value, + opt.sigma_min, + opt.sigma_max + ) + MadNLP.MadNLPSolver(nlp; opt.additional_options..., print_level, tol, max_iter, max_wall_time, @@ -365,6 +403,7 @@ function __map_optimizer_args(cache, opt.hessian_constant, opt.hessian_approximation, opt.mu_init, + quasi_newton_options = quasi_newton_options, ) end From e61fd955732ca2393e17f650ee748c7af478f062 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Miclu=C8=9Ba-C=C3=A2mpeanu?= Date: Thu, 16 Oct 2025 04:57:21 +0300 Subject: [PATCH 10/12] Fix Hessian buffer types to use Float64 instead of Bool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Ensure Hessian buffers are created with numeric type T instead of inheriting Bool from sparsity prototypes. Fixes InexactError when storing Hessian values. 
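For reference, a minimal standalone sketch of the failure mode (hypothetical illustration, not code from this patch): `similar` inherits the prototype's element type, so a Bool-valued sparsity pattern produces a Bool buffer that cannot store real Hessian entries.

    using SparseArrays
    proto = sparse([1, 2], [1, 2], [true, true])  # Bool sparsity pattern
    bad = similar(proto)            # eltype Bool: bad[1, 1] = 0.5 throws InexactError
    ok  = similar(proto, Float64)   # numeric buffer, matching this patch's fix
    ok[1, 1] = 0.5                  # stores cleanly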
🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude --- lib/OptimizationMadNLP/src/OptimizationMadNLP.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl b/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl index be0611987..b538b11f1 100644 --- a/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl +++ b/lib/OptimizationMadNLP/src/OptimizationMadNLP.jl @@ -87,7 +87,7 @@ function NLPModelsAdaptor( n = size(hess_proto, 1) hess_rows, hess_cols = _enumerate_lower_triangle(n) # For dense, store the full matrix but we'll extract values later - hess_buffer = similar(hess_proto) + hess_buffer = similar(hess_proto, T) else # No prototype - create dense structure n = meta.nvar @@ -169,7 +169,7 @@ function NLPModels.hess_coord!( if !isnothing(nlp.cache.f.cons_h) && !isempty(y) # Add weighted constraint Hessians - cons_hessians = [similar(nlp.hess_buffer) for _ in 1:length(y)] + cons_hessians = [similar(nlp.hess_buffer, eltype(nlp.hess_buffer)) for _ in 1:length(y)] nlp.cache.f.cons_h(cons_hessians, x) for (λ, H_cons) in zip(y, cons_hessians) nlp.hess_buffer .+= λ .* H_cons From 6415752fb132b5d124ae8600855af3f86f7e6d42 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Miclu=C8=9Ba-C=C3=A2mpeanu?= Date: Thu, 16 Oct 2025 04:58:17 +0300 Subject: [PATCH 11/12] Add comprehensive tests for OptimizationMadNLP Co-Authored-By: Claude --- lib/OptimizationMadNLP/Project.toml | 6 +- lib/OptimizationMadNLP/test/runtests.jl | 588 +++++++++++++++++++++++- 2 files changed, 576 insertions(+), 18 deletions(-) diff --git a/lib/OptimizationMadNLP/Project.toml b/lib/OptimizationMadNLP/Project.toml index 8a6f3f7c0..bd10f33b4 100644 --- a/lib/OptimizationMadNLP/Project.toml +++ b/lib/OptimizationMadNLP/Project.toml @@ -13,6 +13,8 @@ SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" SymbolicIndexingInterface = "2efcf032-c050-4f8e-a9bb-153293bab1f5" [compat] +DifferentiationInterface = "0.7" +ForwardDiff = "1.2.1" LinearAlgebra = "1.10.0" MadNLP = "0.8.12" ModelingToolkit = "10.23" @@ -26,6 +28,8 @@ julia = "1.10" [extras] Aqua = "4c88cf16-eb10-579e-8560-4a9242c79595" +DifferentiationInterface = "a0c0ee7d-e4b9-4e03-894e-1c5f64a51d63" +ForwardDiff = "f6369f11-7733-5829-9624-2563aa707210" ModelingToolkit = "961ee093-0014-501f-94e3-6117800e7a78" Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" ReverseDiff = "37e2e3b7-166d-5795-8a7a-e32c996b4267" @@ -34,4 +38,4 @@ Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" Zygote = "e88e6eb3-aa80-5325-afca-941959d7151f" [targets] -test = ["Aqua", "ModelingToolkit", "Random", "ReverseDiff", "Test", "Symbolics", "Zygote"] +test = ["Aqua", "DifferentiationInterface", "ForwardDiff", "ModelingToolkit", "Random", "ReverseDiff", "Test", "Symbolics", "Zygote"] diff --git a/lib/OptimizationMadNLP/test/runtests.jl b/lib/OptimizationMadNLP/test/runtests.jl index 3b641fd89..801336773 100644 --- a/lib/OptimizationMadNLP/test/runtests.jl +++ b/lib/OptimizationMadNLP/test/runtests.jl @@ -1,28 +1,582 @@ using OptimizationMadNLP using OptimizationBase +using MadNLP using Test -import Zygote +import Zygote, ForwardDiff, ReverseDiff +using SparseArrays +using DifferentiationInterface +using Random -function objective(x, ::Any) - return x[1] * x[4] * (x[1] + x[2] + x[3]) + x[3] +@testset "rosenbrock" begin + rosenbrock(x, p) = (p[1] - x[1])^2 + p[2] * (x[2] - x[1]^2)^2 + x0 = zeros(2) + _p = [1.0, 100.0] + l1 = rosenbrock(x0, _p) + + # MadNLP requires second-order derivatives + ad = 
SecondOrder(ADTypes.AutoForwardDiff(), ADTypes.AutoZygote()) + optfunc = OptimizationFunction( + (x, p) -> -rosenbrock(x, p), ad) + prob = OptimizationProblem(optfunc, x0, _p; sense = OptimizationBase.MaxSense) + + sol = solve(prob, MadNLPOptimizer(), verbose = true) + + @test sol ≈ [1, 1] +end + +@testset "tutorial" begin + rosenbrock(x, p) = (p[1] - x[1])^2 + p[2] * (x[2] - x[1]^2)^2 + x0 = zeros(2) + _p = [1.0, 1.0] + + cons(res, x, p) = (res .= [x[1]^2 + x[2]^2, x[1] * x[2]]) + + function lagh(res, x, sigma, mu, p) + lH = sigma * [2 + 8(x[1]^2) * p[2]-4(x[2] - (x[1]^2)) * p[2] -4p[2]*x[1] + -4p[2]*x[1] 2p[2]] .+ [2mu[1] mu[2] + mu[2] 2mu[1]] + # MadNLP uses lower triangle. For symmetric sparse([1 1; 1 1]), lower triangle has [1,1], [2,1], and [2,2] + res[1] = lH[1, 1] # Position [1,1] + res[2] = lH[2, 1] # Position [2,1] (off-diagonal) + res[3] = lH[2, 2] # Position [2,2] + end + lag_hess_prototype = sparse([1 1; 1 1]) # Symmetric sparse pattern for Hessian + + # Use SecondOrder AD for MadNLP + ad = SecondOrder(ADTypes.AutoForwardDiff(), ADTypes.AutoZygote()) + optprob = OptimizationFunction(rosenbrock, ad; + cons = cons, lag_h = lagh, lag_hess_prototype) + prob = OptimizationProblem(optprob, x0, _p, lcons = [1.0, 0.5], ucons = [1.0, 0.5]) + + opts = [ + MadNLPOptimizer(), + MadNLPOptimizer(additional_options = Dict{Symbol, Any}( + :linear_solver => LapackCPUSolver,)) + ] + + for opt in opts + sol = solve(prob, opt) + @test SciMLBase.successful_retcode(sol) + + # compare against Ipopt results + @test sol ≈ [0.7071678163428006, 0.7070457460302945] rtol=1e-4 + end end -function constraints(res, x, ::Any) - res .= [ - x[1] * x[2] * x[3] * x[4], - x[1]^2 + x[2]^2 + x[3]^2 + x[4]^2 + +@testset "cache" begin + objective(x, p) = (p[1] - x[1])^2 + x0 = zeros(1) + p = [1.0] + + # Use SecondOrder AD for MadNLP + @testset "$ad" for ad in [ + SecondOrder(AutoZygote(), AutoZygote()), + SecondOrder(AutoForwardDiff(), AutoZygote()), + SecondOrder(AutoForwardDiff(), AutoReverseDiff()), ] + optf = OptimizationFunction(objective, ad) + prob = OptimizationProblem(optf, x0, p) + cache = OptimizationBase.init(prob, MadNLPOptimizer()) + sol = OptimizationBase.solve!(cache) + @test sol.retcode == ReturnCode.Success + @test sol.u≈[1.0] atol=1e-3 + + cache = OptimizationBase.reinit!(cache; p = [2.0]) + sol = OptimizationBase.solve!(cache) + # @test sol.retcode == ReturnCode.Success + @test sol.u≈[2.0] atol=1e-3 + end +end + +@testset "constraints & AD" begin + function objective(x, ::Any) + return x[1] * x[4] * (x[1] + x[2] + x[3]) + x[3] + end + function constraints(res, x, ::Any) + res .= [ + x[1] * x[2] * x[3] * x[4], + x[1]^2 + x[2]^2 + x[3]^2 + x[4]^2 + ] + end + + x0 = [1.0, 5.0, 5.0, 1.0] + + @testset "$ad" for ad in [ + AutoSparse(SecondOrder(AutoForwardDiff(), AutoZygote())), + AutoSparse(SecondOrder(AutoForwardDiff(), AutoForwardDiff())), + AutoSparse(SecondOrder(AutoForwardDiff(), AutoReverseDiff())), + ] + optfunc = OptimizationFunction(objective, ad, cons = constraints) + prob = OptimizationProblem(optfunc, x0; sense = OptimizationBase.MinSense, + lb = [1.0, 1.0, 1.0, 1.0], + ub = [5.0, 5.0, 5.0, 5.0], + lcons = [25.0, 40.0], + ucons = [Inf, 40.0]) + + cache = init(prob, MadNLPOptimizer()) + + sol = OptimizationBase.solve!(cache) + + @test SciMLBase.successful_retcode(sol) + + @test isapprox(sol.objective, 17.014017145179164; atol = 1e-6) + x = [1.0, 4.7429996418092970, 3.8211499817883077, 1.3794082897556983] + @test isapprox(sol.u, x; atol = 1e-6) + @test prod(sol.u) >= 25.0 - 1e-6 + @test 
isapprox(sum(sol.u .^ 2), 40.0; atol = 1e-6) + end + + # dense + @testset "$ad" for ad in [ + SecondOrder(AutoForwardDiff(), AutoZygote()), + SecondOrder(AutoForwardDiff(), AutoForwardDiff()), + SecondOrder(AutoForwardDiff(), AutoReverseDiff()), + ] + optfunc = OptimizationFunction(objective, ad, cons = constraints) + prob = OptimizationProblem(optfunc, x0; sense = OptimizationBase.MinSense, + lb = [1.0, 1.0, 1.0, 1.0], + ub = [5.0, 5.0, 5.0, 5.0], + lcons = [25.0, 40.0], + ucons = [Inf, 40.0]) + + cache = init(prob, MadNLPOptimizer(additional_options = Dict{Symbol, Any}( + :kkt_system => MadNLP.DenseKKTSystem, + :linear_solver => LapackCPUSolver,))) + + sol = OptimizationBase.solve!(cache) + + @test SciMLBase.successful_retcode(sol) + + @test isapprox(sol.objective, 17.014017145179164; atol = 1e-6) + x = [1.0, 4.7429996418092970, 3.8211499817883077, 1.3794082897556983] + @test isapprox(sol.u, x; atol = 1e-6) + @test prod(sol.u) >= 25.0 - 1e-6 + @test isapprox(sum(sol.u .^ 2), 40.0; atol = 1e-6) + end +end + +@testset "Larger sparse Hessian" begin + # Test with a 4x4 sparse Hessian matrix + # min x1^2 + 2*x2^2 + x3^2 + x1*x3 + x2*x4 + # s.t. x1 + x2 + x3 + x4 = 4 + # x1*x3 >= 1 + + function objective_sparse(x, p) + return x[1]^2 + 2*x[2]^2 + x[3]^2 + x[1]*x[3] + x[2]*x[4] + end + + function cons_sparse(res, x, p) + res[1] = x[1] + x[2] + x[3] + x[4] # Equality constraint + res[2] = x[1] * x[3] # Inequality constraint + end + + function lag_hess_sparse(res, x, sigma, mu, p) + # Sparse Hessian structure (symmetric): + # [2 0 1 0 ] + # [0 4 0 1 ] + # [1 0 2 0 ] + # [0 1 0 0 ] + # + # Lower triangle indices: [1,1], [3,1], [2,2], [4,2], [3,3] + # Total: 5 non-zero elements in lower triangle + + # Objective Hessian contribution + res[1] = sigma * 2.0 # H[1,1] from x1^2 + res[2] = sigma * 1.0 # H[3,1] from x1*x3 + res[3] = sigma * 4.0 # H[2,2] from 2*x2^2 + res[4] = sigma * 1.0 # H[4,2] from x2*x4 + res[5] = sigma * 2.0 # H[3,3] from x3^2 + + # Constraint contributions + # First constraint (x1+x2+x3+x4=4) has zero Hessian + # Second constraint (x1*x3>=1) has d²c/dx1dx3 = 1 + res[2] += mu[2] * 1.0 # Add to H[3,1] + end + + # Create sparse prototype with the correct structure + # We need 1s at positions: [1,1], [1,3], [2,2], [2,4], [3,1], [3,3], [4,2] + hess_proto_4x4 = sparse( + [1, 3, 2, 4, 1, 3, 2], # row indices + [1, 1, 2, 2, 3, 3, 4], # column indices + [1, 1, 1, 1, 1, 1, 1] # values (just placeholder 1s) + ) + + x0 = [1.0, 1.0, 1.0, 1.0] + p = Float64[] + + # Use SecondOrder AD for MadNLP + ad = SecondOrder(ADTypes.AutoForwardDiff(), ADTypes.AutoZygote()) + optprob = OptimizationFunction(objective_sparse, ad; + cons = cons_sparse, lag_h = lag_hess_sparse, lag_hess_prototype = hess_proto_4x4) + + prob = OptimizationProblem(optprob, x0, p, + lcons = [4.0, 1.0], # x1+x2+x3+x4 = 4, x1*x3 >= 1 + ucons = [4.0, Inf]) # x1+x2+x3+x4 = 4, x1*x3 <= Inf + + sol = solve(prob, MadNLPOptimizer()) + + @test SciMLBase.successful_retcode(sol) + + # Check constraints + cons_vals = zeros(2) + cons_sparse(cons_vals, sol.u, p) + @test isapprox(cons_vals[1], 4.0, atol=1e-6) # Sum constraint + @test cons_vals[2] >= 1.0 - 1e-6 # Product constraint +end + +@testset "MadNLP Options and Common Interface" begin + rosenbrock(x, p) = (p[1] - x[1])^2 + p[2] * (x[2] - x[1]^2)^2 + x0 = zeros(2) + p = [1.0, 100.0] + ad = SecondOrder(AutoForwardDiff(), AutoForwardDiff()) + + @testset "MadNLP struct options" begin + optfunc = OptimizationFunction(rosenbrock, ad) + prob = OptimizationProblem(optfunc, x0, p) + + # Test with 
MadNLP-specific struct fields + opt = MadNLPOptimizer( + acceptable_tol = 1e-6, + acceptable_iter = 10, + blas_num_threads = 2, + mu_init = 0.01 + ) + sol = solve(prob, opt) + @test SciMLBase.successful_retcode(sol) + + # Test with hessian approximation + opt2 = MadNLPOptimizer( + hessian_approximation = MadNLP.CompactLBFGS, + jacobian_constant = false, + hessian_constant = false + ) + sol2 = solve(prob, opt2) + @test SciMLBase.successful_retcode(sol2) + end + + @testset "additional_options dictionary" begin + optfunc = OptimizationFunction(rosenbrock, ad) + prob = OptimizationProblem(optfunc, x0, p) + + # Test passing MadNLP options via additional_options + opt = MadNLPOptimizer( + additional_options = Dict{Symbol, Any}( + :linear_solver => MadNLP.UmfpackSolver, + :max_iter => 200, + :tol => 1e-7 + ) + ) + sol = solve(prob, opt) + @test SciMLBase.successful_retcode(sol) + + # Test with different options + opt2 = MadNLPOptimizer( + additional_options = Dict{Symbol, Any}( + :inertia_correction_method => MadNLP.InertiaFree, + :fixed_variable_treatment => MadNLP.RelaxBound + ) + ) + sol2 = solve(prob, opt2) + @test SciMLBase.successful_retcode(sol2) + end + + @testset "Common interface arguments" begin + optfunc = OptimizationFunction(rosenbrock, ad) + prob = OptimizationProblem(optfunc, x0, p) + + # Test that abstol overrides default tolerance + sol1 = solve(prob, MadNLPOptimizer(); abstol = 1e-12) + @test SciMLBase.successful_retcode(sol1) + @test sol1.u ≈ [1.0, 1.0] atol=1e-10 + + # Test that maxiters limits iterations + sol2 = solve(prob, MadNLPOptimizer(); maxiters = 5) + # May not converge with only 5 iterations + @test sol2.stats.iterations <= 5 + + # Test verbose options (MadNLP supports bool and LogLevels) + for verbose in [false, true, MadNLP.ERROR, MadNLP.WARN, MadNLP.INFO] + sol = solve(prob, MadNLPOptimizer(); verbose = verbose, maxiters = 20) + @test sol isa SciMLBase.OptimizationSolution + end + end + + @testset "Priority: struct < additional_options < solve args" begin + optfunc = OptimizationFunction(rosenbrock, ad) + prob = OptimizationProblem(optfunc, x0, p) + + # Struct field is overridden by additional_options and solve arguments + opt = MadNLPOptimizer( + acceptable_tol = 1e-4, # Struct field + additional_options = Dict{Symbol, Any}( + :max_iter => 10, # Will be overridden by maxiters + :tol => 1e-6 # Will be overridden by abstol + ) + ) + + sol = solve(prob, opt; + maxiters = 5, # Should override additional_options[:max_iter] + abstol = 1e-10) # Should override additional_options[:tol] + + @test sol.stats.iterations <= 5 + @test sol.retcode == SciMLBase.ReturnCode.MaxIters + end end -x0 = [1.0, 5.0, 5.0, 1.0] -optfunc = OptimizationFunction(objective, AutoSparse(OptimizationBase.AutoZygote()), cons=constraints) -prob = OptimizationProblem(optfunc, x0; sense = OptimizationBase.MinSense, - lb = [1.0, 1.0, 1.0, 1.0], - ub = [5.0, 5.0, 5.0, 5.0], - lcons = [25.0, 40.0], - ucons = [Inf, 40.0]) +@testset "LBFGS Hessian Approximation" begin + # Based on https://madsuite.org/MadNLP.jl/dev/tutorials/lbfgs/ + + @testset "Unconstrained LBFGS" begin + # Extended Rosenbrock function (n-dimensional) + function extended_rosenbrock(x, p) + n = length(x) + sum(100 * (x[2i] - x[2i-1]^2)^2 + (1 - x[2i-1])^2 for i in 1:div(n, 2)) + end + + n = 10 # Problem dimension + x0 = zeros(n) + x0[1:2:end] .= -1.2 # Starting point from tutorial + x0[2:2:end] .= 1.0 + + # Test different LBFGS configurations + @testset "LBFGS variant: $variant" for variant in [ + MadNLP.CompactLBFGS, + 
MadNLP.ExactHessian # For comparison + ] + # Only provide gradients, no Hessian needed for LBFGS + ad = AutoForwardDiff() # First-order AD is sufficient + optfunc = OptimizationFunction(extended_rosenbrock, ad) + prob = OptimizationProblem(optfunc, x0, nothing) + + if variant == MadNLP.ExactHessian + # Use second-order AD for exact Hessian + ad = SecondOrder(AutoForwardDiff(), AutoForwardDiff()) + optfunc = OptimizationFunction(extended_rosenbrock, ad) + prob = OptimizationProblem(optfunc, x0, nothing) + end + + opt = MadNLPOptimizer( + hessian_approximation = variant + ) + + sol = solve(prob, opt; maxiters=100, verbose = false) + + @test SciMLBase.successful_retcode(sol) + @test all(isapprox.(sol.u, 1.0, atol = 1e-6)) # Solution should be all ones + @test sol.objective < 1e-10 # Should be close to zero + end + + @testset "LBFGS memory size $memory_size" for memory_size in [5, 10, 20] + # Test different memory sizes for L-BFGS + ad = AutoForwardDiff() + optfunc = OptimizationFunction(extended_rosenbrock, ad) + prob = OptimizationProblem(optfunc, x0, nothing) + + opt = MadNLPOptimizer( + hessian_approximation = MadNLP.CompactLBFGS, + max_history = memory_size + ) + + sol = solve(prob, opt; maxiters=100, verbose = false) + + @test SciMLBase.successful_retcode(sol) + @test all(isapprox.(sol.u, 1.0, atol = 1e-6)) + end + end + + @testset "Constrained LBFGS - Electrons on Sphere" begin + # Quasi-uniform distribution of electrons on a unit sphere + # Minimize electrostatic potential energy (Coulomb potential) + # Variables are organized as [x1, x2, ..., xn, y1, y2, ..., yn, z1, z2, ..., zn] + # based on https://madsuite.org/MadNLP.jl/dev/tutorials/lbfgs + + function coulomb_potential(vars, p) + # vars = [x1...xn, y1...yn, z1...zn] + np = div(length(vars), 3) + x = @view vars[1:np] + y = @view vars[np+1:2*np] + z = @view vars[2*np+1:3*np] + + # Sum of 1/r_ij for all electron pairs + energy = 0.0 + for i in 1:np-1 + for j in i+1:np + dist_sq = (x[i] - x[j])^2 + (y[i] - y[j])^2 + (z[i] - z[j])^2 + energy += 1.0 / sqrt(dist_sq) + end + end + return energy + end + + function unit_sphere_constraints(res, vars, p) + # Each electron must lie on the unit sphere + np = div(length(vars), 3) + x = @view vars[1:np] + y = @view vars[np+1:2*np] + z = @view vars[2*np+1:3*np] + + for i in 1:np + res[i] = x[i]^2 + y[i]^2 + z[i]^2 - 1.0 + end + end + + # Function to generate initial points on sphere + function init_electrons_on_sphere(np) + # Random.seed!(1) + theta = 2π .* rand(np) + phi = π .* rand(np) + + x0 = zeros(3*np) + # x coordinates + x0[1:np] = cos.(theta) .* sin.(phi) + # y coordinates + x0[np+1:2*np] = sin.(theta) .* sin.(phi) + # z coordinates + x0[2*np+1:3*np] = cos.(phi) + + return x0 + end + + @testset "N=$np electrons with $approx" for + np in [6, 8, 10], + approx in [MadNLP.CompactLBFGS, MadNLP.ExactHessian] -cache = init(prob, MadNLPOptimizer()) + x0 = init_electrons_on_sphere(np) -sol = OptimizationBase.solve!(cache) + if approx == MadNLP.CompactLBFGS + # For LBFGS variants, only first-order derivatives needed + ad = AutoForwardDiff() + else + # For exact Hessian, need second-order + ad = SecondOrder(AutoForwardDiff(), AutoForwardDiff()) + end -@test SciMLBase.successful_retcode(sol) + optfunc = OptimizationFunction( + coulomb_potential, ad, + cons = unit_sphere_constraints + ) + + # Equality constraints: each electron on unit sphere + lcons = zeros(np) + ucons = zeros(np) + + prob = OptimizationProblem(optfunc, x0; + lcons = lcons, + ucons = ucons + ) + + opt = MadNLPOptimizer( + 
additional_options = Dict{Symbol, Any}(:linear_solver=>LapackCPUSolver), + hessian_approximation = approx + ) + + sol = solve(prob, opt; abstol=1e-7, maxiters=200, verbose = false) + + @test SciMLBase.successful_retcode(sol) + + # Check that all electrons are on the unit sphere + cons_vals = zeros(np) + unit_sphere_constraints(cons_vals, sol.u, nothing) + @test all(abs.(cons_vals) .< 1e-5) + + # Known optimal energies for small electron numbers + # Reference: https://en.wikipedia.org/wiki/Thomson_problem + # Note: These are the minimum Coulomb potential energies for N electrons on unit sphere + expected_energies = Dict( + 6 => 9.985281374, # Octahedron (Oh symmetry) + 8 => 19.675287861, # Square antiprism (D4d) + 10 => 32.716949460 # Gyroelongated square dipyramid (D4d) + ) + + if haskey(expected_energies, np) + @test isapprox(sol.objective, expected_energies[np], rtol = 1e-3) + end + + # Verify minimum distance between electrons + x = sol.u[1:np] + y = sol.u[np+1:2*np] + z = sol.u[2*np+1:3*np] + + min_dist = Inf + for i in 1:np-1 + for j in i+1:np + dist = sqrt((x[i]-x[j])^2 + (y[i]-y[j])^2 + (z[i]-z[j])^2) + min_dist = min(min_dist, dist) + end + end + @test min_dist > 0.5 # Electrons should be well-separated + end + + @testset "Performance comparison: LBFGS vs Exact Hessian" begin + # Test with moderate size to show LBFGS efficiency + np = 12 # Icosahedron configuration + x0 = init_electrons_on_sphere(np) + + results = Dict() + + for (name, approx, ad) in [ + ("CompactLBFGS", MadNLP.CompactLBFGS, AutoForwardDiff()) + ("ExactHessian", MadNLP.ExactHessian, SecondOrder(AutoForwardDiff(), AutoForwardDiff())) + ] + optfunc = OptimizationFunction( + coulomb_potential, ad, + cons = unit_sphere_constraints + ) + + prob = OptimizationProblem(optfunc, x0; + lcons = zeros(np), + ucons = zeros(np) + ) + + opt = MadNLPOptimizer( + hessian_approximation = approx + ) + + sol = solve(prob, opt; abstol = 1e-6, maxiters = 300, verbose = false) + results[name] = ( + objective = sol.objective, + iterations = sol.stats.iterations, + success = SciMLBase.successful_retcode(sol) + ) + end + + # All methods should converge + @test all(r.success for r in values(results)) + + # All should find similar objective values (icosahedron energy) + # Reference: https://en.wikipedia.org/wiki/Thomson_problem + objectives = [r.objective for r in values(results)] + @test all(abs.(objectives .- 49.165253058) .< 0.1) + + # LBFGS methods typically need more iterations but less cost per iteration + @test results["CompactLBFGS"].iterations > 0 + @test results["ExactHessian"].iterations > 0 + end + end + + @testset "LBFGS with damped update" begin + # Test the damped BFGS update option + function simple_quadratic(x, p) + return sum(x.^2) + end + + x0 = randn(5) + + ad = AutoForwardDiff() + optfunc = OptimizationFunction(simple_quadratic, ad) + prob = OptimizationProblem(optfunc, x0, nothing) + + opt = MadNLPOptimizer( + hessian_approximation = MadNLP.DampedBFGS, # Use damped BFGS variant + additional_options = Dict{Symbol,Any}( + :linear_solver => MadNLP.LapackCPUSolver, + :kkt_system=>MadNLP.DenseKKTSystem) + ) + + sol = solve(prob, opt; maxiters=50, verbose = false) + + @test SciMLBase.successful_retcode(sol) + @test all(abs.(sol.u) .< 1e-6) # Solution should be at origin + @test sol.objective < 1e-10 + end +end From 525fb64b193090cfba83b8645f4f9fe3ea5faaa1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sebastian=20Miclu=C8=9Ba-C=C3=A2mpeanu?= Date: Thu, 16 Oct 2025 15:51:14 +0300 Subject: [PATCH 12/12] bump OptimizationBase for the 
required fixes --- lib/OptimizationMadNLP/Project.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/OptimizationMadNLP/Project.toml b/lib/OptimizationMadNLP/Project.toml index bd10f33b4..12adca9d0 100644 --- a/lib/OptimizationMadNLP/Project.toml +++ b/lib/OptimizationMadNLP/Project.toml @@ -19,7 +19,7 @@ LinearAlgebra = "1.10.0" MadNLP = "0.8.12" ModelingToolkit = "10.23" NLPModels = "0.21.5" -OptimizationBase = "3" +OptimizationBase = "3.3.1" SciMLBase = "2.90.0" SparseArrays = "1.10.0" SymbolicIndexingInterface = "0.3.40"
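A minimal end-to-end sketch of the wrapper added by this series, distilled from lib/OptimizationMadNLP/test/runtests.jl above (the AD backend and tolerance are illustrative choices from the tests, not requirements of the solver):

    using OptimizationBase, OptimizationMadNLP
    using DifferentiationInterface   # provides SecondOrder
    import ForwardDiff, Zygote

    # MadNLP needs second-order derivatives, hence the SecondOrder AD backend
    rosenbrock(x, p) = (p[1] - x[1])^2 + p[2] * (x[2] - x[1]^2)^2
    optf = OptimizationFunction(rosenbrock, SecondOrder(AutoForwardDiff(), AutoZygote()))
    prob = OptimizationProblem(optf, zeros(2), [1.0, 100.0])
    sol = solve(prob, MadNLPOptimizer(); abstol = 1e-8)   # converges to u ≈ [1.0, 1.0]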