diff --git a/src/moi_nlp_model.jl b/src/moi_nlp_model.jl index 3942558..19eb9b3 100644 --- a/src/moi_nlp_model.jl +++ b/src/moi_nlp_model.jl @@ -6,7 +6,7 @@ mutable struct MathOptNLPModel <: AbstractNLPModel{Float64, Vector{Float64}} lincon::LinearConstraints quadcon::QuadraticConstraints nlcon::NonLinearStructure - oracles::Vector{Tuple{MOI.VectorOfVariables,_VectorNonlinearOracleCache}} + oracles_data::OraclesData λ::Vector{Float64} hv::Vector{Float64} obj::Objective @@ -36,7 +36,7 @@ function nlp_model(moimodel::MOI.ModelLike; hessian::Bool = true, name::String = nlp_data = _nlp_block(moimodel) nnln, nlcon, nl_lcon, nl_ucon = parser_NL(nlp_data, hessian = hessian) - oracles = Tuple{MOI.VectorOfVariables,_VectorNonlinearOracleCache}[] + oracles_data = parser_oracles(moimodel) counters = Counters() λ = zeros(Float64, nnln) # Lagrange multipliers for hess_coord! and hprod! without y hv = zeros(Float64, nvar) # workspace for ghjvprod! @@ -47,13 +47,12 @@ function nlp_model(moimodel::MOI.ModelLike; hessian::Bool = true, name::String = obj = parser_objective_MOI(moimodel, nvar, index_map) end - # Update ncon, lcon, ucon, nnzj and nnzh for the oracles - ncon = nlin + quadcon.nquad + nnln - lcon = vcat(lin_lcon, quad_lcon, nl_lcon) - ucon = vcat(lin_ucon, quad_ucon, nl_ucon) - nnzj = lincon.nnzj + quadcon.nnzj + nlcon.nnzj - nnzh = obj.nnzh + quadcon.nnzh + nlcon.nnzh - + # Total counts + ncon = nlin + quadcon.nquad + nnln + oracles_data.noracle + lcon = vcat(lin_lcon, quad_lcon, nl_lcon, oracles_data.l_oracle) + ucon = vcat(lin_ucon, quad_ucon, nl_ucon, oracles_data.u_oracle) + nnzj = lincon.nnzj + quadcon.nnzj + nlcon.nnzj + oracles_data.nnzj_oracle + nnzh = obj.nnzh + quadcon.nnzh + nlcon.nnzh + oracles_data.nnzh_oracle meta = NLPModelMeta( nvar, x0 = x0, @@ -67,17 +66,17 @@ function nlp_model(moimodel::MOI.ModelLike; hessian::Bool = true, name::String = nnzh = nnzh, lin = collect(1:nlin), lin_nnzj = lincon.nnzj, - nln_nnzj = quadcon.nnzj + nlcon.nnzj, + nln_nnzj = quadcon.nnzj + nlcon.nnzj + oracles_data.nnzj_oracle, minimize = MOI.get(moimodel, MOI.ObjectiveSense()) == MOI.MIN_SENSE, islp = (obj.type == "LINEAR") && (nnln == 0) && (quadcon.nquad == 0), name = name, - jprod_available = isempty(oracles), - jtprod_available = isempty(oracles), - hprod_available = isempty(oracles) && hessian, + jprod_available = isempty(oracles_data.oracles), + jtprod_available = isempty(oracles_data.oracles), + hprod_available = isempty(oracles_data.oracles) && hessian, hess_available = hessian, ) - return MathOptNLPModel(meta, nlp_data.evaluator, lincon, quadcon, nlcon, oracles, λ, hv, obj, counters), + return MathOptNLPModel(meta, nlp_data.evaluator, lincon, quadcon, nlcon, oracles_data, λ, hv, obj, counters), index_map end @@ -128,8 +127,9 @@ function NLPModels.cons_nln!(nlp::MathOptNLPModel, x::AbstractVector, c::Abstrac end end if nlp.meta.nnln > nlp.quadcon.nquad - offset = nlp.quadcon.nquad - for (f, s) in nlp.oracles + offset = nlp.quadcon.nquad + nlp.nlcon.nnln + MOI.eval_constraint(nlp.eval, view(c, (nlp.quadcon.nquad + 1):(offset)), x) + for (f, s) in nlp.oracles_data.oracles for i in 1:s.set.input_dimension s.x[i] = x[f.variables[i].value] end @@ -138,8 +138,6 @@ function NLPModels.cons_nln!(nlp::MathOptNLPModel, x::AbstractVector, c::Abstrac s.set.eval_f(c_oracle, s.x) offset += s.set.output_dimension end - index_nnln = (offset + 1):(nlp.meta.nnln) - MOI.eval_constraint(nlp.eval, view(c, index_nnln), x) end return c end @@ -158,8 +156,9 @@ function NLPModels.cons!(nlp::MathOptNLPModel, x::AbstractVector, c::AbstractVec end end if nlp.meta.nnln > nlp.quadcon.nquad - offset = nlp.meta.nlin + nlp.quadcon.nquad - for (f, s) in nlp.oracles + offset = nlp.meta.nlin + nlp.quadcon.nquad + nlp.nlcon.nnln + MOI.eval_constraint(nlp.eval, view(c, (nlp.meta.nlin + nlp.quadcon.nquad + 1):(offset)), x) + for (f, s) in nlp.oracles_data.oracles for i in 1:s.set.input_dimension s.x[i] = x[f.variables[i].value] end @@ -168,8 +167,6 @@ function NLPModels.cons!(nlp::MathOptNLPModel, x::AbstractVector, c::AbstractVec s.set.eval_f(c_oracle, s.x) offset += s.set.output_dimension end - index_nnln = (offset + 1):(nlp.meta.ncon) - MOI.eval_constraint(nlp.eval, view(c, index_nnln), x) end end return c @@ -204,15 +201,26 @@ function NLPModels.jac_nln_structure!( end if nlp.meta.nnln > nlp.quadcon.nquad offset = nlp.quadcon.nnzj - # for (f, s) in nlp.oracles - # for (i, j) in s.set.jacobian_structure - # ... - # offset += 1 - # end - # end - ind_nnln = (offset + 1):(nlp.quadcon.nnzj + nlp.nlcon.nnzj) + # non-oracle nonlinear constraints + ind_nnln = (offset + 1):(offset + nlp.nlcon.nnzj) view(rows, ind_nnln) .= nlp.quadcon.nquad .+ nlp.nlcon.jac_rows view(cols, ind_nnln) .= nlp.nlcon.jac_cols + offset += nlp.nlcon.nnzj + # structure of oracle Jacobians + for (k, (f, s)) in enumerate(nlp.oracles_data.oracles) + # Shift row index by quadcon.nquad + # plus previous oracle outputs. + row_offset = nlp.quadcon.nquad + for i in 1:(k - 1) + row_offset += nlp.oracles_data.oracles[i][2].set.output_dimension + end + + for (r, c) in s.set.jacobian_structure + offset += 1 + rows[offset] = row_offset + r + cols[offset] = f.variables[c].value + end + end end return rows, cols end @@ -240,16 +248,22 @@ function NLPModels.jac_structure!( end end if nlp.meta.nnln > nlp.quadcon.nquad + # non-oracle nonlinear constraints offset = nlp.lincon.nnzj + nlp.quadcon.nnzj - # for (f, s) in nlp.oracles - # for (i, j) in s.set.jacobian_structure - # ... - # offset += 1 - # end - # end - ind_nnln = (offset + 1):(nlp.meta.nnzj) + ind_nnln = (offset + 1):(offset + nlp.nlcon.nnzj) view(rows, ind_nnln) .= nlp.meta.nlin .+ nlp.quadcon.nquad .+ nlp.nlcon.jac_rows view(cols, ind_nnln) .= nlp.nlcon.jac_cols + offset += nlp.nlcon.nnzj + # structure of oracle Jacobians + row_offset = nlp.meta.nlin + nlp.quadcon.nquad + nlp.nlcon.nnln + for (f, s) in nlp.oracles_data.oracles + for (i, j) in s.set.jacobian_structure + offset += 1 + rows[offset] = row_offset + i + cols[offset] = f.variables[j].value + end + row_offset += s.set.output_dimension + end end end return rows, cols @@ -290,16 +304,17 @@ function NLPModels.jac_nln_coord!(nlp::MathOptNLPModel, x::AbstractVector, vals: end if nlp.meta.nnln > nlp.quadcon.nquad offset = nlp.quadcon.nnzj - # for (f, s) in nlp.oracles - # for i in 1:s.set.input_dimension - # s.x[i] = x[f.variables[i].value] - # end - # nnz_oracle = length(s.set.jacobian_structure) - # s.set.eval_jacobian(view(values, (offset + 1):(oracle + nnz_oracle)), s.x) - # offset += nnz_oracle - # end - ind_nnln = (offset + 1):(nlp.quadcon.nnzj + nlp.nlcon.nnzj) + ind_nnln = (offset + 1):(offset + nlp.nlcon.nnzj) MOI.eval_constraint_jacobian(nlp.eval, view(vals, ind_nnln), x) + offset += nlp.nlcon.nnzj + for (f, s) in nlp.oracles_data.oracles + for i in 1:s.set.input_dimension + s.x[i] = x[f.variables[i].value] + end + nnz_oracle = length(s.set.jacobian_structure) + s.set.eval_jacobian(view(values, (offset + 1):(offset + nnz_oracle)), s.x) + offset += nnz_oracle + end end return vals end @@ -337,18 +352,20 @@ function NLPModels.jac_coord!(nlp::MathOptNLPModel, x::AbstractVector, vals::Abs end if nlp.meta.nnln > nlp.quadcon.nquad offset = nlp.lincon.nnzj + nlp.quadcon.nnzj - # for (f, s) in nlp.oracles - # for i in 1:s.set.input_dimension - # s.x[i] = x[f.variables[i].value] - # end - # nnz_oracle = length(s.set.jacobian_structure) - # s.set.eval_jacobian(view(values, (offset + 1):(oracle + nnz_oracle)), s.x) - # offset += nnz_oracle - # end - ind_nnln = (offset + 1):(nlp.meta.nnzj) + ind_nnln = (offset + 1):(offset + nlp.nlcon.nnzj) MOI.eval_constraint_jacobian(nlp.eval, view(vals, ind_nnln), x) + offset += nlp.nlcon.nnzj + for (f, s) in nlp.oracles_data.oracles + for i in 1:s.set.input_dimension + s.x[i] = x[f.variables[i].value] + end + nnz_oracle = length(s.set.jacobian_structure) + s.set.eval_jacobian(view(values, (offset + 1):(offset + nnz_oracle)), s.x) + offset += nnz_oracle + end end end + @assert offset == nlp.meta.nnzj return vals end @@ -516,12 +533,8 @@ function NLPModels.hess_structure!( view(rows, 1:(nlp.obj.nnzh)) .= nlp.obj.hessian.rows view(cols, 1:(nlp.obj.nnzh)) .= nlp.obj.hessian.cols end - if (nlp.obj.type == "NONLINEAR") || (nlp.meta.nnln > nlp.quadcon.nquad) - view(rows, (nlp.obj.nnzh + nlp.quadcon.nnzh + 1):(nlp.meta.nnzh)) .= nlp.nlcon.hess_rows - view(cols, (nlp.obj.nnzh + nlp.quadcon.nnzh + 1):(nlp.meta.nnzh)) .= nlp.nlcon.hess_cols - end + index = nlp.obj.nnzh if nlp.quadcon.nquad > 0 - index = nlp.obj.nnzh for i = 1:(nlp.quadcon.nquad) qcon = nlp.quadcon.constraints[i] view(rows, (index + 1):(index + qcon.nnzh)) .= qcon.A.rows @@ -529,13 +542,20 @@ function NLPModels.hess_structure!( index += qcon.nnzh end end - # if !isempty(nlp.oracles) - # for (f, s) in nlp.oracles - # for (i, j) in s.set.hessian_lagrangian_structure - # ... - # end - # end - # end + if (nlp.obj.type == "NONLINEAR") || (nlp.meta.nnln > nlp.quadcon.nquad) + view(rows, (index + 1):(index + nlp.nlcon.nnzh)) .= nlp.nlcon.hess_rows + view(cols, (index + 1):(index + nlp.nlcon.nnzh)) .= nlp.nlcon.hess_cols + index += nlp.nlcon.nnzh + end + if !isempty(nlp.oracles_data.oracles) + for (f, s) in nlp.oracles_data.oracles + for (i_local, j_local) in s.set.hessian_lagrangian_structure + index += 1 + rows[index] = f.variables[i_local].value + cols[index] = f.variables[j_local].value + end + end + end return rows, cols end @@ -547,9 +567,16 @@ function NLPModels.hess_coord!( obj_weight::Float64 = 1.0, ) increment!(nlp, :neval_hess) + + # Running index over Hessian nonzeros we've filled so far. + index = 0 + + # Objective Hessian block if nlp.obj.type == "QUADRATIC" view(vals, 1:(nlp.obj.nnzh)) .= obj_weight .* nlp.obj.hessian.vals + index += nlp.obj.nnzh end + # Nonlinear objective Hessian block if (nlp.obj.type == "NONLINEAR") || (nlp.meta.nnln > nlp.quadcon.nquad) λ = view(y, (nlp.meta.nlin + nlp.quadcon.nquad + 1):(nlp.meta.ncon)) MOI.eval_hessian_lagrangian( @@ -560,23 +587,42 @@ function NLPModels.hess_coord!( λ, ) end + # Quadratic constraint Hessian blocks if nlp.quadcon.nquad > 0 - index = nlp.obj.nnzh for i = 1:(nlp.quadcon.nquad) qcon = nlp.quadcon.constraints[i] view(vals, (index + 1):(index + qcon.nnzh)) .= y[nlp.meta.nlin + i] .* qcon.A.vals index += qcon.nnzh end end - # if !isempty(nlp.oracles) - # for (f, s) in nlp.oracles - # for i in 1:s.set.input_dimension - # s.x[i] = x[f.variables[i].value] - # end - # H_nnz = length(s.set.hessian_lagrangian_structure) - # ... - # end - # end + # Oracle Hessian blocks + if !isempty(nlp.oracles_data.oracles) + λ_oracle_all = view( + y, + (nlp.meta.nlin + nlp.quadcon.nquad + nlp.nlcon.nnln + 1): + (nlp.meta.nlin + nlp.quadcon.nquad + nlp.nlcon.nnln + nlp.oracles_data.noracle), + ) + + λ_offset = 0 + for (f, s) in nlp.oracles_data.oracles + # build local x for this oracle + for i in 1:s.set.input_dimension + s.x[i] = x[f.variables[i].value] + end + + nout = s.set.output_dimension + μ = view(λ_oracle_all, (λ_offset + 1):(λ_offset + nout)) + + H_nnz = length(s.set.hessian_lagrangian_structure) + ind = (index + 1):(index + H_nnz) + s.set.eval_hessian_lagrangian(view(vals, ind), s.x, μ) + + index += H_nnz + λ_offset += nout + end + end + + @assert index == nlp.meta.nnzh return vals end @@ -631,9 +677,55 @@ function NLPModels.jth_hess_coord!( ) nlp.λ[j - nlp.meta.nlin - nlp.quadcon.nquad] = 0.0 end - # if !isempty(nlp.oracles) - # ... - # end + # check for oracle constraints + if !isempty(nlp.oracles_data.oracles) + n_nl = nlp.nlcon.nnln + noracle = nlp.oracles_data.noracle + + # check if j is an oracle constraint index + first_oracle = nlp.meta.nlin + nlp.quadcon.nquad + n_nl + 1 + last_oracle = first_oracle + noracle - 1 + + if first_oracle ≤ j ≤ last_oracle + # local oracle constraint index k ∈ 1: noracle + k = j - (nlp.meta.nlin + nlp.quadcon.nquad + n_nl) + + # starting index of this oracle's Hessian block + index = nlp.obj.nnzh + nlp.quadcon.nnzh + nlp.nlcon.nnzh + + # walk through oracles to find which one owns component k + offset_outputs = 0 + for (f, s) in nlp.oracles_data.oracles + nout = s.set.output_dimension + H_nnz = length(s.set.hessian_lagrangian_structure) + + if k > offset_outputs + nout + # skip this oracle's block + index += H_nnz + offset_outputs += nout + continue + end + + # this oracle owns constraint k; local index ℓ + ℓ = k - offset_outputs + + # build local x + for i in 1:s.set.input_dimension + s.x[i] = x[f.variables[i].value] + end + + # local multipliers μ: one-hot at ℓ + μ = zeros(eltype(x), nout) + μ[ℓ] = 1.0 + + # fill this oracle's Hessian block + ind = (index + 1):(index + H_nnz) + s.set.eval_hessian_lagrangian(view(vals, ind), s.x, μ) + + break + end + end + end return vals end diff --git a/src/utils.jl b/src/utils.jl index 84fe4a7..54f29c0 100644 --- a/src/utils.jl +++ b/src/utils.jl @@ -84,7 +84,20 @@ mutable struct QuadraticConstraints nnzh::Int end +""" + NonLinearStructure + +Structure containing Jacobian and Hessian structures of nonlinear constraints: +- nnln: number of nonlinear constraints +- jac_rows: row indices of the Jacobian in Coordinate format (COO) format +- jac_cols: column indices of the Jacobian in COO format +- nnzj: number of non-zero entries in the Jacobian +- hess_rows: row indices of the Hessian in COO format +- hess_cols: column indices of the Hessian in COO format +- nnzh: number of non-zero entries in the Hessian +""" mutable struct NonLinearStructure + nnln::Int jac_rows::Vector{Int} jac_cols::Vector{Int} nnzj::Int @@ -107,6 +120,26 @@ mutable struct Objective nnzh::Int end +""" + OraclesData + +Structure containing nonlinear oracles data: +- oracles: vector of tuples (MOI.VectorOfVariables, _VectorNonlinearOracleCache) +- noracle: number of scalar constraints represented by all oracles +- l_oracle: lower bounds of oracle constraints +- u_oracle: upper bounds of oracle constraints +- nnzj_oracle: number of non-zero entries in the Jacobian of all oracles +- nnzh_oracle: number of non-zero entries in the Hessian of all oracles +""" +mutable struct OraclesData + oracles::Vector{Tuple{MOI.VectorOfVariables,_VectorNonlinearOracleCache}} + noracle::Int + l_oracle::Vector{Float64} + u_oracle::Vector{Float64} + nnzj_oracle::Int + nnzh_oracle::Int +end + """ replace!(ex, x) @@ -417,6 +450,10 @@ function parser_MOI(moimodel, index_map, nvar) contypes = MOI.get(moimodel, MOI.ListOfConstraintTypesPresent()) for (F, S) in contypes + # Ignore VectorNonlinearOracle here, we'll parse it separately + if F == MOI.VectorOfVariables && S <: MOI.VectorNonlinearOracle{Float64} + continue + end (F == VNF) && error( "The function $F is not supported. Please use `.<=`, `.==`, and `.>=` in your constraints to ensure compatibility with ScalarNonlinearFunction.", ) @@ -538,6 +575,12 @@ end parser_NL(nlp_data; hessian) Parse nonlinear constraints of an `nlp_data`. + +Returns: +- nnln: number of nonlinear constraints +- nlcon: NonLinearStructure containing Jacobian and Hessian structures +- nl_lcon: lower bounds of nonlinear constraints +- nl_ucon: upper bounds of nonlinear constraints """ function parser_NL(nlp_data; hessian::Bool = true) nnln = length(nlp_data.constraint_bounds) @@ -555,11 +598,52 @@ function parser_NL(nlp_data; hessian::Bool = true) hess_rows = hessian ? getindex.(hess, 1) : Int[] hess_cols = hessian ? getindex.(hess, 2) : Int[] nnzh = length(hess) - nlcon = NonLinearStructure(jac_rows, jac_cols, nnzj, hess_rows, hess_cols, nnzh) + nlcon = NonLinearStructure(nnln, jac_rows, jac_cols, nnzj, hess_rows, hess_cols, nnzh) return nnln, nlcon, nl_lcon, nl_ucon end +""" + parser_oracles(moimodel) + +Parse nonlinear oracles of a `MOI.ModelLike`. +""" +function parser_oracles(moimodel) + oracles = Tuple{MOI.VectorOfVariables,_VectorNonlinearOracleCache}[] + l_oracle = Float64[] + u_oracle = Float64[] + + # We know this pair exists from ListOfConstraintTypesPresent + for ci in MOI.get( + moimodel, + MOI.ListOfConstraintIndices{MOI.VectorOfVariables, MOI.VectorNonlinearOracle{Float64}}(), + ) + f = MOI.get(moimodel, MOI.ConstraintFunction(), ci) # ::MOI.VectorOfVariables + set = MOI.get(moimodel, MOI.ConstraintSet(), ci) # ::MOI.VectorNonlinearOracle{Float64} + + cache = _VectorNonlinearOracleCache(set) + push!(oracles, (f, cache)) + + # Bounds: MOI.VectorNonlinearOracle stores them internally (l, u) + append!(l_oracle, set.l) + append!(u_oracle, set.u) + end + + # Sizes: number of scalar constraints represented by all oracles + noracle = length(l_oracle) + + # Sparsity: + nnzj_oracle = 0 + nnzh_oracle = 0 + for (_, cache) in oracles + nnzj_oracle += length(cache.set.jacobian_structure) + # there may or may not be Hessian info + nnzh_oracle += length(cache.set.hessian_lagrangian_structure) + end + + return OraclesData(oracles, noracle, l_oracle, u_oracle, nnzj_oracle, nnzh_oracle) +end + """ parser_variables(model) @@ -751,6 +835,7 @@ function parser_nonlinear_expression(cmodel, nvar, F; hessian::Bool = true) # Nonlinear least squares model F_is_array_of_containers = F isa Array{<:AbstractArray} + nnlnequ = 0 if F_is_array_of_containers nnlnequ = sum(sum(isa(Fi, NLE) for Fi in FF) for FF in F) if nnlnequ > 0 @@ -787,7 +872,7 @@ function parser_nonlinear_expression(cmodel, nvar, F; hessian::Bool = true) Fhess_cols = hessian && Feval ≠ nothing ? getindex.(Fhess, 2) : Int[] nl_Fnnzh = length(Fhess) - nlequ = NonLinearStructure(Fjac_rows, Fjac_cols, nl_Fnnzj, Fhess_rows, Fhess_cols, nl_Fnnzh) + nlequ = NonLinearStructure(nnlnequ, Fjac_rows, Fjac_cols, nl_Fnnzj, Fhess_rows, Fhess_cols, nl_Fnnzh) return Feval, nlequ, nnlnequ end