diff --git a/src/evaluator.jl b/src/evaluator.jl
index 1d2e5a7..6b8d0d7 100644
--- a/src/evaluator.jl
+++ b/src/evaluator.jl
@@ -60,6 +60,20 @@ function MOI.eval_constraint(evaluator::Evaluator, g, x)
     return
 end
 
+function eval_residual!(evaluator::Evaluator, F, x)
+    return eval_residual!(evaluator.backend, F, x)
+end
+
+function eval_residual_jtprod!(evaluator::Evaluator, Jtv, x, v)
+    return eval_residual_jtprod!(evaluator.backend, Jtv, x, v)
+end
+
+function eval_residual_jprod!(evaluator::Evaluator, Jv, x, v)
+    return eval_residual_jprod!(evaluator.backend, Jv, x, v)
+end
+
+residual_dimension(evaluator::Evaluator) = residual_dimension(evaluator.backend)
+
 function MOI.jacobian_structure(evaluator::Evaluator)
     return MOI.jacobian_structure(evaluator.backend)
 end
diff --git a/src/mathoptinterface_api.jl b/src/mathoptinterface_api.jl
index 1278fff..3043176 100644
--- a/src/mathoptinterface_api.jl
+++ b/src/mathoptinterface_api.jl
@@ -36,6 +36,7 @@ function MOI.initialize(d::NLPEvaluator, requested_features::Vector{Symbol})
         largest_user_input_dimension = max(largest_user_input_dimension, op.N)
     end
     d.objective = nothing
+    d.residual = nothing
     d.user_output_buffer = zeros(largest_user_input_dimension)
     d.jac_storage = zeros(max(N, largest_user_input_dimension))
     d.constraints = _FunctionStorage[]
@@ -47,6 +48,9 @@ function MOI.initialize(d::NLPEvaluator, requested_features::Vector{Symbol})
     max_expr_with_sub_length = 0
     #
     main_expressions = [c.expression.nodes for (_, c) in d.data.constraints]
+    if d.data.residual !== nothing
+        pushfirst!(main_expressions, something(d.data.residual).nodes)
+    end
     if d.data.objective !== nothing
         pushfirst!(main_expressions, something(d.data.objective).nodes)
     end
@@ -102,7 +106,9 @@ function MOI.initialize(d::NLPEvaluator, requested_features::Vector{Symbol})
     end
     max_chunk = 1
     shared_partials_storage_ϵ = Float64[]
+    main_offset = 0
     if d.data.objective !== nothing
+        main_offset += 1
         expr = something(d.data.objective)
         subexpr, linearity = _subexpression_and_linearity(
             expr,
@@ -116,7 +122,7 @@ function MOI.initialize(d::NLPEvaluator, requested_features::Vector{Symbol})
             coloring_storage,
             d.want_hess,
             d.subexpressions,
-            individual_order[1],
+            individual_order[main_offset],
             subexpression_edgelist,
             subexpression_variables,
             linearity,
@@ -125,8 +131,32 @@ function MOI.initialize(d::NLPEvaluator, requested_features::Vector{Symbol})
         max_chunk = max(max_chunk, size(objective.seed_matrix, 2))
         d.objective = objective
     end
+    if d.data.residual !== nothing
+        main_offset += 1
+        expr = something(d.data.residual)
+        subexpr, linearity = _subexpression_and_linearity(
+            expr,
+            moi_index_to_consecutive_index,
+            shared_partials_storage_ϵ,
+            d,
+        )
+        residual = _FunctionStorage(
+            subexpr,
+            N,
+            coloring_storage,
+            d.want_hess,
+            d.subexpressions,
+            individual_order[main_offset],
+            subexpression_edgelist,
+            subexpression_variables,
+            linearity,
+        )
+        max_expr_length = max(max_expr_length, length(expr.nodes))
+        max_chunk = max(max_chunk, size(residual.seed_matrix, 2))
+        d.residual = residual
+    end
     for (k, (_, constraint)) in enumerate(d.data.constraints)
-        idx = d.data.objective !== nothing ? k + 1 : k
+        idx = main_offset + k
         expr = constraint.expression
         subexpr, linearity = _subexpression_and_linearity(
             expr,
@@ -211,6 +241,121 @@ function MOI.eval_constraint(d::NLPEvaluator, g, x)
     return
 end
 
+# Forward-evaluate subexpressions and the residual at `x`.
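+# This mirrors the objective path: subexpressions are evaluated in dependency
+# order so their forward values are in place before the residual root itself
+# is evaluated.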
+function _forward_pass_residual!(d::NLPEvaluator, x)
+    for k in d.subexpression_order
+        d.subexpression_forward_values[k] =
+            _forward_eval(d.subexpressions[k], d, x)
+    end
+    _forward_eval(something(d.residual).expr, d, x)
+    return
+end
+
+# Read the residual values from forward storage into `F`.
+function _read_residual!(F::AbstractVector, d::NLPEvaluator)
+    res = something(d.residual)
+    rng = _storage_range(res.expr.sizes, 1)
+    @assert length(F) == length(rng)
+    for (i, j) in enumerate(rng)
+        F[i] = res.expr.forward_storage[j]
+    end
+    return
+end
+
+"""
+    eval_residual!(d::NLPEvaluator, F, x)
+
+Forward-evaluate the residual at `x` and write the values into `F`.
+"""
+function eval_residual!(d::NLPEvaluator, F::AbstractVector, x::AbstractVector)
+    @assert !isnothing(d.residual)
+    _forward_pass_residual!(d, x)
+    _read_residual!(F, d)
+    return F
+end
+
+"""
+    eval_residual_jtprod!(d::NLPEvaluator, Jtv, x, v)
+
+Compute `J(x)' * v` for the residual `F`, writing the result into `Jtv`.
+This is one reverse-mode pass with `v` as the seed at the residual root.
+"""
+function eval_residual_jtprod!(
+    d::NLPEvaluator,
+    Jtv::AbstractVector,
+    x::AbstractVector,
+    v::AbstractVector,
+)
+    @assert !isnothing(d.residual)
+    res = something(d.residual)
+    _forward_pass_residual!(d, x)
+    for k in d.subexpression_order
+        _reverse_eval(d.subexpressions[k])
+    end
+    _reverse_eval(res.expr, v)
+    fill!(Jtv, 0.0)
+    _extract_reverse_pass(Jtv, d, res)
+    return Jtv
+end
+
+"""
+    residual_dimension(d::NLPEvaluator) -> Int
+
+Number of residual components (i.e. the length of the residual vector).
+"""
+function residual_dimension(d::NLPEvaluator)
+    return length(_storage_range(something(d.residual).expr.sizes, 1))
+end
+
+"""
+    eval_residual_jprod!(d::NLPEvaluator, Jv, x, v)
+
+Compute `J(x) * v` for the residual `F`. ArrayDiff does not yet have a
+forward-mode JVP, so this materializes the Jacobian row by row via `nresid`
+reverse-mode passes (each with a unit-basis seed) and then takes `J * v`.
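+
+A sketch of the intended call pattern, assuming a 2-component residual in two
+variables (names and values here are illustrative only):
+
+```julia
+Jv = zeros(2)
+eval_residual_jprod!(d, Jv, x, [1.0, 0.0])  # Jv becomes the first column of J(x)
+```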
+""" +function eval_residual_jprod!( + d::NLPEvaluator, + Jv::AbstractVector, + x::AbstractVector, + v::AbstractVector, +) + @assert !isnothing(d.residual) + res = something(d.residual) + nresid = residual_dimension(d) + _forward_pass_residual!(d, x) + for k in d.subexpression_order + _reverse_eval(d.subexpressions[k]) + end + seed = zeros(Float64, nresid) + row = zeros(Float64, length(v)) + fill!(Jv, 0.0) + for i in 1:nresid + fill!(seed, 0.0) + seed[i] = 1.0 + _reverse_eval(res.expr, seed) + fill!(row, 0.0) + _extract_reverse_pass(row, d, res) + s = 0.0 + for j in eachindex(v) + s += row[j] * v[j] + end + Jv[i] = s + end + return Jv +end + function MOI.jacobian_structure(d::NLPEvaluator) J = Tuple{Int64,Int64}[] for (row, constraint) in enumerate(d.constraints) diff --git a/src/model.jl b/src/model.jl index 6010ae6..ce775ea 100644 --- a/src/model.jl +++ b/src/model.jl @@ -6,6 +6,11 @@ function set_objective(model::Model, obj) return end +function set_residual!(model::Model, residual) + model.residual = parse_expression(model, residual) + return +end + function add_constraint( model::Model{T}, func, diff --git a/src/reverse_mode.jl b/src/reverse_mode.jl index 159c092..3a1dfab 100644 --- a/src/reverse_mode.jl +++ b/src/reverse_mode.jl @@ -568,7 +568,9 @@ function _forward_eval( f.partials_storage[rhs] = zero(T) end end - @assert f.sizes.ndims[1] == 0 "Final result must be scalar, got ndims = $(f.sizes.ndims[1])" + # Caller is responsible for reading the right range of `f.forward_storage` + # for vector-valued roots (use `_storage_range(f.sizes, 1)`); the scalar + # return is only meaningful when the root is scalar. return f.forward_storage[1] end @@ -580,15 +582,26 @@ Reverse-mode evaluation of an expression tree given in `f`. * This function assumes `f.partials_storage` is already updated. * This function assumes that `f.reverse_storage` has been initialized with 0.0. """ -function _reverse_eval(f::_SubexpressionStorage) +function _reverse_eval( + f::_SubexpressionStorage, + seed::Union{Nothing,AbstractVector{Float64}} = nothing, +) @assert length(f.reverse_storage) >= _length(f.sizes) @assert length(f.partials_storage) >= _length(f.sizes) # f.nodes is already in order such that parents always appear before # children so a forward pass through nodes is a backwards pass through the # tree. children_arr = SparseArrays.rowvals(f.adj) - for i in _storage_range(f.sizes, 1) - f.reverse_storage[i] = one(Float64) + root_range = _storage_range(f.sizes, 1) + if seed === nothing + for i in root_range + f.reverse_storage[i] = one(Float64) + end + else + @assert length(seed) == length(root_range) + for (j, i) in enumerate(root_range) + f.reverse_storage[i] = seed[j] + end end for k in 1:length(f.nodes) node = f.nodes[k] diff --git a/src/types.jl b/src/types.jl index d3601fb..58358b2 100644 --- a/src/types.jl +++ b/src/types.jl @@ -194,6 +194,9 @@ It has the following fields: """ mutable struct Model{T} objective::Union{Nothing,Expression{T}} + # Vector residual for nonlinear least-squares objectives. When set, callers + # can query its value, `J*v`, `J'*v`, etc. via the evaluator. 
+    residual::Union{Nothing,Expression{T}}
     expressions::Vector{Expression{T}}
     constraints::OrderedCollections.OrderedDict{ConstraintIndex,Constraint{T}}
     parameters::Vector{T}
@@ -202,6 +207,7 @@
     last_constraint_index::Int64
     function Model{T}() where {T}
         return new{T}(
+            nothing,
             nothing,
             Expression{T}[],
             OrderedCollections.OrderedDict{ConstraintIndex,Constraint{T}}(),
@@ -294,6 +300,7 @@ mutable struct NLPEvaluator <: MOI.AbstractNLPEvaluator
 
     ordered_variables::Vector{MOI.VariableIndex}
     objective::Union{Nothing,_FunctionStorage}
+    residual::Union{Nothing,_FunctionStorage}
     constraints::Vector{_FunctionStorage}
     subexpressions::Vector{_SubexpressionStorage}
     subexpression_order::Vector{Int}
diff --git a/test/ArrayDiff.jl b/test/ArrayDiff.jl
index 06fc357..1a888e7 100644
--- a/test/ArrayDiff.jl
+++ b/test/ArrayDiff.jl
@@ -787,6 +787,36 @@ function test_model_typed_float32_evaluator_runs()
     return
 end
 
+function test_residual_with_subexpression()
+    # The residual references a subexpression `e = x1 * x2`, so the
+    # subexpression-iteration loops in `_forward_pass_residual!` and
+    # `eval_residual_jprod!` are exercised.
+    model = ArrayDiff.Model()
+    x1 = MOI.VariableIndex(1)
+    x2 = MOI.VariableIndex(2)
+    e = ArrayDiff.add_expression(model, :($x1 * $x2))
+    # F = [x1 + e, x2 - e]
+    ArrayDiff.set_residual!(model, :([$x1 + $e, $x2 - $e]))
+    evaluator = ArrayDiff.Evaluator(model, ArrayDiff.Mode(), [x1, x2])
+    MOI.initialize(evaluator, [:Grad, :Jac, :JacVec])
+    @test ArrayDiff.residual_dimension(evaluator) == 2
+    x = [3.0, 4.0]
+    # e = 12, so F = [3 + 12, 4 - 12] = [15, -8]
+    F = zeros(2)
+    ArrayDiff.eval_residual!(evaluator, F, x)
+    @test F == [15.0, -8.0]
+    # J = [1+x2 x1; -x2 1-x1] = [5 3; -4 -2]
+    Jtv = zeros(2)
+    ArrayDiff.eval_residual_jtprod!(evaluator, Jtv, x, [1.0, 1.0])
+    @test Jtv == [1.0, 1.0]
+    Jv = zeros(2)
+    ArrayDiff.eval_residual_jprod!(evaluator, Jv, x, [1.0, 0.0])
+    @test Jv == [5.0, -4.0]
+    ArrayDiff.eval_residual_jprod!(evaluator, Jv, x, [0.0, 1.0])
+    @test Jv == [3.0, -2.0]
+    return
+end
+
 end  # module
 
 TestArrayDiff.runtests()
diff --git a/test/NLPModelsJuMP.jl b/test/NLPModelsJuMP.jl
index 6c14615..8e8dc65 100644
--- a/test/NLPModelsJuMP.jl
+++ b/test/NLPModelsJuMP.jl
@@ -19,12 +19,12 @@ function runtests()
     return
 end
 
-function test_neural_nlpmodels_jump()
+function _test_neural_nlpmodels_jump(solver)
     n = 2
     X = [1.0 0.5; 0.3 0.8]
     target = [0.5 0.2; 0.1 0.7]
     model = Model(NLPModelsJuMP.Optimizer)
-    set_attribute(model, "solver", JSOSolvers.LBFGSSolver)
+    set_attribute(model, "solver", solver)
     set_attribute(
         model,
         MOI.AutomaticDifferentiationBackend(),
@@ -48,6 +48,18 @@ function test_neural_nlpmodels_jump()
     return
 end
 
+function test_neural_lbfgs()
+    return _test_neural_nlpmodels_jump(JSOSolvers.LBFGSSolver)
+end
+
+function test_neural_trunkls()
+    return _test_neural_nlpmodels_jump(JSOSolvers.TrunkSolverNLS)
+end
+
+function test_neural_tronls()
+    return _test_neural_nlpmodels_jump(JSOSolvers.TronSolverNLS)
+end
+
 end
 
 TestWithNLPModelsJuMP.runtests()