From a30f62e4a7222821b79d4ce1d4b06c633f9bca82 Mon Sep 17 00:00:00 2001 From: Manuel Berkemeier Date: Fri, 13 Sep 2024 11:52:34 +0200 Subject: [PATCH] safe-guarding + logging and trial fixes --- src/algorithm_options.jl | 8 +++- src/evaluators/RBFModels/update.jl | 14 +++++-- src/set_algo.jl | 4 +- src/steepest_descent.jl | 21 +++++----- src/steps.jl | 21 +++++++--- src/trial.jl | 65 ++++++++++++++++++++++++++---- 6 files changed, 100 insertions(+), 33 deletions(-) diff --git a/src/algorithm_options.jl b/src/algorithm_options.jl index fda8201..5a725b8 100644 --- a/src/algorithm_options.jl +++ b/src/algorithm_options.jl @@ -53,7 +53,7 @@ Base.@kwdef struct AlgorithmOptions{T <: AbstractFloat, SC, SCALER_CFG_TYPE} "Trust region shrinking factor in criticality loops." crit_alpha :: T = 0.1 - backtrack_in_crit_routine :: Bool = true + backtrack_in_crit_routine :: Bool = false # initialization "Initial trust region radius." @@ -75,7 +75,9 @@ Base.@kwdef struct AlgorithmOptions{T <: AbstractFloat, SC, SCALER_CFG_TYPE} "Acceptance threshold." nu_accept :: T = 1e-2 # 1e-2 is suggested by Fletcher et. al. "Success threshold." - nu_success :: T = 0.9 # 0.9 is suggested by Fletcher et. al. + nu_success :: T = 0.9 # 0.9 is suggested by Fletcher et. al. + + trial_update :: Union{Val{:classic}, Val{:stepsize}} = Val(:classic) # compatibilty parameters "Factor for normal step compatibility test. The smaller `c_delta`, the stricter the test." @@ -125,6 +127,7 @@ function AlgorithmOptions( trial_mode, nu_accept, nu_success, + trial_update, c_delta, c_mu, mu, @@ -165,6 +168,7 @@ function AlgorithmOptions( trial_mode, nu_accept, nu_success, + trial_update, c_delta, c_mu, mu, diff --git a/src/evaluators/RBFModels/update.jl b/src/evaluators/RBFModels/update.jl index 5a64abf..01659c6 100644 --- a/src/evaluators/RBFModels/update.jl +++ b/src/evaluators/RBFModels/update.jl @@ -55,7 +55,7 @@ function update_rbf_model!( rbf, Δ, x0, fx0, global_lb, global_ub; delta_max, norm_p, log_level, indent ) - @ignoraise n_X = eval_missing_values!(rbf, op, x0, n_X) + @ignoraise n_X = eval_missing_values!(rbf, op, x0, n_X, global_lb, global_ub) if n_X < rbf.min_points @warn "$(pad_str)Cannot make a fully linear RBF model." @@ -435,15 +435,20 @@ function model_shape_parameter(rbf::RBFModel, Δ) return _shape_parameter(rbf.kernel) end -function eval_missing_values!(rbf, op, x0, n_X; ix1 = 2) +function eval_missing_values!(rbf, op, x0, n_X, global_lb=nothing, global_ub=nothing; ix1 = 2) @unpack buffers, params = rbf @unpack X = params @unpack db_index, sorting_flags, FX, xZ, fxZ = buffers - return eval_missing_values!(X, FX, xZ, fxZ, db_index, sorting_flags, ix1, n_X, op, x0) + return eval_missing_values!( + X, FX, xZ, fxZ, db_index, sorting_flags, ix1, n_X, op, x0, global_lb, global_ub + ) end -@views function eval_missing_values!(X, FX, xtmp, fxtmp, db_index, sorting_flags, istart, iend, op, x0) +@views function eval_missing_values!( + X, FX, xtmp, fxtmp, db_index, sorting_flags, istart, iend, op, x0, + global_lb=nothing, global_ub=nothing +) # `presort_eval_arrays!` will sort columns `istart:iend` in `X`, `FX` and `db_index` such that # indices `istart:i0` have database values already. @@ -457,6 +462,7 @@ end _FX = FX[:, i0:iend] _X .+= x0 # for evaluation, undo shift + project_into_box!.(eachcol(_X), Ref(global_lb), Ref(global_ub)) _FX .= NaN @ignoraise func_vals!(_FX, op, _X) diff --git a/src/set_algo.jl b/src/set_algo.jl index 8fee1b3..1168fbc 100644 --- a/src/set_algo.jl +++ b/src/set_algo.jl @@ -314,7 +314,7 @@ function step_tangentially!(sol, ndset, optimizer_caches, algo_opts; indent=0) @logmsg log_level "* Computing a descent step." - do_descent_step!( + @ignoraise do_descent_step!( step_cache, step_vals, delta, mop, mod, scaler, lin_cons, scaled_cons, vals, mod_vals; log_level ) @@ -944,7 +944,7 @@ function _test_trial_point!( is_good_trial_point = true delta_child = delta_new = gamma_shrink * delta - rho_success = minimum( diff_fx[is_pos] ./ diff_fx_mod[is_pos]; init=-Inf ) + rho_success = !any(is_pos) ? -Inf : minimum( diff_fx[is_pos] ./ diff_fx_mod[is_pos] ) if rho_success >= nu_success delta_child = min(delta_max, gamma_grow * delta) end diff --git a/src/steepest_descent.jl b/src/steepest_descent.jl index 5fbbe64..8eebea9 100644 --- a/src/steepest_descent.jl +++ b/src/steepest_descent.jl @@ -226,7 +226,7 @@ function compute_normal_step!( ) Base.copyto!(step_vals.n, n) catch err - @warn "Error in normal step computation." exception=(err, catch_backtrace()) + @warn "Error in normal step computation." exception=err #(err, catch_backtrace()) step_vals.n .= NaN end @@ -287,11 +287,11 @@ function finalize_step_vals!( @unpack lb_tr, ub_tr, Axn, Dgx_n = step_cache trust_region_bounds!(lb_tr, ub_tr, x, Δ, lb, ub) _, σ = initial_steplength(xn, d, lb_tr, ub_tr, Axn, A, gx, Dgx_n, Dgx) - if isnan(σ) || σ <= 0 - _χ = 0 + #src #del _χ = 0 + _χ = step_vals.crit_ref[] d .= 0 - else + else @unpack fxn, fx_tmp, rhs_factor, backtracking_mode, backtracking_factor, gxn, hxn = step_cache @unpack xs = step_vals χ = step_vals.crit_ref[] @@ -303,9 +303,7 @@ function finalize_step_vals!( ) end step_vals.crit_ref[] = _χ - @. step_vals.s = step_vals.n + d - @. step_vals.xs = step_vals.xn + d - @ignoraise objectives!(step_vals.fxs, mod, step_vals.xn) + return nothing end @@ -417,7 +415,7 @@ function solve_steepest_descent_problem( χ = -min(0, maximum(dir'Dfx)) catch err - @error "Exception in Descent Step Computation." exception=(err, catch_backtrace()) + @warn "Exception in Descent Step Computation." exception=err χ = 0 dir = zero(xn) end @@ -496,7 +494,7 @@ function backtrack!( if set_to_zero d .= 0 - return 0 + return χ end ## pre-compute RHS for Armijo test @@ -549,7 +547,6 @@ function backtrack!( if set_to_zero d .= 0 - return 0 end return χ @@ -649,7 +646,6 @@ function initial_steplength( T = eltype(xn) σ_min, σ_max = intersect_box(xn, d, lb_tr, ub_tr) - if !isnothing(A) for (i, ai) = enumerate(eachrow(A)) # Axn + σ * A*d .<= b @@ -659,7 +655,8 @@ function initial_steplength( end for (i, dgi) = enumerate(eachcol(Dgx)) # gx + Dgx_n + σ * Dgx * d .<= 0 - σl, σr = intersect_bound(gx[i] + Dgx_n[i], LA.dot(dgi, d), 0, T) + #src #del σl, σr = intersect_bound(@show(gx[i] + Dgx_n[i]), LA.dot(dgi, d), 0, T) + σl, σr = intersect_bound(Dgx_n[i], LA.dot(dgi, d), 0, T) σ_min, σ_max = intersect_intervals(σ_min, σ_max, σl, σr, T) end return σ_min, σ_max diff --git a/src/steps.jl b/src/steps.jl index 89f8ad7..adb8a5c 100644 --- a/src/steps.jl +++ b/src/steps.jl @@ -12,7 +12,7 @@ function compute_normal_step!( log_level ) ## set `step_vals.n` to hold normal step - nothing + return error("`compute_normal_step!` not yet implemented.") end function compute_descent_step!( @@ -22,7 +22,7 @@ function compute_descent_step!( ) ## set `step_vals.d` to hold descent step ## set `step_vals.crit_ref` to hold criticality measure - return nothing + return error("`compute_descent_step!` not yet implemented.") end function do_normal_step!( @@ -39,6 +39,9 @@ function do_normal_step!( scaled_cons, vals, mod_vals; log_level ) step_vals.xn .= cached_x(vals) .+ step_vals.n + project_into_box!(step_vals.xn, scaled_cons.lb, scaled_cons.ub) + step_vals.n .= step_vals.xn .- cached_x(vals) + @logmsg log_level """ $(pad_str) Found normal step $(pretty_row_vec(step_vals.n; cutoff=60)). $(pad_str) \t Hence xn=$(pretty_row_vec(step_vals.xn; cutoff=60)).""" @@ -56,10 +59,6 @@ function finalize_step_vals!( log_level ) ## find stepsize and scale `step_vals.d` - ## set `step_vals.s`, `step_vals.xs`, `step_vals.fxs` - @. step_vals.s = step_vals.n + step_vals.d - step_vals.xs .= cached_x(vals) .+ step_vals.s - @ignoraise objectives!(step_vals.fxs, mod, step_vals.xs) return nothing end @@ -77,6 +76,16 @@ function do_descent_step!( step_cache, step_vals, Δ, mop, mod, scaler, lin_cons, scaled_cons, vals, mod_vals; log_level ) + + @. step_vals.s = step_vals.n + step_vals.d + @. step_vals.xs = step_vals.xn + step_vals.d + + project_into_box!(step_vals.xs, scaled_cons.lb, scaled_cons.ub) + step_vals.s .= step_vals.xs .- cached_x(vals) + step_vals.d .= step_vals.s .- step_vals.n + + @ignoraise objectives!(step_vals.fxs, mod, step_vals.xn) + end return nothing end \ No newline at end of file diff --git a/src/trial.jl b/src/trial.jl index 4709041..04afe57 100644 --- a/src/trial.jl +++ b/src/trial.jl @@ -38,7 +38,8 @@ function test_trial_point!( delta = iteration_scalars.delta @unpack gamma_grow, gamma_shrink, gamma_shrink_much, delta_max = algo_opts delta_new, radius_update = _update_radius( - delta, + algo_opts.trial_update, + delta, LA.norm(step_vals.s, Inf), iteration_classification, rho, rho_classification, gamma_grow, gamma_shrink, gamma_shrink_much, delta_max, nu_accept, nu_success @@ -163,7 +164,7 @@ function _trial_analysis( fx, fxs, fx_mod, fxs_mod, diff_fx, diff_fx_mod, f_step_test_rhs ) - return __trial_analysis(<=, Inf, fxs, fxs_mod, diff_fx, diff_fx_mod, f_step_test_rhs) + return __trial_analysis(<=, Inf, fx, fxs, fxs_mod, diff_fx, diff_fx_mod, f_step_test_rhs) end function _trial_analysis( @@ -171,12 +172,12 @@ function _trial_analysis( fx, fxs, fx_mod, fxs_mod, diff_fx, diff_fx_mod, f_step_test_rhs ) - return __trial_analysis(>=, -Inf, fxs, fxs_mod, diff_fx, diff_fx_mod, f_step_test_rhs) + return __trial_analysis(>=, -Inf, fx, fxs, fxs_mod, diff_fx, diff_fx_mod, f_step_test_rhs) end function __trial_analysis( comp_op, rho0, - fxs, fxs_mod, diff_fx, diff_fx_mod, + fx, fxs, fxs_mod, diff_fx, diff_fx_mod, f_step_test_rhs ) ## following suggestions from “Trust Region Methods” by Conn et. al., @@ -200,10 +201,9 @@ function __trial_analysis( else diff_func / diff_mod end - rho_i = fx_i / mx_i if comp_op(rho_i , rho) _i = i - rho = rhoi + rho = rho_i end end @@ -238,9 +238,60 @@ function _log_radius_update(delta, delta_new, radius_update; indent, log_level) end return nothing end +function _update_radius( + ::Val{:stepsize}, + delta, len_s, + iteration_classification, rho, rho_classification, + gamma_grow, gamma_shrink, gamma_shrink_much, delta_max, + nu_accept, nu_success +) + radius_update = nothing + if iteration_classification == IT_F_STEP + #iteration_classification == IT_THETA_STEP + if rho_classification == RHO_ACCEPT + gamma_factor = gamma_shrink + if nu_success > nu_accept + rho_frac = (rho - nu_accept) / (nu_success - nu_accept) + gamma_factor += rho_frac * (1 - gamma_shrink) + end + delta_new = max( + gamma_factor * delta, + len_s + ) + radius_update = RADIUS_SHRINK + elseif rho_classification == RHO_SUCCESS + delta_new = min( + delta_max, + max( + gamma_grow * len_s, + (1 + (1 - gamma_grow)/2) * delta + ) + ) + if delta_new > delta + radius_update = RADIUS_GROW + else + radius_update = RADIUS_GROW_FAIL + end + end + elseif iteration_classification == IT_THETA_STEP + delta_new = delta + radius_update = RADIUS_NO_CHANGE + end + + if isnothing(radius_update) + if rho >= 0 + delta_new = len_s * gamma_shrink + else + delta_new = len_s * gamma_shrink_much + end + radius_update = RADIUS_SHRINK + end + return delta_new, radius_update +end function _update_radius( - delta, + ::Val{:classic}, + delta, len_s, iteration_classification, rho, rho_classification, gamma_grow, gamma_shrink, gamma_shrink_much, delta_max, nu_accept, nu_success