safe-guarding + logging and trial fixes

manuelbb-upb · Sep 13, 2024 · a30f62e · a30f62e
1 parent d4080fd
commit a30f62e
Showing 6 changed files with 100 additions and 33 deletions.
diff --git a/src/algorithm_options.jl b/src/algorithm_options.jl
@@ -53,7 +53,7 @@ Base.@kwdef struct AlgorithmOptions{T <: AbstractFloat, SC, SCALER_CFG_TYPE}
 	"Trust region shrinking factor in criticality loops."
 	crit_alpha :: T = 0.1
 
-	backtrack_in_crit_routine :: Bool = true
+	backtrack_in_crit_routine :: Bool = false
 
 	# initialization
 	"Initial trust region radius."
@@ -75,7 +75,9 @@ Base.@kwdef struct AlgorithmOptions{T <: AbstractFloat, SC, SCALER_CFG_TYPE}
 	"Acceptance threshold."
 	nu_accept :: T = 1e-2 			# 1e-2 is suggested by Fletcher et. al. 
 	"Success threshold."
-	nu_success :: T = 0.9 			# 0.9 is suggested by Fletcher et. al. 
+	nu_success :: T = 0.9 			# 0.9 is suggested by Fletcher et. al.
+
+	trial_update :: Union{Val{:classic}, Val{:stepsize}} = Val(:classic)
 
 	# compatibilty parameters
 	"Factor for normal step compatibility test. The smaller `c_delta`, the stricter the test."
@@ -125,6 +127,7 @@ function AlgorithmOptions(
 	trial_mode,
 	nu_accept,
 	nu_success,
+	trial_update,
 	c_delta,
 	c_mu,
 	mu,
@@ -165,6 +168,7 @@ function AlgorithmOptions(
 		trial_mode,
 		nu_accept,
 		nu_success,
+		trial_update,
 		c_delta,
 		c_mu,
 		mu,

diff --git a/src/evaluators/RBFModels/update.jl b/src/evaluators/RBFModels/update.jl
@@ -55,7 +55,7 @@ function update_rbf_model!(
             rbf, Δ, x0, fx0, global_lb, global_ub; 
             delta_max, norm_p, log_level, indent
         )
-        @ignoraise n_X = eval_missing_values!(rbf, op, x0, n_X)
+        @ignoraise n_X = eval_missing_values!(rbf, op, x0, n_X, global_lb, global_ub)
 
         if n_X < rbf.min_points
             @warn "$(pad_str)Cannot make a fully linear RBF model."
@@ -435,15 +435,20 @@ function model_shape_parameter(rbf::RBFModel, Δ)
     return _shape_parameter(rbf.kernel)
 end
 
-function eval_missing_values!(rbf, op, x0, n_X; ix1 = 2)
+function eval_missing_values!(rbf, op, x0, n_X, global_lb=nothing, global_ub=nothing; ix1 = 2)
     @unpack buffers, params = rbf
     @unpack X = params
     @unpack db_index, sorting_flags, FX, xZ, fxZ = buffers
 
-    return eval_missing_values!(X, FX, xZ, fxZ, db_index, sorting_flags, ix1, n_X, op, x0)
+    return eval_missing_values!(
+        X, FX, xZ, fxZ, db_index, sorting_flags, ix1, n_X, op, x0, global_lb, global_ub
+    )
 end
 
-@views function eval_missing_values!(X, FX, xtmp, fxtmp, db_index, sorting_flags, istart, iend, op, x0)
+@views function eval_missing_values!(
+    X, FX, xtmp, fxtmp, db_index, sorting_flags, istart, iend, op, x0,
+    global_lb=nothing, global_ub=nothing    
+)
 
     # `presort_eval_arrays!` will sort columns `istart:iend` in `X`, `FX` and `db_index` such that
     # indices `istart:i0` have database values already.
@@ -457,6 +462,7 @@ end
     _FX = FX[:, i0:iend]
 
     _X .+= x0       # for evaluation, undo shift
+    project_into_box!.(eachcol(_X), Ref(global_lb), Ref(global_ub))
     _FX .= NaN
 
     @ignoraise func_vals!(_FX, op, _X)

diff --git a/src/set_algo.jl b/src/set_algo.jl
@@ -314,7 +314,7 @@ function step_tangentially!(sol, ndset, optimizer_caches, algo_opts; indent=0)
 
     @logmsg log_level "* Computing a descent step."
 
-    do_descent_step!(
+    @ignoraise do_descent_step!(
         step_cache, step_vals, delta, mop, mod, scaler, lin_cons, scaled_cons, vals, mod_vals;
         log_level
     )
@@ -944,7 +944,7 @@ function _test_trial_point!(
                     is_good_trial_point = true
 
                     delta_child = delta_new = gamma_shrink * delta
-                    rho_success = minimum( diff_fx[is_pos] ./ diff_fx_mod[is_pos]; init=-Inf )
+                    rho_success = !any(is_pos) ? -Inf : minimum( diff_fx[is_pos] ./ diff_fx_mod[is_pos] )
                     if rho_success >= nu_success
                         delta_child = min(delta_max, gamma_grow * delta)
                     end

diff --git a/src/steepest_descent.jl b/src/steepest_descent.jl
@@ -226,7 +226,7 @@ function compute_normal_step!(
         )
         Base.copyto!(step_vals.n, n)
     catch err
-        @warn "Error in normal step computation." exception=(err, catch_backtrace())
+        @warn "Error in normal step computation." exception=err #(err, catch_backtrace())
         step_vals.n .= NaN
     end
 
@@ -287,11 +287,11 @@ function finalize_step_vals!(
     @unpack lb_tr, ub_tr, Axn, Dgx_n = step_cache
     trust_region_bounds!(lb_tr, ub_tr, x, Δ, lb, ub)
     _, σ = initial_steplength(xn, d, lb_tr, ub_tr, Axn, A, gx, Dgx_n, Dgx)
-
     if isnan(σ) || σ <= 0
-        _χ = 0
+        #src #del _χ = 0
+        _χ = step_vals.crit_ref[]
         d .= 0
-    else    
+    else
         @unpack fxn, fx_tmp, rhs_factor, backtracking_mode, backtracking_factor, gxn, hxn = step_cache
         @unpack xs = step_vals
         χ = step_vals.crit_ref[]
@@ -303,9 +303,7 @@ function finalize_step_vals!(
         )
     end
     step_vals.crit_ref[] = _χ
-    @. step_vals.s = step_vals.n + d
-    @. step_vals.xs = step_vals.xn + d
-    @ignoraise objectives!(step_vals.fxs, mod, step_vals.xn)
+
     return nothing
 end
 
@@ -417,7 +415,7 @@ function solve_steepest_descent_problem(
         χ = -min(0, maximum(dir'Dfx))
 
     catch err
-        @error "Exception in Descent Step Computation." exception=(err, catch_backtrace())
+        @warn "Exception in Descent Step Computation." exception=err
         χ = 0
         dir = zero(xn)
     end
@@ -496,7 +494,7 @@ function backtrack!(
 
     if set_to_zero
         d .= 0
-        return 0
+        return χ
     end
 
     ## pre-compute RHS for Armijo test
@@ -549,7 +547,6 @@ function backtrack!(
 
     if set_to_zero
         d .= 0
-        return 0
     end
 
     return χ
@@ -649,7 +646,6 @@ function initial_steplength(
 
     T = eltype(xn)
     σ_min, σ_max = intersect_box(xn, d, lb_tr, ub_tr)
-
     if !isnothing(A)
         for (i, ai) = enumerate(eachrow(A))
             # Axn + σ * A*d .<= b
@@ -659,7 +655,8 @@ function initial_steplength(
     end
     for (i, dgi) = enumerate(eachcol(Dgx))
         # gx + Dgx_n + σ * Dgx * d .<= 0
-        σl, σr = intersect_bound(gx[i] + Dgx_n[i], LA.dot(dgi, d), 0, T)
+        #src #del σl, σr = intersect_bound(@show(gx[i] + Dgx_n[i]), LA.dot(dgi, d), 0, T)
+        σl, σr = intersect_bound(Dgx_n[i], LA.dot(dgi, d), 0, T)
         σ_min, σ_max = intersect_intervals(σ_min, σ_max, σl, σr, T)
     end
     return σ_min, σ_max

diff --git a/src/steps.jl b/src/steps.jl
@@ -12,7 +12,7 @@ function compute_normal_step!(
     log_level
 )
     ## set `step_vals.n` to hold normal step
-    nothing
+    return error("`compute_normal_step!` not yet implemented.")
 end
 
 function compute_descent_step!(
@@ -22,7 +22,7 @@ function compute_descent_step!(
 )
     ## set `step_vals.d` to hold descent step
     ## set `step_vals.crit_ref` to hold criticality measure
-    return nothing
+    return error("`compute_descent_step!` not yet implemented.")
 end
 
 function do_normal_step!(
@@ -39,6 +39,9 @@ function do_normal_step!(
             scaled_cons, vals, mod_vals; log_level
         )
         step_vals.xn .= cached_x(vals) .+ step_vals.n
+        project_into_box!(step_vals.xn, scaled_cons.lb, scaled_cons.ub)
+        step_vals.n .= step_vals.xn .- cached_x(vals)
+
         @logmsg log_level """
             $(pad_str) Found normal step $(pretty_row_vec(step_vals.n; cutoff=60)). 
             $(pad_str) \t Hence xn=$(pretty_row_vec(step_vals.xn; cutoff=60)).""" 
@@ -56,10 +59,6 @@ function finalize_step_vals!(
     log_level
 )
     ## find stepsize and scale `step_vals.d`
-    ## set `step_vals.s`, `step_vals.xs`, `step_vals.fxs`
-    @. step_vals.s = step_vals.n + step_vals.d
-    step_vals.xs .= cached_x(vals) .+ step_vals.s
-    @ignoraise objectives!(step_vals.fxs, mod, step_vals.xs)
     return nothing
 end
 
@@ -77,6 +76,16 @@ function do_descent_step!(
             step_cache, step_vals, Δ, mop, mod, scaler, lin_cons,
             scaled_cons, vals, mod_vals; log_level
         )
+
+        @. step_vals.s = step_vals.n + step_vals.d
+        @. step_vals.xs = step_vals.xn + step_vals.d
+
+        project_into_box!(step_vals.xs, scaled_cons.lb, scaled_cons.ub)
+        step_vals.s .= step_vals.xs .- cached_x(vals)
+        step_vals.d .= step_vals.s .- step_vals.n
+
+        @ignoraise objectives!(step_vals.fxs, mod, step_vals.xn)
+
     end
     return nothing
 end
diff --git a/src/trial.jl b/src/trial.jl
@@ -38,7 +38,8 @@ function test_trial_point!(
     delta = iteration_scalars.delta
     @unpack gamma_grow, gamma_shrink, gamma_shrink_much, delta_max = algo_opts
     delta_new, radius_update = _update_radius(
-        delta, 
+        algo_opts.trial_update,
+        delta, LA.norm(step_vals.s, Inf),
         iteration_classification, rho, rho_classification,
         gamma_grow, gamma_shrink, gamma_shrink_much, delta_max,
         nu_accept, nu_success
@@ -163,20 +164,20 @@ function _trial_analysis(
     fx, fxs, fx_mod, fxs_mod, diff_fx, diff_fx_mod,
     f_step_test_rhs
 )
-    return __trial_analysis(<=, Inf, fxs, fxs_mod, diff_fx, diff_fx_mod, f_step_test_rhs)
+    return __trial_analysis(<=, Inf, fx, fxs, fxs_mod, diff_fx, diff_fx_mod, f_step_test_rhs)
 end
 
 function _trial_analysis(
     mode::Val{:max_rho}, 
     fx, fxs, fx_mod, fxs_mod, diff_fx, diff_fx_mod,
     f_step_test_rhs
 )
-    return __trial_analysis(>=, -Inf, fxs, fxs_mod, diff_fx, diff_fx_mod, f_step_test_rhs)
+    return __trial_analysis(>=, -Inf, fx, fxs, fxs_mod, diff_fx, diff_fx_mod, f_step_test_rhs)
 end
 
 function __trial_analysis(
     comp_op, rho0, 
-    fxs, fxs_mod, diff_fx, diff_fx_mod,
+    fx, fxs, fxs_mod, diff_fx, diff_fx_mod,
     f_step_test_rhs
 )
     ## following suggestions from “Trust Region Methods” by Conn et. al.,
@@ -200,10 +201,9 @@ function __trial_analysis(
         else
             diff_func / diff_mod
         end
-        rho_i = fx_i / mx_i
         if comp_op(rho_i , rho)
             _i = i
-            rho = rhoi
+            rho = rho_i
         end
     end
 
@@ -238,9 +238,60 @@ function _log_radius_update(delta, delta_new, radius_update; indent, log_level)
     end
     return nothing
 end
+function _update_radius(
+    ::Val{:stepsize},
+    delta, len_s,
+    iteration_classification, rho, rho_classification,
+    gamma_grow, gamma_shrink, gamma_shrink_much, delta_max,
+    nu_accept, nu_success
+)
+    radius_update = nothing
+    if iteration_classification == IT_F_STEP 
+        #iteration_classification == IT_THETA_STEP
+        if rho_classification == RHO_ACCEPT
+            gamma_factor = gamma_shrink
+            if nu_success > nu_accept
+                rho_frac = (rho - nu_accept) / (nu_success - nu_accept)
+                gamma_factor += rho_frac * (1 - gamma_shrink)
+            end
+            delta_new = max(
+                gamma_factor * delta,
+                len_s
+            )
+            radius_update = RADIUS_SHRINK
+        elseif rho_classification == RHO_SUCCESS
+            delta_new = min(
+                delta_max,
+                max(
+                    gamma_grow * len_s,
+                    (1 + (1 - gamma_grow)/2) * delta
+                )
+            )
+            if delta_new > delta
+                radius_update = RADIUS_GROW
+            else
+                radius_update = RADIUS_GROW_FAIL
+            end
+        end
+    elseif iteration_classification == IT_THETA_STEP
+        delta_new = delta
+        radius_update = RADIUS_NO_CHANGE
+    end
+
+    if isnothing(radius_update)
+        if rho >= 0
+            delta_new = len_s * gamma_shrink
+        else
+            delta_new = len_s * gamma_shrink_much
+        end 
+        radius_update = RADIUS_SHRINK
+    end
 
+    return delta_new, radius_update
+end
 function _update_radius(
-    delta,
+    ::Val{:classic},
+    delta, len_s,
     iteration_classification, rho, rho_classification,
     gamma_grow, gamma_shrink, gamma_shrink_much, delta_max,
     nu_accept, nu_success