From 905424f1786277cf19f7d81b3de847f7b107d04b Mon Sep 17 00:00:00 2001
From: Johan Larsson
Date: Mon, 11 Dec 2023 12:20:36 +0100
Subject: [PATCH] fix: update to libslope 0.3.0

---
 src/main.cpp              |  33 +--
 src/slope/cd.h            |  28 +-
 src/slope/fit_slope.h     | 278 ------------------
 src/slope/parameters.h    |  92 ------
 src/slope/pgd.h           |  21 +-
 src/slope/results.h       |  30 --
 src/slope/slope.cpp       | 142 +++++++++-
 src/slope/slope.h         | 581 ++++++++++++++++++++++++++++++++++----
 src/slope/standardize.cpp |   7 +-
 src/slope/standardize.h   |   9 +-
 10 files changed, 719 insertions(+), 502 deletions(-)
 delete mode 100644 src/slope/fit_slope.h
 delete mode 100644 src/slope/parameters.h
 delete mode 100644 src/slope/results.h

diff --git a/src/main.cpp b/src/main.cpp
index c9b52a9..1e7fffe 100644
--- a/src/main.cpp
+++ b/src/main.cpp
@@ -1,4 +1,3 @@
-#include "slope/parameters.h"
 #include "slope/slope.h"
 #include <pybind11/eigen.h>
 #include <pybind11/pybind11.h>
@@ -18,24 +17,26 @@ fit_slope(const T& x,
           const Eigen::ArrayXd& alpha,
           const py::dict& args)
 {
-  slope::SlopeParameters params;
+  slope::Slope model;
 
-  params.intercept = args["intercept"].cast<bool>();
-  params.standardize = args["standardize"].cast<bool>();
-  params.update_clusters = args["update_clusters"].cast<bool>();
-  params.alpha_min_ratio = args["alpha_min_ratio"].cast<double>();
-  params.objective = args["objective"].cast<std::string>();
-  params.path_length = args["path_length"].cast<int>();
-  params.pgd_freq = args["pgd_freq"].cast<int>();
-  params.tol = args["tol"].cast<double>();
-  params.max_it = args["max_it"].cast<int>();
-  params.max_it_outer = args["max_it_outer"].cast<int>();
-  params.print_level = args["print_level"].cast<int>();
+  model.setIntercept(args["intercept"].cast<bool>());
+  model.setStandardize(args["standardize"].cast<bool>());
+  model.setUpdateClusters(args["update_clusters"].cast<bool>());
+  model.setAlphaMinRatio(args["alpha_min_ratio"].cast<double>());
+  model.setObjective(args["objective"].cast<std::string>());
+  model.setPathLength(args["path_length"].cast<int>());
+  model.setPgdFreq(args["pgd_freq"].cast<int>());
+  model.setTol(args["tol"].cast<double>());
+  model.setMaxIt(args["max_it"].cast<int>());
+  model.setMaxItOuter(args["max_it_outer"].cast<int>());
+  model.setPrintLevel(args["print_level"].cast<int>());
 
-  auto result = slope::slope(x, y, alpha, lambda, params);
+  model.fit(x, y, alpha, lambda);
 
-  return py::make_tuple(
-    result.beta0s, result.betas, result.lambda, result.alpha);
+  return py::make_tuple(model.getIntercepts(),
+                        model.getCoefs(),
+                        model.getLambda(),
+                        model.getAlpha());
 }
 
 pybind11::tuple
diff --git a/src/slope/cd.h b/src/slope/cd.h
index 8628830..9340cc5 100644
--- a/src/slope/cd.h
+++ b/src/slope/cd.h
@@ -7,7 +7,6 @@
 #pragma once
 
 #include "clusters.h"
-#include "parameters.h"
 #include "slope_threshold.h"
 #include "sorted_l1_norm.h"
 #include
@@ -26,14 +25,22 @@ namespace slope {
  * @param beta0 The intercept
  * @param beta The coefficients
  * @param residual The residual vector
- * @param clusters The cluster information
+ * @param clusters The cluster information, stored in a Clusters object.
  * @param x The design matrix
  * @param w The weight vector
  * @param z The response vector
  * @param sl1_norm The sorted L1 norm object
  * @param x_centers The center values of the data matrix columns
 * @param x_scales The scale values of the data matrix columns
- * @param params The SLOPE parameters
+ * @param intercept Should an intercept be fit?
+ * @param standardize Flag indicating whether to standardize the data matrix
+ * columns
+ * @param update_clusters Flag indicating whether to update the cluster
+ * information
+ * @param print_level The level of verbosity for printing debug information
+ *
+ * @see Clusters
+ * @see SortedL1Norm
  */
 template<typename T>
 void
@@ -47,7 +54,10 @@ coordinateDescent(double& beta0,
                   const SortedL1Norm& sl1_norm,
                   const Eigen::VectorXd& x_centers,
                   const Eigen::VectorXd& x_scales,
-                  const SlopeParameters& params)
+                  const bool intercept,
+                  const bool standardize,
+                  const bool update_clusters,
+                  const int print_level)
 {
   using namespace Eigen;
@@ -75,7 +85,7 @@
       double s_k = sign(beta(k));
       s.emplace_back(s_k);
 
-      if (params.standardize) {
+      if (standardize) {
         gradient_j =
           -s_k *
           (x.col(k).cwiseProduct(w).dot(residual) -
            w.dot(residual) * x_centers(k)) /
@@ -102,7 +112,7 @@
       double s_k = sign(beta(k));
       s.emplace_back(s_k);
 
-      if (params.standardize) {
+      if (standardize) {
         x_s += x.col(k) * (s_k / x_scales(k));
         x_s.array() -= x_centers(k) * s_k / x_scales(k);
       } else {
@@ -132,7 +142,7 @@
     if (cluster_size == 1) {
       int k = *clusters.cbegin(j);
 
-      if (params.standardize) {
+      if (standardize) {
         residual += x.col(k) * (s[0] * c_diff / x_scales(k));
         residual.array() -= x_centers(k) * s[0] * c_diff / x_scales(k);
       } else {
@@ -143,13 +153,13 @@
       }
     }
 
-    if (params.update_clusters) {
+    if (update_clusters) {
       clusters.update(j, new_index, std::abs(c_tilde));
     } else {
       clusters.setCoeff(j, std::abs(c_tilde));
     }
 
-    if (params.intercept) {
+    if (intercept) {
       double beta0_update = residual.dot(w) / w.sum();
       residual.array() -= beta0_update;
       beta0 += beta0_update;
diff --git a/src/slope/fit_slope.h b/src/slope/fit_slope.h
deleted file mode 100644
index 409adbe..0000000
--- a/src/slope/fit_slope.h
+++ /dev/null
@@ -1,278 +0,0 @@
-/**
- * @file
- * @brief The actual function that fits SLOPE
- */
-
-#pragma once
-
-#include "cd.h"
-#include "clusters.h"
-#include "helpers.h"
-#include "math.h"
-#include "objectives.h"
-#include "parameters.h"
-#include "pgd.h"
-#include "regularization_sequence.h"
-#include "results.h"
-#include "sorted_l1_norm.h"
-#include "standardize.h"
-#include
-#include
-#include
-#include
-#include
-#include
-
-namespace slope {
-
-/**
- * Calculates the slope coefficients for a linear regression model using the
- * SortedL1Norm regularization.
- *
- * @param x The dense input matrix of size n x p, where n is the number of
- * observations and p is the number of predictors.
- * @param y The response matrix of size n x 1.
- * @param alpha The regularization parameter sequence. If not provided, it will
- * be generated automatically.
- * @param lambda The regularization parameter for the SortedL1Norm
- * regularization. If not provided, it will be set to zero.
- * @param params A struct containing the parameters for the calculation. See
- * SlopeParameters.
- * @return The slope coefficients, intercept values, and primal values for each
- * step in the regularization path.
- * @see SlopeParameters
- */
-template<typename T>
-Results
-fitSlope(const T& x,
-         const Eigen::MatrixXd& y,
-         Eigen::ArrayXd alpha = Eigen::ArrayXd::Zero(0),
-         Eigen::ArrayXd lambda = Eigen::ArrayXd::Zero(0),
-         const SlopeParameters& params = SlopeParameters())
-{
-  using Eigen::VectorXd;
-
-  const int n = x.rows();
-  const int p = x.cols();
-
-  auto [x_centers, x_scales] = standardize(x, params.standardize);
-
-  std::unique_ptr<Objective> objective = setupObjective(params.objective);
-
-  // initialize coeficients
-  double beta0 = 0.0;
-  VectorXd beta = VectorXd::Zero(p);
-
-  VectorXd eta = x * beta;
-  eta.array() += beta0;
-
-  VectorXd w(n); // weights
-  VectorXd z(n); // working response
-
-  objective->updateWeightsAndWorkingResponse(w, z, eta, y);
-
-  VectorXd residual = z - eta;
-
-  // Setup the regularization sequence and path
-  SortedL1Norm sl1_norm{ lambda };
-
-  if (lambda.size() == 0) {
-    lambda = lambdaSequence(p, params.q, params.lambda_type);
-  } else {
-    if (lambda.size() != p) {
-      throw std::invalid_argument(
-        "lambda must be the same length as the number of predictors");
-    }
-    if (lambda.minCoeff() < 0) {
-      throw std::invalid_argument("lambda must be non-negative");
-    }
-  }
-
-  int path_length = params.path_length;
-
-  if (alpha.size() == 0) {
-    alpha = regularizationPath(x,
-                               w,
-                               z,
-                               x_centers,
-                               x_scales,
-                               sl1_norm,
-                               params.path_length,
-                               params.alpha_min_ratio,
-                               params.intercept,
-                               params.standardize);
-  } else {
-    path_length = alpha.size();
-  }
-
-  VectorXd beta0s(path_length);
-  std::vector<Eigen::Triplet<double>> beta_triplets;
-
-  std::vector<double> primals;
-  std::vector<double> dual_gaps;
-  std::vector<std::vector<double>> primals_path;
-  std::vector<std::vector<double>> dual_gaps_path;
-
-  double learning_rate = 1.0;
-
-  VectorXd beta_old_outer = beta;
-
-  Gaussian subprob_objective;
-
-  Clusters clusters(beta);
-
-  int it_total = 0;
-
-  // Regularization path loop
-  for (int path_step = 0; path_step < path_length; ++path_step) {
-    if (params.print_level > 0) {
-      std::cout << "Path step: " << path_step << ", alpha: " << alpha(path_step)
-                << std::endl;
-    }
-
-    sl1_norm.setAlpha(alpha(path_step));
-
-    // IRLS loop
-    for (int it_outer = 0; it_outer < params.max_it_outer; ++it_outer) {
-      // The residual is kept up to date, but not eta. So we need to compute it
-      // here.
-      eta = z - residual;
-
-      // Compute primal, dual, and gap
-      double primal = objective->loss(eta, y) + sl1_norm.eval(beta);
-      primals.emplace_back(primal);
-
-      VectorXd gen_residual = objective->residual(eta, y);
-
-      VectorXd gradient = computeGradient(
-        x, gen_residual, x_centers, x_scales, params.standardize);
-      VectorXd theta = gen_residual;
-      theta.array() /= std::max(1.0, sl1_norm.dualNorm(gradient));
-      double dual = objective->dual(theta, y);
-
-      double dual_gap = primal - dual;
-
-      dual_gaps.emplace_back(dual_gap);
-
-      double tol = std::abs(primal) * params.tol;
-
-      if (params.print_level > 1) {
-        std::cout << indent(1) << "IRLS iteration: " << it_outer << std::endl
-                  << indent(2) << "primal (main problem): " << primal
-                  << std::endl
-                  << indent(2) << "duality gap (main problem): " << dual_gap
-                  << ", tol: " << tol << std::endl;
-      }
-
-      if (std::max(dual_gap, 0.0) <= tol) {
-        break;
-      }
-
-      // Update weights and working response
-      beta_old_outer = beta;
-
-      objective->updateWeightsAndWorkingResponse(w, z, eta, y);
-      residual = z - eta;
-
-      if (params.print_level > 3) {
-        printContents(w, " weights");
-        printContents(z, " working response");
-      }
-
-      for (int it = 0; it < params.max_it; ++it) {
-        if (it % params.pgd_freq == 0) {
-          double g = (0.5 / n) * residual.cwiseAbs2().dot(w);
-          double h = sl1_norm.eval(beta);
-          double primal_inner = g + h;
-
-          VectorXd gradient = computeGradient(
-            x, residual, x_centers, x_scales, params.standardize);
-
-          // Obtain a feasible dual point by dual scaling
-          theta = residual;
-          theta.array() /= std::max(1.0, sl1_norm.dualNorm(gradient));
-          double dual_inner = subprob_objective.dual(theta, z);
-
-          double dual_gap_inner = primal_inner - dual_inner;
-
-          double tol_inner = std::abs(primal_inner) * params.tol;
-
-          if (params.print_level > 2) {
-            std::cout << indent(2) << "iteration: " << it << std::endl
-                      << indent(3) << "primal (inner): " << primal_inner
-                      << std::endl
-                      << indent(3) << "duality gap (inner): " << dual_gap_inner
-                      << ", tol: " << tol_inner << std::endl;
-          }
-
-          if (std::max(dual_gap_inner, 0.0) <= tol_inner) {
-            break;
-          }
-
-          VectorXd beta_old = beta;
-
-          if (params.print_level > 2) {
-            std::cout << indent(3) << "Running PGD step" << std::endl;
-          }
-
-          proximalGradientDescent(beta0,
-                                  beta,
-                                  residual,
-                                  learning_rate,
-                                  gradient,
-                                  x,
-                                  w,
-                                  z,
-                                  sl1_norm,
-                                  x_centers,
-                                  x_scales,
-                                  g,
-                                  params);
-
-          clusters.update(beta);
-        } else {
-          if (params.print_level > 2) {
-            std::cout << indent(3) << "Running CD step" << std::endl;
-          }
-
-          coordinateDescent(beta0,
-                            beta,
-                            residual,
-                            clusters,
-                            x,
-                            w,
-                            z,
-                            sl1_norm,
-                            x_centers,
-                            x_scales,
-                            params);
-        }
-      }
-      it_total++;
-    }
-
-    // Store everything for this step of the path
-    auto [beta0_out, beta_out] =
-      rescaleCoefficients(beta0, beta, x_centers, x_scales, params);
-
-    beta0s(path_step) = std::move(beta0_out);
-
-    for (int j = 0; j < p; ++j) {
-      if (beta_out(j) != 0) {
-        beta_triplets.emplace_back(j, path_step, beta_out(j));
-      }
-    }
-
-    primals_path.emplace_back(primals);
-    dual_gaps_path.emplace_back(dual_gaps);
-  }
-
-  Eigen::SparseMatrix<double> betas(p, path_length);
-  betas.setFromTriplets(beta_triplets.begin(), beta_triplets.end());
-
-  return {
-    beta0s, betas, alpha, lambda, primals_path, dual_gaps_path, it_total
-  };
-}
-
-} // namespace slope
diff --git a/src/slope/parameters.h b/src/slope/parameters.h
deleted file mode 100644
index 3ceb1aa..0000000
--- a/src/slope/parameters.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/**
- * @file
- * @brief The declaration of the SlopeParameters struct
- */
-
-#pragma once
-
-#include
-
-namespace slope {
-
-/**
- * @struct SlopeParameters
- * @brief A struct that holds the parameters for the Slope algorithm.
- *
- * This struct holds the parameters that can be used to configure the behavior
- * of the Slope algorithm. The Slope algorithm is used for regression and
- * variable selection.
- *
- * @var SlopeParameters::intercept
- * A boolean indicating whether an intercept term should be included in the
- * model.
- *
- * @var SlopeParameters::standardize
- * A boolean indicating whether the input data should be standardized before
- * fitting the model.
- *
- * @var SlopeParameters::update_clusters
- * A boolean indicating whether the cluster assignments should be updated during
- * the optimization process.
- *
- * @var SlopeParameters::alpha_min_ratio
- * A double representing the minimum value of the regularization parameter
- * alpha. If -1, the value will be set to 1e-4 if n > p and 1e-2 otherwise.
- *
- * @var SlopeParameters::learning_rate_decr
- * Sets the learning rate decrement for the line search in the proximal gradient
- * descent step.
- *
- * @var SlopeParameters::q
- * A double representing the quantile level for the L1 penalty.
- *
- * @var SlopeParameters::tol
- * A double representing the convergence tolerance for the optimization
- * algorithm.
- *
- * @var SlopeParameters::max_it
- * An integer representing the maximum number of iterations for the optimization
- * algorithm.
- *
- * @var SlopeParameters::max_it_outer
- * An integer representing the maximum number of outer iterations for the
- * optimization algorithm.
- *
- * @var SlopeParameters::path_length
- * An integer representing the number of points on the regularization path.
- *
- * @var SlopeParameters::pgd_freq
- * An integer representing the frequency of the proximal gradient descent
- * updates.
- *
- * @var SlopeParameters::print_level
- * An integer representing the level of verbosity for the optimization
- * algorithm.
- *
- * @var SlopeParameters::lambda_type
- * A string representing the type of regularization penalty to be used.
- * Currently only "bh" (Benjamini-Hochberg) is supported.
- *
- * @var SlopeParameters::objective
- * A string representing the choice of objective function for the optimization
- * algorithm. Currently only "gaussian" is supported.
- */
-struct SlopeParameters
-{
-  bool intercept = true;
-  bool standardize = true;
-  bool update_clusters = false;
-  double alpha_min_ratio = -1;
-  double learning_rate_decr = 0.5;
-  double q = 0.1;
-  double tol = 1e-8;
-  int max_it = 1e6;
-  int max_it_outer = 100;
-  int path_length = 100;
-  int pgd_freq = 10;
-  int print_level = 0;
-  std::string lambda_type = "bh";
-  std::string objective = "gaussian";
-};
-
-} // namespace slope
diff --git a/src/slope/pgd.h b/src/slope/pgd.h
index e347f5f..efca1d0 100644
--- a/src/slope/pgd.h
+++ b/src/slope/pgd.h
@@ -7,7 +7,6 @@
 
 #include "helpers.h"
 #include "math.h"
-#include "parameters.h"
 #include "sorted_l1_norm.h"
 #include
 #include
@@ -35,7 +34,12 @@ namespace slope {
  * @param x_centers The center values of the input data.
  * @param x_scales The scale values of the input data.
  * @param g_old The previous value of the objective function.
- * @param params The slope parameters.
+ * @param intercept Flag indicating whether to include an intercept term.
+ * @param standardize Flag indicating whether to standardize the input data.
+ * @param learning_rate_decr The learning rate decrement factor.
+ * @param print_level The level of verbosity for printing progress.
+ *
+ * @see SortedL1Norm
  */
 template<typename T>
 void
@@ -51,13 +55,16 @@ proximalGradientDescent(double& beta0,
                         const Eigen::VectorXd& x_centers,
                         const Eigen::VectorXd& x_scales,
                         const double g_old,
-                        const SlopeParameters& params)
+                        const bool intercept,
+                        const bool standardize,
+                        const double learning_rate_decr,
+                        const int print_level)
 {
   const int n = x.rows();
   const int p = x.cols();
 
   // Proximal gradient descent with line search
-  if (params.print_level > 2) {
+  if (print_level > 2) {
     std::cout << " Starting line search" << std::endl;
   }
@@ -68,14 +75,14 @@ proximalGradientDescent(double& beta0,
 
     Eigen::VectorXd beta_diff = beta - beta_old;
 
-    if (params.standardize) {
+    if (standardize) {
       residual = z - x * beta.cwiseQuotient(x_scales);
       residual.array() += x_centers.cwiseQuotient(x_scales).dot(beta);
     } else {
       residual = z - x * beta;
     }
 
-    if (params.intercept) {
+    if (intercept) {
       beta0 = residual.dot(w) / w.sum();
       residual.array() -= beta0;
     }
@@ -87,7 +94,7 @@ proximalGradientDescent(double& beta0,
     if (q >= g * (1 - 1e-12)) {
       break;
     } else {
-      learning_rate *= params.learning_rate_decr;
+      learning_rate *= learning_rate_decr;
     }
   }
 }
diff --git a/src/slope/results.h b/src/slope/results.h
deleted file mode 100644
index 48d82f5..0000000
--- a/src/slope/results.h
+++ /dev/null
@@ -1,30 +0,0 @@
-/**
- * @file
- * @brief The declaration of the Results struct.
- */
-
-#pragma once
-
-#include
-#include
-
-namespace slope {
-
-/**
- * @brief Struct to store the results of a calculation.
- */
-struct Results
-{
-  const Eigen::VectorXd beta0s; /**< Intercept values for each calculation. */
-  const Eigen::SparseMatrix<double>
-    betas; /**< Coefficient matrix for each calculation. */
-  const Eigen::ArrayXd alpha;  /**< Array of alpha values. */
-  const Eigen::ArrayXd lambda; /**< Array of lambda values. */
-  const std::vector<std::vector<double>>
-    primals; /**< Vector of primal values for each calculation. */
-  const std::vector<std::vector<double>>
-    dual_gaps; /**< Vector of dual gap values for each calculation. */
-  const int it_total; /**< Total number of iterations.
-                       */
-};
-
-} // namespace slope
diff --git a/src/slope/slope.cpp b/src/slope/slope.cpp
index 44d2972..23c3e96 100644
--- a/src/slope/slope.cpp
+++ b/src/slope/slope.cpp
@@ -1,25 +1,137 @@
-#include "fit_slope.h"
+#include "slope.h"
+#include
+#include
 
 namespace slope {
 
-Results
-slope(const Eigen::MatrixXd& x,
-      const Eigen::MatrixXd& y,
-      Eigen::ArrayXd alpha = Eigen::ArrayXd::Zero(0),
-      Eigen::ArrayXd lambda = Eigen::ArrayXd::Zero(0),
-      const SlopeParameters& params = SlopeParameters())
+void
+Slope::fit(const Eigen::MatrixXd& x,
+           const Eigen::MatrixXd& y,
+           const Eigen::ArrayXd& alpha,
+           const Eigen::ArrayXd& lambda)
 {
-  return fitSlope(x, y, alpha, lambda, params);
+  return fitImpl(x, y, alpha, lambda);
 }
 
-Results
-slope(const Eigen::SparseMatrix<double>& x,
-      const Eigen::MatrixXd& y,
-      Eigen::ArrayXd alpha = Eigen::ArrayXd::Zero(0),
-      Eigen::ArrayXd lambda = Eigen::ArrayXd::Zero(0),
-      const SlopeParameters& params = SlopeParameters())
+void
+Slope::fit(const Eigen::SparseMatrix<double>& x,
+           const Eigen::MatrixXd& y,
+           const Eigen::ArrayXd& alpha,
+           const Eigen::ArrayXd& lambda)
 {
-  return fitSlope(x, y, alpha, lambda, params);
+  return fitImpl(x, y, alpha, lambda);
 }
 
+void
+Slope::setIntercept(bool intercept)
+{
+  this->intercept = intercept;
+}
+void
+Slope::setStandardize(bool standardize)
+{
+  this->standardize = standardize;
+}
+void
+Slope::setUpdateClusters(bool update_clusters)
+{
+  this->update_clusters = update_clusters;
+}
+void
+Slope::setAlphaMinRatio(double alpha_min_ratio)
+{
+  this->alpha_min_ratio = alpha_min_ratio;
+}
+void
+Slope::setLearningRateDecr(double learning_rate_decr)
+{
+  this->learning_rate_decr = learning_rate_decr;
+}
+void
+Slope::setQ(double q)
+{
+  this->q = q;
+}
+void
+Slope::setTol(double tol)
+{
+  this->tol = tol;
+}
+void
+Slope::setMaxIt(int max_it)
+{
+  this->max_it = max_it;
+}
+void
+Slope::setMaxItOuter(int maxItOuter)
+{
+  this->max_it_outer = maxItOuter;
+}
+void
+Slope::setPathLength(int path_length)
+{
+  this->path_length = path_length;
+}
+void
+Slope::setPgdFreq(int pgd_freq)
+{
+  this->pgd_freq = pgd_freq;
+}
+void
+Slope::setPrintLevel(int print_level)
+{
+  this->print_level = print_level;
+}
+void
+Slope::setLambdaType(const std::string& lambda_type)
+{
+  this->lambda_type = lambda_type;
+}
+void
+Slope::setObjective(const std::string& objective)
+{
+  this->objective = objective;
+}
+
+const Eigen::ArrayXd&
+Slope::getAlpha() const
+{
+  return alpha_out;
+}
+const Eigen::ArrayXd&
+Slope::getLambda() const
+{
+  return lambda_out;
+}
+
+const Eigen::SparseMatrix<double>&
+Slope::getCoefs() const
+{
+  return betas;
+}
+
+const Eigen::VectorXd&
+Slope::getIntercepts() const
+{
+  return beta0s;
+}
+
+int
+Slope::getTotalIterations() const
+{
+  return it_total;
+}
+
+const std::vector<std::vector<double>>&
+Slope::getDualGaps() const
+{
+  return dual_gaps_path;
+}
+
+const std::vector<std::vector<double>>&
+Slope::getPrimals() const
+{
+  return primals_path;
 }
+
+} // namespace slope
diff --git a/src/slope/slope.h b/src/slope/slope.h
index d3fb588..5a70d6d 100644
--- a/src/slope/slope.h
+++ b/src/slope/slope.h
@@ -1,62 +1,547 @@
 /**
  * @file
- * @brief The declaration of the main functions in the slope package
+ * @brief The Slope class, which fits the SLOPE model
  */
 
-#include "parameters.h"
-#include "results.h"
+#pragma once
+
+#include "cd.h"
+#include "clusters.h"
+#include "helpers.h"
+#include "math.h"
+#include "objectives.h"
+#include "pgd.h"
+#include "regularization_sequence.h"
+#include "sorted_l1_norm.h"
+#include "standardize.h"
+#include
 #include
+#include
+#include
+#include
+#include
 namespace slope {
 
 /**
- * Calculates the slope coefficients for a linear regression model using the
- * SortedL1Norm regularization.
+ * The SLOPE model (Sorted L-One Penalized Estimation).
  *
- * @param x The dense input matrix of size n x p, where n is the number of
- * observations and p is the number of predictors.
- * @param y The response matrix of size n x 1.
- * @param alpha The regularization parameter sequence. If not provided, it will
- * be generated automatically.
- * @param lambda The regularization parameter for the SortedL1Norm
- * regularization. If not provided, it will be set to zero.
- * @param params A struct containing the parameters for the calculation. See
- * SlopeParameters.
- * @return The slope coefficients, intercept values, and primal values for each
- * step in the regularization path.
- * @see SlopeParameters
- * @ingroup estimators
+ * This class collects the hyperparameters of the SLOPE model, fits the
+ * regularization path through its fit() methods, and stores the resulting
+ * intercepts, coefficients, and diagnostics, which can be retrieved through
+ * the getter methods.
  */
-Results
-slope(const Eigen::MatrixXd& x,
-      const Eigen::MatrixXd& y,
-      Eigen::ArrayXd alpha = Eigen::ArrayXd::Zero(0),
-      Eigen::ArrayXd lambda = Eigen::ArrayXd::Zero(0),
-      const SlopeParameters& params = SlopeParameters());
+class Slope
+{
+public:
+  /**
+   * Default constructor for the Slope class.
+   *
+   * Initializes the Slope object with default parameter values.
+   */
+  Slope()
+    : intercept(true)
+    , standardize(true)
+    , update_clusters(false)
+    , alpha_min_ratio(-1)
+    , learning_rate_decr(0.5)
+    , q(0.1)
+    , tol(1e-4)
+    , max_it(1e6)
+    , max_it_outer(30)
+    , path_length(100)
+    , pgd_freq(10)
+    , print_level(0)
+    , lambda_type("bh")
+    , objective("gaussian")
+  {
+  }
 
-/**
- * Calculates the slope coefficients for a linear regression model using the
- * SortedL1Norm regularization.
- *
- * @param x Sparse input matrix of size n x p, where n is the number of
- * observations and p is the number of predictors.
- * @param y The response matrix of size n x 1.
- * @param alpha The regularization parameter sequence. If not provided, it will
- * be generated automatically.
- * @param lambda The regularization parameter for the SortedL1Norm
- * regularization. If not provided, it will be set to zero.
- * @param params A struct containing the parameters for the calculation. See
- * SlopeParamters.
- * @return The slope coefficients, intercept values, and primal values for each
- * step in the regularization path.
- * @see SlopeParameters
- * @ingroup estimators
- */
-Results
-slope(const Eigen::SparseMatrix<double>& x,
-      const Eigen::MatrixXd& y,
-      Eigen::ArrayXd alpha = Eigen::ArrayXd::Zero(0),
-      Eigen::ArrayXd lambda = Eigen::ArrayXd::Zero(0),
-      const SlopeParameters& params = SlopeParameters());
+  /**
+   * Sorted L-One Penalized Estimation.
+   *
+   * Fits a linear regression model to the given data, regularized with the
+   * sorted L1 norm (SLOPE).
+   *
+   * @param x The design matrix, where each column represents a feature.
+   * @param y The response matrix. Each row represents an observation.
+   * @param alpha Sequence of multipliers for the sorted l1 norm along the
+   * regularization path.
+   * @param lambda The regularization parameter for the sorted l1 norm, which
+   * is a positive nonincreasing array of weights.
+   */
+  void fit(const Eigen::MatrixXd& x,
+           const Eigen::MatrixXd& y,
+           const Eigen::ArrayXd& alpha = Eigen::ArrayXd::Zero(0),
+           const Eigen::ArrayXd& lambda = Eigen::ArrayXd::Zero(0));
+
+  /**
+   * Sorted L-One Penalized Estimation.
+   *
+   * Fits a linear regression model to the given data, regularized with the
+   * sorted L1 norm (SLOPE).
+   *
+   * @param x The design matrix, where each column represents a feature.
+   * @param y The response matrix. Each row represents an observation.
+   * @param alpha Sequence of multipliers for the sorted l1 norm along the
+   * regularization path.
+   * @param lambda The regularization parameter for the sorted l1 norm, which
+   * is a positive nonincreasing array of weights.
+   */
+  void fit(const Eigen::SparseMatrix<double>& x,
+           const Eigen::MatrixXd& y,
+           const Eigen::ArrayXd& alpha = Eigen::ArrayXd::Zero(0),
+           const Eigen::ArrayXd& lambda = Eigen::ArrayXd::Zero(0));
+
+  /**
+   * @brief Sets the intercept flag.
+   *
+   * @param intercept Should an intercept be fitted?
+   */
+  void setIntercept(bool intercept);
+
+  /**
+   * @brief Sets the standardize flag.
+   *
+   * @param standardize Should the design matrix be standardized?
+   */
+  void setStandardize(bool standardize);
+
+  /**
+   * @brief Sets the update clusters flag.
+   *
+   * @param update_clusters Selects whether the coordinate descent keeps the
+   * clusters updated.
+   */
+  void setUpdateClusters(bool update_clusters);
+
+  /**
+   * @brief Sets the alpha min ratio.
+   *
+   * @param alpha_min_ratio The value to set for the alpha min ratio. A
+   * negative value means that the program automatically chooses 1e-4 if the
+   * number of observations is larger than the number of features and 1e-2
+   * otherwise.
+   */
+  void setAlphaMinRatio(double alpha_min_ratio);
+
+  /**
+   * @brief Sets the learning rate decrement.
+   *
+   * @param learning_rate_decr The value to set for the learning rate
+   * decrement for the proximal gradient descent step.
+   */
+  void setLearningRateDecr(double learning_rate_decr);
+
+  /**
+   * @brief Sets the q value.
+   *
+   * @param q The value to set for the q value for use in automatically
+   * generating the lambda sequence. Values between 0 and 1 are allowed.
+   */
+  void setQ(double q);
+
+  /**
+   * @brief Sets the tolerance value.
+   *
+   * @param tol The value to set for the tolerance value. Must be positive.
+   */
+  void setTol(double tol);
+
+  /**
+   * @brief Sets the maximum number of iterations.
+   *
+   * @param max_it The value to set for the maximum number of iterations. Must
+   * be positive.
+   */
+  void setMaxIt(int max_it);
+
+  /**
+   * @brief Sets the maximum number of outer iterations.
+   *
+   * @param max_it_outer The value to set for the maximum number of outer
+   * iterations of the iteratively reweighted least-squares (IRLS) loop. Must
+   * be positive. Has no real effect when the objective is Gaussian.
+   */
+  void setMaxItOuter(int max_it_outer);
+
+  /**
+   * @brief Sets the path length.
+   *
+   * @param path_length The value to set for the path length.
+   */
+  void setPathLength(int path_length);
+
+  /**
+   * @brief Sets the frequency of proximal gradient descent steps.
+   *
+   * @param pgd_freq How often proximal gradient descent steps are run: every
+   * pgd_freq-th inner iteration runs a proximal gradient descent step and the
+   * remaining iterations run coordinate descent steps. A value of 1 means
+   * that the algorithm runs only proximal gradient descent steps.
+   */
+  void setPgdFreq(int pgd_freq);
+
+  /**
+   * @brief Sets the print level.
+   *
+   * @param print_level The value to set for the print level.
+   * A level of 0 means no printing; a level of 1 prints progress along the
+   * regularization path; a level of 2 adds information from the IRLS (outer)
+   * loop; a level of 3 adds information from the inner loop; and a level of 4
+   * additionally prints debugging output, such as the weights and the working
+   * response.
+   */
+  void setPrintLevel(int print_level);
+
+  /**
+   * @brief Sets the lambda type.
+   *
+   * @param lambda_type The value to set for the lambda type. Only "bh" is
+   * allowed at the moment, which computes the Benjamini-Hochberg sequence.
+   * @see setQ
+   */
+  void setLambdaType(const std::string& lambda_type);
+
+  /**
+   * @brief Sets the objective.
+   *
+   * @param objective The value to set for the objective. Only "gaussian" is
+   * allowed at the moment, which results in the standard SLOPE.
+   */
+  void setObjective(const std::string& objective);
+
+  /**
+   * @brief Get the alpha sequence.
+   *
+   * @return The sequence of multipliers for the sorted L1 norm along the
+   * regularization path.
+   */
+  const Eigen::ArrayXd& getAlpha() const;
+
+  /**
+   * @brief Get the lambda sequence.
+   *
+   * @return The sequence of lambda values for the weights of the sorted L1
+   * norm.
+   */
+  const Eigen::ArrayXd& getLambda() const;
+
+  /**
+   * Get the coefficients from the path.
+   *
+   * @return The coefficients from the path, stored in a sparse matrix.
+   */
+  const Eigen::SparseMatrix<double>& getCoefs() const;
+
+  /**
+   * Get the intercepts from the path.
+   *
+   * @return The intercepts from the path, stored in an Eigen vector. If no
+   * intercepts were fit, this is a vector of zeros.
+   */
+  const Eigen::VectorXd& getIntercepts() const;
+
+  /**
+   * Get the total number of (inner) iterations.
+   *
+   * @return The total number of iterations from the inner loop, computed
+   * across the path.
+   */
+  int getTotalIterations() const;
+
+  /**
+   * Get the duality gaps.
+   *
+   * @return The duality gaps from the path.
+   */
+  const std::vector<std::vector<double>>& getDualGaps() const;
+
+  /**
+   * Get the primal objective values.
+   *
+   * @return The primal objective values from the path.
+   */
+  const std::vector<std::vector<double>>& getPrimals() const;
+
+private:
+  /**
+   * Calculates the slope coefficients for a linear regression model using the
+   * SortedL1Norm regularization.
+   *
+   * @param x The input matrix of size n x p, where n is the number of
+   * observations and p is the number of predictors.
+   * @param y The response matrix of size n x 1.
+   * @param alpha The regularization parameter sequence. If not provided, it
+   * will be generated automatically.
+   * @param lambda The regularization parameter for the SortedL1Norm
+   * regularization. If not provided, it will be set to zero.
+   */
+  template<typename T>
+  void fitImpl(const T& x,
+               const Eigen::VectorXd& y,
+               Eigen::ArrayXd alpha,
+               Eigen::ArrayXd lambda)
+  {
+    using Eigen::VectorXd;
+
+    const int n = x.rows();
+    const int p = x.cols();
+
+    if (n != y.rows()) {
+      throw std::invalid_argument("x and y must have the same number of rows");
+    }
+
+    auto [x_centers, x_scales] = computeCentersAndScales(x, this->standardize);
+
+    std::unique_ptr<Objective> objective = setupObjective(this->objective);
+
+    beta0 = 0.0;
+    beta.resize(p);
+    beta.setZero();
+    VectorXd eta = VectorXd::Zero(n); // linear predictor
+    VectorXd w = VectorXd::Ones(n);   // weights
+    VectorXd z = y;                   // working response
+
+    objective->updateWeightsAndWorkingResponse(w, z, eta, y);
+
+    VectorXd residual = z;
+
+    if (lambda.size() == 0) {
+      lambda = lambdaSequence(p, this->q, this->lambda_type);
+    } else {
+      if (lambda.size() != p) {
+        throw std::invalid_argument(
+          "lambda must be the same length as the number of predictors");
+      }
+      if (lambda.minCoeff() < 0) {
+        throw std::invalid_argument("lambda must be non-negative");
+      }
+      if (!lambda.isFinite().all()) {
+        throw std::invalid_argument("lambda must be finite");
+      }
+    }
+
+    // Setup the regularization sequence and path
+    SortedL1Norm sl1_norm{ lambda };
+
+    if (alpha.size() == 0) {
+      alpha = regularizationPath(x,
+                                 w,
+                                 z,
+                                 x_centers,
+                                 x_scales,
+                                 sl1_norm,
+                                 this->path_length,
+                                 this->alpha_min_ratio,
+                                 this->intercept,
+                                 this->standardize);
+    } else {
+      if (alpha.minCoeff() < 0) {
+        throw std::invalid_argument("alpha must be non-negative");
+      }
+      if (!alpha.isFinite().all()) {
+        throw std::invalid_argument("alpha must be finite");
+      }
+      path_length = alpha.size();
+    }
+
+    int path_length = this->path_length;
+
+    std::vector<Eigen::Triplet<double>> beta_triplets;
+    std::vector<double> dual_gaps;
+    std::vector<double> primals;
+
+    beta0s.resize(path_length);
+
+    double learning_rate = 1.0;
+
+    VectorXd beta_old_outer = beta;
+
+    Gaussian subprob_objective;
+
+    Clusters clusters(beta);
+
+    this->it_total = 0;
+
+    // Regularization path loop
+    for (int path_step = 0; path_step < path_length; ++path_step) {
+      if (this->print_level > 0) {
+        std::cout << "Path step: " << path_step
+                  << ", alpha: " << alpha(path_step) << std::endl;
+      }
+
+      sl1_norm.setAlpha(alpha(path_step));
+
+      // IRLS loop
+      for (int it_outer = 0; it_outer < this->max_it_outer; ++it_outer) {
+        // The residual is kept up to date, but not eta. So we need to compute
+        // it here.
+        eta = z - residual;
+
+        // Compute primal, dual, and gap
+        double primal = objective->loss(eta, y) + sl1_norm.eval(beta);
+        primals.emplace_back(primal);
+
+        VectorXd gen_residual = objective->residual(eta, y);
+
+        VectorXd gradient = computeGradient(
+          x, gen_residual, x_centers, x_scales, this->standardize);
+        VectorXd theta = gen_residual;
+        theta.array() /= std::max(1.0, sl1_norm.dualNorm(gradient));
+        double dual = objective->dual(theta, y);
+
+        double dual_gap = primal - dual;
+
+        dual_gaps.emplace_back(dual_gap);
+
+        double tol_scaled = std::abs(primal) * this->tol;
+
+        if (this->print_level > 1) {
+          std::cout << indent(1) << "IRLS iteration: " << it_outer << std::endl
+                    << indent(2) << "primal (main problem): " << primal
+                    << std::endl
+                    << indent(2) << "duality gap (main problem): " << dual_gap
+                    << ", tol: " << tol_scaled << std::endl;
+        }
+
+        if (std::max(dual_gap, 0.0) <= tol_scaled) {
+          break;
+        }
+
+        // Update weights and working response
+        beta_old_outer = beta;
+
+        objective->updateWeightsAndWorkingResponse(w, z, eta, y);
+        residual = z - eta;
+
+        if (this->print_level > 3) {
+          printContents(w, " weights");
+          printContents(z, " working response");
+        }
+
+        for (int it = 0; it < this->max_it; ++it) {
+          if (it % this->pgd_freq == 0) {
+            double g = (0.5 / n) * residual.cwiseAbs2().dot(w);
+            double h = sl1_norm.eval(beta);
+            double primal_inner = g + h;
+
+            VectorXd gradient = computeGradient(
+              x, residual, x_centers, x_scales, this->standardize);
+
+            // Obtain a feasible dual point by dual scaling
+            theta = residual;
+            theta.array() /= std::max(1.0, sl1_norm.dualNorm(gradient));
+            double dual_inner = subprob_objective.dual(theta, z);
+
+            double dual_gap_inner = primal_inner - dual_inner;
+
+            double tol_inner = std::abs(primal_inner) * this->tol;
+
+            if (this->print_level > 2) {
+              std::cout << indent(2) << "iteration: " << it << std::endl
+                        << indent(3) << "primal (inner): " << primal_inner
+                        << std::endl
+                        << indent(3)
+                        << "duality gap (inner): " << dual_gap_inner
+                        << ", tol: " << tol_inner << std::endl;
+            }
+
+            if (std::max(dual_gap_inner, 0.0) <= tol_inner) {
+              break;
+            }
+
+            VectorXd beta_old = beta;
+
+            if (this->print_level > 2) {
+              std::cout << indent(3) << "Running PGD step" << std::endl;
+            }
+
+            proximalGradientDescent(beta0,
+                                    beta,
+                                    residual,
+                                    learning_rate,
+                                    gradient,
+                                    x,
+                                    w,
+                                    z,
+                                    sl1_norm,
+                                    x_centers,
+                                    x_scales,
+                                    g,
+                                    intercept,
+                                    standardize,
+                                    learning_rate_decr,
+                                    print_level);
+
+            clusters.update(beta);
+          } else {
+            if (this->print_level > 2) {
+              std::cout << indent(3) << "Running CD step" << std::endl;
+            }
+
+            coordinateDescent(beta0,
+                              beta,
+                              residual,
+                              clusters,
+                              x,
+                              w,
+                              z,
+                              sl1_norm,
+                              x_centers,
+                              x_scales,
+                              this->intercept,
+                              this->standardize,
+                              this->update_clusters,
+                              this->print_level);
+          }
+        }
+        it_total++;
+      }
+
+      // Store everything for this step of the path
+      auto [beta0_out, beta_out] = rescaleCoefficients(
+        beta0, beta, x_centers, x_scales, intercept, standardize);
+
+      beta0s(path_step) = std::move(beta0_out);
+
+      for (int j = 0; j < p; ++j) {
+        if (beta_out(j) != 0) {
+          beta_triplets.emplace_back(j, path_step, beta_out(j));
+        }
+      }
+
+      primals_path.emplace_back(primals);
+      dual_gaps_path.emplace_back(dual_gaps);
+    }
+
+    betas.resize(p, path_length);
+    betas.setFromTriplets(beta_triplets.begin(), beta_triplets.end());
+    alpha_out = alpha;
+    lambda_out = lambda;
+  }
+
+  // parameters
+  bool intercept;
+  bool standardize;
+  bool update_clusters;
+  double alpha_min_ratio;
+  double learning_rate_decr;
+  double q;
+  double tol;
+  int max_it;
+  int max_it_outer;
+  int path_length;
+  int pgd_freq;
+  int print_level;
+  std::string lambda_type;
+  std::string objective;
+
+  // estimates
+  Eigen::ArrayXd alpha_out;
+  Eigen::ArrayXd lambda_out;
+  Eigen::SparseMatrix<double> betas;
+  Eigen::VectorXd beta0s;
+  Eigen::VectorXd beta;
+  double beta0;
+  int it_total;
+  std::vector<std::vector<double>> dual_gaps_path;
+  std::vector<std::vector<double>> primals_path;
+};
 
 } // namespace slope
diff --git a/src/slope/standardize.cpp b/src/slope/standardize.cpp
index 11e4647..7cd094c 100644
--- a/src/slope/standardize.cpp
+++ b/src/slope/standardize.cpp
@@ -7,19 +7,20 @@ rescaleCoefficients(double beta0,
                     Eigen::VectorXd beta,
                     const Eigen::VectorXd& x_centers,
                     const Eigen::VectorXd& x_scales,
-                    const SlopeParameters& params)
+                    const bool intercept,
+                    const bool standardize)
 {
   const int p = beta.rows();
 
   double x_bar_beta_sum = 0;
 
-  if (params.standardize) {
+  if (standardize) {
     for (int j = 0; j < p; ++j) {
       beta(j) /= x_scales(j);
       x_bar_beta_sum += x_centers(j) * beta(j);
     }
 
-    if (params.intercept) {
+    if (intercept) {
       beta0 -= x_bar_beta_sum;
     }
   }
diff --git a/src/slope/standardize.h b/src/slope/standardize.h
index 4c3ce7a..94e69fb 100644
--- a/src/slope/standardize.h
+++ b/src/slope/standardize.h
@@ -5,7 +5,6 @@
 
 #pragma once
 
-#include "parameters.h"
 #include
 
 namespace slope {
@@ -23,7 +22,7 @@
  */
 template<typename T>
 std::tuple<Eigen::VectorXd, Eigen::VectorXd>
-standardize(const T& x, const bool standardize)
+computeCentersAndScales(const T& x, const bool standardize)
 {
   const int n = x.rows();
   const int p = x.cols();
@@ -67,7 +66,8 @@
  * @param beta The vector of coefficients.
  * @param x_centers The vector of center values.
  * @param x_scales The vector of scale factors.
- * @param params The slope parameters.
+ * @param intercept Should an intercept be fit?
+ * @param standardize Is the design matrix standardized?
  * @return A tuple containing the rescaled intercept and coefficients.
 *
 * @note The input vectors `beta`, `x_centers`, and `x_scales` must have the
@@ -81,6 +81,7 @@
 rescaleCoefficients(double beta0,
                     Eigen::VectorXd beta,
                     const Eigen::VectorXd& x_centers,
                     const Eigen::VectorXd& x_scales,
-                    const SlopeParameters& params);
+                    const bool intercept,
+                    const bool standardize);
 
 } // namespace slope
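
Example usage (an illustrative sketch, not part of the patch): the new API configures a slope::Slope object through setters, fits with fit(), and reads results through getters, replacing the old slope::slope() free function, SlopeParameters struct, and Results struct. The toy data below and the explicit Eigen/iostream includes are assumptions made only so the example is self-contained.

#include "slope/slope.h"

#include <Eigen/Core>
#include <Eigen/SparseCore>
#include <iostream>

int main()
{
  // Toy data: 100 observations, 10 features (illustrative only).
  Eigen::MatrixXd x = Eigen::MatrixXd::Random(100, 10);
  Eigen::MatrixXd y = x.col(0) + 0.1 * Eigen::MatrixXd::Random(100, 1);

  // Configure the model through setters instead of a SlopeParameters struct.
  slope::Slope model;
  model.setIntercept(true);
  model.setStandardize(true);
  model.setQ(0.1); // quantile for the Benjamini-Hochberg lambda sequence

  // alpha and lambda default to empty arrays and are generated automatically.
  model.fit(x, y);

  // Results are read from the model instead of a returned Results struct.
  const Eigen::SparseMatrix<double>& betas = model.getCoefs();
  const Eigen::VectorXd& intercepts = model.getIntercepts();

  std::cout << "path length: " << model.getAlpha().size()
            << ", total inner iterations: " << model.getTotalIterations()
            << ", nonzero coefficients in last step: "
            << betas.col(betas.cols() - 1).nonZeros() << '\n'
            << "intercept at last step: " << intercepts(intercepts.size() - 1)
            << std::endl;
}

One practical consequence of this design, visible in main.cpp above, is that bindings no longer need to marshal a parameter struct and unpack a result struct: each option maps to one setter call, and each output to one getter call on the fitted model.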