diff --git a/src/slope/standardize.h b/src/slope/standardize.h index 94e69fb..bca3703 100644 --- a/src/slope/standardize.h +++ b/src/slope/standardize.h @@ -36,17 +36,20 @@ computeCentersAndScales(const T& x, const bool standardize) int count = 0; for (typename T::InnerIterator it(x, j); it; ++it) { + count++; double delta = it.value() - mean; - mean += delta / (++count); - m2 += delta * (it.value() - mean); + mean += delta / count; + double delta2 = it.value() - mean; + m2 += delta * delta2; } // Account for zeros in the column - double delta = -mean; while (count < n) { count++; + double delta = -mean; mean += delta / count; - m2 -= mean * delta; + double delta2 = -mean; + m2 += delta * delta2; } x_means(j) = mean; diff --git a/tests/sparse.cpp b/tests/sparse.cpp index 1041463..c2c78a4 100644 --- a/tests/sparse.cpp +++ b/tests/sparse.cpp @@ -1,8 +1,9 @@ +#include "../src/slope/slope.h" +#include "../src/slope/standardize.h" #include "test_helpers.hpp" #include #include #include -#include TEST_CASE("Sparse and dense methods agree", "[gaussian][sparse]") { @@ -16,29 +17,37 @@ TEST_CASE("Sparse and dense methods agree", "[gaussian][sparse]") 0.0, 0.0, 0.0, 0.0, 0.0, 0.94280368, 0.0, 0.0, 0.3499374, 0.0, 0.22377115, 0.0, 0.0, 0.96893287, 0.95858229, 0.70486475, 0.60885162, 0.0, 0.0, 0.92902639, 0.0, 0.4978676, 0.0, 0.50022619; - Eigen::SparseMatrix x_sparse = x_dense.sparseView(); + Eigen::SparseMatrix x_sparse = x_dense.sparseView().eval(); + + auto [x_centers_sparse, x_scales_sparse] = + slope::computeCentersAndScales(x_sparse, true); + auto [x_centers_dense, x_scales_dense] = + slope::computeCentersAndScales(x_dense, true); + + REQUIRE_THAT(x_centers_dense, VectorApproxEqual(x_centers_sparse)); + REQUIRE_THAT(x_scales_dense, VectorApproxEqual(x_scales_sparse)); beta << 1, 2, -0.9; y = x_dense * beta; - Eigen::ArrayXd alpha = Eigen::ArrayXd::Zero(1); + Eigen::ArrayXd alpha = Eigen::ArrayXd::Ones(1); Eigen::Array lambda; lambda << 0.5, 0.5, 0.2; slope::Slope model; - model.setIntercept(true); + model.setIntercept(false); model.setStandardize(true); model.fit(x_sparse, y, alpha, lambda); - auto coefs_sparse = model.getCoefs(); + Eigen::VectorXd coefs_sparse = model.getCoefs(); model.fit(x_dense, y, alpha, lambda); - auto coefs_dense = model.getCoefs(); + Eigen::VectorXd coefs_dense = model.getCoefs(); Eigen::VectorXd coef_sparse = coefs_sparse.col(0); Eigen::VectorXd coef_dense = coefs_dense.col(0); - REQUIRE_THAT(coef_sparse, VectorApproxEqual(coef_dense, 1e-4)); + REQUIRE_THAT(coef_sparse, VectorApproxEqual(coef_dense, 1e-6)); } diff --git a/tests/standardize.cpp b/tests/standardize.cpp index a115dfd..5f707bb 100644 --- a/tests/standardize.cpp +++ b/tests/standardize.cpp @@ -1,9 +1,10 @@ +#include "../src/slope/standardize.h" +#include "../src/slope/math.h" #include "test_helpers.hpp" #include #include #include #include -#include std::tuple computeMeanAndStdDev(const Eigen::MatrixXd& x) @@ -25,19 +26,9 @@ computeMeanAndStdDev(const Eigen::MatrixXd& x) std::tuple computeMeanAndStdDev(const Eigen::SparseMatrix& x) { - const int n = x.rows(); - const int p = x.cols(); - - Eigen::VectorXd x_means(p); - Eigen::VectorXd x_stddevs(p); - - for (int j = 0; j < p; ++j) { - x_means(j) = x.col(j).sum() / n; - x_stddevs(j) = - std::sqrt(x.col(j).squaredNorm() / n - std::pow(x_means(j), 2)); - } + Eigen::MatrixXd x_dense = x; - return { x_means, x_stddevs }; + return computeMeanAndStdDev(x_dense); } TEST_CASE("Check that standardization algorithm works", @@ -67,3 +58,46 @@ TEST_CASE("Check that standardization algorithm works", REQUIRE_THAT(x_centers_dense, VectorApproxEqual(x_centers_ref)); REQUIRE_THAT(x_scales, VectorApproxEqual(x_scales_ref)); } + +TEST_CASE("Check that in-place standardization works", + "[utils][standardization]") +{ + using Catch::Matchers::WithinAbs; + + Eigen::MatrixXd x_dense(3, 3); + x_dense << 1, 0, 1000, 98.2, 0, 34, -1007, 0, 0; + Eigen::SparseMatrix x_sparse = x_dense.sparseView(); + Eigen::VectorXd beta(3); + beta << 1, 0, -1.8; + Eigen::VectorXd y = x_dense * beta; + + Eigen::VectorXd residual = y; + residual(0) += 1; + residual(1) += -0.2; + residual(2) += 0.9; + + auto [x_centers_dense, x_scales_dense] = computeMeanAndStdDev(x_dense); + auto [x_centers_sparse, x_scales_sparse] = computeMeanAndStdDev(x_sparse); + + REQUIRE_THAT(x_centers_dense, VectorApproxEqual(x_centers_sparse)); + REQUIRE_THAT(x_scales_dense, VectorApproxEqual(x_scales_sparse)); + + Eigen::VectorXd residual_dense = + y - x_dense * beta.cwiseQuotient(x_scales_dense); + residual_dense.array() += + x_centers_dense.cwiseQuotient(x_scales_dense).dot(beta); + + Eigen::VectorXd residual_sparse = + y - x_sparse * beta.cwiseQuotient(x_scales_sparse); + residual_sparse.array() += + x_centers_sparse.cwiseQuotient(x_scales_sparse).dot(beta); + + REQUIRE_THAT(residual_dense, VectorApproxEqual(residual_sparse)); + + auto gradient_dense = slope::computeGradient( + x_dense, residual_dense, x_centers_dense, x_scales_dense, true); + auto gradient_sparse = slope::computeGradient( + x_sparse, residual_sparse, x_centers_sparse, x_scales_sparse, true); + + REQUIRE_THAT(gradient_dense, VectorApproxEqual(gradient_sparse)); +}