diff --git a/shared/libebm/compute/avx2_ebm/avx2_32.cpp b/shared/libebm/compute/avx2_ebm/avx2_32.cpp index cdca33c32..3e3c176fa 100644 --- a/shared/libebm/compute/avx2_ebm/avx2_32.cpp +++ b/shared/libebm/compute/avx2_ebm/avx2_32.cpp @@ -34,12 +34,24 @@ namespace DEFINED_ZONE_NAME { #error DEFINED_ZONE_NAME must be defined #endif // DEFINED_ZONE_NAME -// this is super-special and included inside the zone namespace -#include "objective_registrations.hpp" - static constexpr size_t k_cAlignment = 32; - struct alignas(k_cAlignment) Avx2_32_Float; +struct alignas(k_cAlignment) Avx2_32_Int; + +template +inline Avx2_32_Float Exp(const Avx2_32_Float& val) noexcept; +template +inline Avx2_32_Float Log(const Avx2_32_Float& val) noexcept; + +// this is super-special and included inside the zone namespace +#include "objective_registrations.hpp" struct alignas(k_cAlignment) Avx2_32_Int final { friend Avx2_32_Float; @@ -138,18 +150,6 @@ struct alignas(k_cAlignment) Avx2_32_Int final { static_assert(std::is_standard_layout::value && std::is_trivially_copyable::value, "This allows offsetof, memcpy, memset, inter-language, GPU and cross-machine use where needed"); -template -inline Avx2_32_Float Exp(const Avx2_32_Float& val) noexcept; -template -inline Avx2_32_Float Log(const Avx2_32_Float& val) noexcept; - struct alignas(k_cAlignment) Avx2_32_Float final { template friend Avx2_32_Float Exp(const Avx2_32_Float& val) noexcept; diff --git a/shared/libebm/compute/avx512f_ebm/avx512f_32.cpp b/shared/libebm/compute/avx512f_ebm/avx512f_32.cpp index 75a3b2c57..fbfdcf446 100644 --- a/shared/libebm/compute/avx512f_ebm/avx512f_32.cpp +++ b/shared/libebm/compute/avx512f_ebm/avx512f_32.cpp @@ -34,12 +34,24 @@ namespace DEFINED_ZONE_NAME { #error DEFINED_ZONE_NAME must be defined #endif // DEFINED_ZONE_NAME -// this is super-special and included inside the zone namespace -#include "objective_registrations.hpp" - static constexpr size_t k_cAlignment = 64; - struct alignas(k_cAlignment) Avx512f_32_Float; +struct alignas(k_cAlignment) Avx512f_32_Int; + +template +inline Avx512f_32_Float Exp(const Avx512f_32_Float& val) noexcept; +template +inline Avx512f_32_Float Log(const Avx512f_32_Float& val) noexcept; + +// this is super-special and included inside the zone namespace +#include "objective_registrations.hpp" struct alignas(k_cAlignment) Avx512f_32_Int final { friend Avx512f_32_Float; @@ -152,18 +164,6 @@ struct alignas(k_cAlignment) Avx512f_32_Int final { static_assert(std::is_standard_layout::value && std::is_trivially_copyable::value, "This allows offsetof, memcpy, memset, inter-language, GPU and cross-machine use where needed"); -template -inline Avx512f_32_Float Exp(const Avx512f_32_Float& val) noexcept; -template -inline Avx512f_32_Float Log(const Avx512f_32_Float& val) noexcept; - struct alignas(k_cAlignment) Avx512f_32_Float final { template friend Avx512f_32_Float Exp(const Avx512f_32_Float& val) noexcept; diff --git a/shared/libebm/compute/cpu_ebm/cpu_64.cpp b/shared/libebm/compute/cpu_ebm/cpu_64.cpp index ff613a817..f452b2e11 100644 --- a/shared/libebm/compute/cpu_ebm/cpu_64.cpp +++ b/shared/libebm/compute/cpu_ebm/cpu_64.cpp @@ -31,11 +31,24 @@ namespace DEFINED_ZONE_NAME { #error DEFINED_ZONE_NAME must be defined #endif // DEFINED_ZONE_NAME +struct Cpu_64_Float; +struct Cpu_64_Int; + +template +inline Cpu_64_Float Exp(const Cpu_64_Float& val) noexcept; +template +inline Cpu_64_Float Log(const Cpu_64_Float& val) noexcept; + // this is super-special and included inside the zone namespace #include "objective_registrations.hpp" -struct Cpu_64_Float; - struct Cpu_64_Int final { friend Cpu_64_Float; friend inline Cpu_64_Float IfEqual(const Cpu_64_Int& cmp1, @@ -96,18 +109,6 @@ struct Cpu_64_Int final { static_assert(std::is_standard_layout::value && std::is_trivially_copyable::value, "This allows offsetof, memcpy, memset, inter-language, GPU and cross-machine use where needed"); -template -inline Cpu_64_Float Exp(const Cpu_64_Float& val) noexcept; -template -inline Cpu_64_Float Log(const Cpu_64_Float& val) noexcept; - struct Cpu_64_Float final { template friend Cpu_64_Float Exp(const Cpu_64_Float& val) noexcept; diff --git a/shared/libebm/compute/objectives/GammaDevianceRegressionObjective.hpp b/shared/libebm/compute/objectives/GammaDevianceRegressionObjective.hpp index 0cfc23557..1e419ce59 100644 --- a/shared/libebm/compute/objectives/GammaDevianceRegressionObjective.hpp +++ b/shared/libebm/compute/objectives/GammaDevianceRegressionObjective.hpp @@ -52,25 +52,23 @@ template struct GammaDevianceRegressionObjective : RegressionOb inline double FinishMetric(const double metricSum) const noexcept { return 2.0 * metricSum; } GPU_DEVICE inline TFloat CalcMetric(const TFloat& score, const TFloat& target) const noexcept { - const TFloat prediction = Exp(score); // log link function - const TFloat frac = target / prediction; + const TFloat invPrediction = Exp(score); // log link function + const TFloat frac = target * invPrediction; const TFloat metric = frac - 1.0 - Log(frac); return metric; } GPU_DEVICE inline TFloat CalcGradient(const TFloat& score, const TFloat& target) const noexcept { - const TFloat prediction = Exp(score); // log link function - const TFloat frac = target / prediction; - const TFloat gradient = 1.0 - frac; + const TFloat invPrediction = Exp(score); // log link function + const TFloat gradient = FusedNegateMultiplyAdd(target, invPrediction, 1.0); return gradient; } GPU_DEVICE inline GradientHessian CalcGradientHessian( const TFloat& score, const TFloat& target) const noexcept { - const TFloat prediction = Exp(score); // log link function - const TFloat frac = target / prediction; - const TFloat gradient = 1.0 - frac; - const TFloat hessian = frac; + const TFloat invPrediction = Exp(score); // log link function + const TFloat gradient = FusedNegateMultiplyAdd(target, invPrediction, 1.0); + const TFloat hessian = target * invPrediction; return MakeGradientHessian(gradient, hessian); } };