From 5962f83abbbeb5b726627778ed27716c6a80af10 Mon Sep 17 00:00:00 2001 From: Paul Koch Date: Tue, 24 Dec 2024 00:25:43 -0800 Subject: [PATCH] change metric for categorical sorting to match LightGBM --- .../tests/glassbox/ebm/test_ebm.py | 8 +++++ .../PartitionOneDimensionalBoosting.cpp | 29 +++++-------------- .../libebm/tests/boosting_unusual_inputs.cpp | 2 +- 3 files changed, 17 insertions(+), 22 deletions(-) diff --git a/python/interpret-core/tests/glassbox/ebm/test_ebm.py b/python/interpret-core/tests/glassbox/ebm/test_ebm.py index e567988be..5becc47f6 100644 --- a/python/interpret-core/tests/glassbox/ebm/test_ebm.py +++ b/python/interpret-core/tests/glassbox/ebm/test_ebm.py @@ -655,8 +655,13 @@ def test_eval_terms_multiclass(): def test_ebm_sample_weight(): + from interpret.develop import get_option, set_option + X, y, names, types = make_synthetic(classes=2, output_type="float") + cat_smooth = get_option("cat_smooth") + set_option("cat_smooth", 2.2250738585072014e-308) + ebm = ExplainableBoostingClassifier(names, types) ebm.fit(X, y) @@ -672,6 +677,9 @@ def test_ebm_sample_weight(): changed = ExplainableBoostingClassifier(names, types) changed.fit(X, y, sample_weight=weights) + # restore cat_smooth value + set_option("cat_smooth", cat_smooth) + assert not np.array_equal(ebm.predict_proba(X), changed.predict_proba(X)) diff --git a/shared/libebm/PartitionOneDimensionalBoosting.cpp b/shared/libebm/PartitionOneDimensionalBoosting.cpp index 23355c16a..4de6a050c 100644 --- a/shared/libebm/PartitionOneDimensionalBoosting.cpp +++ b/shared/libebm/PartitionOneDimensionalBoosting.cpp @@ -704,19 +704,12 @@ template class CompareNodeGain final { template class CompareBin final { bool m_bHessianRuntime; - FloatCalc m_regAlpha; - FloatCalc m_regLambda; - FloatCalc m_deltaStepMax; + FloatCalc m_categoricalSmoothing; public: - INLINE_ALWAYS CompareBin(const bool bHessianRuntime, - const FloatCalc regAlpha, - const FloatCalc regLambda, - const FloatCalc deltaStepMax) { + INLINE_ALWAYS CompareBin(const bool bHessianRuntime, FloatCalc categoricalSmoothing) { m_bHessianRuntime = bHessianRuntime; - m_regAlpha = regAlpha; - m_regLambda = regLambda; - m_deltaStepMax = deltaStepMax; + m_categoricalSmoothing = categoricalSmoothing; } INLINE_ALWAYS bool operator()( @@ -729,19 +722,13 @@ template class CompareBin final { const FloatCalc hess1 = static_cast(bUpdateWithHessian ? lhs->GetGradientPairs()[0].GetHess() : lhs->GetWeight()); - const FloatCalc val1 = CalcNegUpdate(static_cast(lhs->GetGradientPairs()[0].m_sumGradients), - hess1, - m_regAlpha, - m_regLambda, - m_deltaStepMax); + const FloatCalc val1 = + static_cast(lhs->GetGradientPairs()[0].m_sumGradients) / (hess1 + m_categoricalSmoothing); const FloatCalc hess2 = static_cast(bUpdateWithHessian ? rhs->GetGradientPairs()[0].GetHess() : rhs->GetWeight()); - const FloatCalc val2 = CalcNegUpdate(static_cast(rhs->GetGradientPairs()[0].m_sumGradients), - hess2, - m_regAlpha, - m_regLambda, - m_deltaStepMax); + const FloatCalc val2 = + static_cast(rhs->GetGradientPairs()[0].m_sumGradients) / (hess2 + m_categoricalSmoothing); if(val1 == val2) { return lhs < rhs; @@ -835,7 +822,7 @@ template class PartitionOneDimensionalBoo std::sort(apBins, ppBinsEnd, CompareBin( - !(TermBoostFlags_DisableNewtonUpdate & flags), regAlpha, regLambda, deltaStepMax)); + !(TermBoostFlags_DisableNewtonUpdate & flags), categoricalSmoothing)); } pRootTreeNode->BEFORE_SetBinFirst(apBins); diff --git a/shared/libebm/tests/boosting_unusual_inputs.cpp b/shared/libebm/tests/boosting_unusual_inputs.cpp index 71cf9d040..8aa683e96 100644 --- a/shared/libebm/tests/boosting_unusual_inputs.cpp +++ b/shared/libebm/tests/boosting_unusual_inputs.cpp @@ -2175,7 +2175,7 @@ static double RandomizedTesting(const AccelerationFlags acceleration) { } TEST_CASE("stress test, boosting") { - const double expected = 26838942758406.215; + const double expected = 26758407585917.129; double validationMetricExact = RandomizedTesting(AccelerationFlags_NONE); CHECK(validationMetricExact == expected);