change default missing handling to "separate"
paulbkoch committed Dec 27, 2024
1 parent 4204754 commit 8581df3
Showing 2 changed files with 47 additions and 48 deletions.
8 changes: 4 additions & 4 deletions python/interpret-core/interpret/glassbox/_ebm/_ebm.py
@@ -2771,7 +2771,7 @@ class ExplainableBoostingClassifier(ClassifierMixin, EBMModel):
L2 regularization.
max_delta_step : float, default=0.0
Used to limit the max output of tree leaves. <=0.0 means no constraint.
missing: str, default="low"
missing: str, default="separate"
Method for handling missing values during boosting. The placement of the missing value bin can influence
the resulting model graphs. For example, placing the bin on the "low" side may cause missing values to
@@ -2944,7 +2944,7 @@ def __init__(
reg_alpha: Optional[float] = 0.0,
reg_lambda: Optional[float] = 0.0,
max_delta_step: Optional[float] = 0.0,
missing: str = "low",
missing: str = "separate",
max_leaves: int = 3,
monotone_constraints: Optional[Sequence[int]] = None,
objective: str = "log_loss",
@@ -3143,7 +3143,7 @@ class ExplainableBoostingRegressor(RegressorMixin, EBMModel):
L2 regularization.
max_delta_step : float, default=0.0
Used to limit the max output of tree leaves. <=0.0 means no constraint.
missing: str, default="low"
missing: str, default="separate"
Method for handling missing values during boosting. The placement of the missing value bin can influence
the resulting model graphs. For example, placing the bin on the "low" side may cause missing values to
@@ -3316,7 +3316,7 @@ def __init__(
reg_alpha: Optional[float] = 0.0,
reg_lambda: Optional[float] = 0.0,
max_delta_step: Optional[float] = 0.0,
missing: str = "low",
missing: str = "separate",
max_leaves: int = 2,
monotone_constraints: Optional[Sequence[int]] = None,
objective: str = "rmse",
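For context on the user-facing effect of this default change, here is a minimal usage sketch (not part of the commit): the `ExplainableBoostingClassifier` constructor and its `missing` parameter come from the signatures shown above, while the imports, toy data, and variable names are illustrative assumptions.

```python
import numpy as np
from interpret.glassbox import ExplainableBoostingClassifier

# Toy data with some missing values (illustrative only).
rng = np.random.default_rng(0)
X = rng.normal(size=(200, 3))
X[rng.random(X.shape) < 0.1] = np.nan  # inject missing entries
y = (np.nansum(X, axis=1) > 0).astype(int)

# After this commit, missing values are boosted into their own bin by default.
ebm_separate = ExplainableBoostingClassifier()  # equivalent to missing="separate"

# The previous default placement can still be requested explicitly.
ebm_low = ExplainableBoostingClassifier(missing="low")

ebm_separate.fit(X, y)
```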
87 changes: 43 additions & 44 deletions shared/libebm/PartitionOneDimensionalBoosting.cpp
@@ -326,50 +326,7 @@ static ErrorEbm Flatten(BoosterShell* const pBoosterShell,

while(true) {
if(nullptr == pTreeNode) {
- done:;
- EBM_ASSERT(cSamplesTotalDebug == cSamplesExpectedDebug);
-
- EBM_ASSERT(bNominal || pUpdateScore == aUpdateScore + cScores * cSlices);
-
- EBM_ASSERT(bNominal || pSplit == cSlices - 1 + pInnerTermUpdate->GetSplitPointer(iDimension));
-
- #ifndef NDEBUG
- UIntSplit prevDebug = 0;
- for(size_t iDebug = 0; iDebug < cSlices - 1; ++iDebug) {
- UIntSplit curDebug = pInnerTermUpdate->GetSplitPointer(iDimension)[iDebug];
- EBM_ASSERT(prevDebug < curDebug);
- prevDebug = curDebug;
- }
- EBM_ASSERT(prevDebug < cBins);
- #endif
-
- EBM_ASSERT(nullptr == pMissingValueTreeNode || nullptr != pMissingBin);
- if(nullptr != pMissingBin) {
- EBM_ASSERT(bMissing);
-
- FloatScore hess = static_cast<FloatCalc>(pMissingBin->GetWeight());
- const auto* pGradientPair = pMissingBin->GetGradientPairs();
- const auto* const pGradientPairEnd = pGradientPair + cScores;
- FloatScore* pMissingUpdateScore = aUpdateScore;
- do {
- if(bUpdateWithHessian) {
- hess = static_cast<FloatCalc>(pGradientPair->GetHess());
- }
- FloatCalc updateScore = -CalcNegUpdate<true>(static_cast<FloatCalc>(pGradientPair->m_sumGradients),
- hess,
- regAlpha,
- regLambda,
- deltaStepMax);
-
- *pMissingUpdateScore = updateScore;
- ++pMissingUpdateScore;
-
- ++pGradientPair;
- } while(pGradientPairEnd != pGradientPair);
- }
-
- LOG_0(Trace_Verbose, "Exited Flatten");
- return Error_None;
+ goto done;
}
if(!pTreeNode->DECONSTRUCT_IsRightChildTraversal()) {
// we checked earlier that countBins could be converted to a UIntSplit
@@ -411,6 +368,48 @@
}
}
}

+ done:;
+ EBM_ASSERT(cSamplesTotalDebug == cSamplesExpectedDebug);
+
+ EBM_ASSERT(bNominal || pUpdateScore == aUpdateScore + cScores * cSlices);
+
+ EBM_ASSERT(bNominal || pSplit == cSlices - 1 + pInnerTermUpdate->GetSplitPointer(iDimension));
+
+ #ifndef NDEBUG
+ UIntSplit prevDebug = 0;
+ for(size_t iDebug = 0; iDebug < cSlices - 1; ++iDebug) {
+ UIntSplit curDebug = pInnerTermUpdate->GetSplitPointer(iDimension)[iDebug];
+ EBM_ASSERT(prevDebug < curDebug);
+ prevDebug = curDebug;
+ }
+ EBM_ASSERT(prevDebug < cBins);
+ #endif
+
+ EBM_ASSERT(nullptr == pMissingValueTreeNode || nullptr != pMissingBin);
+ if(nullptr != pMissingBin) {
+ EBM_ASSERT(bMissing);
+
+ FloatScore hess = static_cast<FloatCalc>(pMissingBin->GetWeight());
+ const auto* pGradientPair = pMissingBin->GetGradientPairs();
+ const auto* const pGradientPairEnd = pGradientPair + cScores;
+ FloatScore* pMissingUpdateScore = aUpdateScore;
+ do {
+ if(bUpdateWithHessian) {
+ hess = static_cast<FloatCalc>(pGradientPair->GetHess());
+ }
+ FloatCalc updateScore = -CalcNegUpdate<true>(
+ static_cast<FloatCalc>(pGradientPair->m_sumGradients), hess, regAlpha, regLambda, deltaStepMax);
+
+ *pMissingUpdateScore = updateScore;
+ ++pMissingUpdateScore;
+
+ ++pGradientPair;
+ } while(pGradientPairEnd != pGradientPair);
+ }
+
+ LOG_0(Trace_Verbose, "Exited Flatten");
+ return Error_None;
}
WARNING_POP

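The C++ change above is purely structural: the finalization code (the debug asserts, the missing-bin update loop, and the exit logging) moves out of the `while(true)` traversal body and behind a `done:` label placed after the loop, which the traversal now reaches via `goto done;`. A minimal standalone sketch of that control-flow shape follows; the function, its arguments, and the loop body are invented for illustration and are not the library's code.

```cpp
#include <cstdio>

// Sketch of the restructured control flow in Flatten(): the traversal loop
// jumps to a single "done" label instead of finalizing inline inside the loop.
static int Flatten(const int* pNode, const int* const pNodeEnd) {
   while(true) {
      if(pNodeEnd == pNode) {
         // Before this commit the finalization lived here, inside the loop.
         goto done;
      }
      ++pNode; // stand-in for the per-node work done during traversal
   }

done:;
   // Finalization now happens once, after the traversal loop exits.
   printf("Exited Flatten\n");
   return 0;
}

int main() {
   const int nodes[] = {1, 2, 3};
   return Flatten(nodes, nodes + 3);
}
```

Keeping a single finalization site after the loop avoids nesting the missing-bin update and the asserts inside the traversal logic.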
