Skip to content

Commit

Permalink
REMOVE flatten
Browse files (browse the repository at this point in the history)
  • Loading branch information
paulbkoch committed Dec 25, 2024
1 parent da6649c commit 8f971b5
Showing 1 changed file with 18 additions and 269 deletions.
287 changes: 18 additions & 269 deletions shared/libebm/PartitionOneDimensionalBoosting.cpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -119,279 +119,28 @@ static ErrorEbm Flatten(BoosterShell* const pBoosterShell,
const TreeNode<bHessian>* pMissingValueTreeNode,
const size_t cSlices,
const size_t cBins) {
LOG_0(Trace_Verbose, "Entered Flatten");

EBM_ASSERT(nullptr != pBoosterShell);
EBM_ASSERT(iDimension <= k_cDimensionsMax);
EBM_ASSERT(nullptr != apBins);
EBM_ASSERT(1 <= cSlices);
EBM_ASSERT(2 <= cBins);
EBM_ASSERT(cSlices <= cBins);
EBM_ASSERT(!bNominal || cSlices == cBins);

ErrorEbm error;
UNUSED(bMissing);
UNUSED(bNominal);
UNUSED(flags);
UNUSED(regAlpha);
UNUSED(regLambda);
UNUSED(deltaStepMax);
UNUSED(iDimension);
UNUSED(apBins);
UNUSED(pMissingValueTreeNode);
UNUSED(cSlices);
UNUSED(cBins);

#ifndef NDEBUG
auto* const pRootTreeNodeDebug = pBoosterShell->GetTreeNodesTemp<bHessian>();
size_t cSamplesExpectedDebug = static_cast<size_t>(pRootTreeNodeDebug->GetBin()->GetCountSamples());
size_t cSamplesTotalDebug = 0;
#endif // NDEBUG

const Bin<FloatMain, UIntMain, true, true, bHessian>* pMissingBin = nullptr;

BoosterCore* const pBoosterCore = pBoosterShell->GetBoosterCore();
const size_t cScores = pBoosterCore->GetCountScores();
Tensor* const pInnerTermUpdate = pBoosterShell->GetInnerTermUpdate();

error = pInnerTermUpdate->SetCountSlices(iDimension, cSlices);
if(UNLIKELY(Error_None != error)) {
// already logged
return error;
}

const BoosterCore* const pBoosterCore = pBoosterShell->GetBoosterCore();
const size_t cScores = pBoosterCore->GetCountScores();

EBM_ASSERT(!IsMultiplyError(cScores, cSlices));
error = pInnerTermUpdate->EnsureTensorScoreCapacity(cScores * cSlices);
if(UNLIKELY(Error_None != error)) {
// already logged
return error;
}

UIntSplit* pSplit = pInnerTermUpdate->GetSplitPointer(iDimension);

FloatScore* const aUpdateScore = pInnerTermUpdate->GetTensorScoresPointer();
FloatScore* pUpdateScore;

UIntSplit prev = 0;
const Bin<FloatMain, UIntMain, true, true, bHessian>* const* ppBinCur = nullptr;
if(bNominal) {
UIntSplit iSplit = 1;
while(cSlices != iSplit) {
pSplit[iSplit - 1] = iSplit;
++iSplit;
}
ppBinCur = reinterpret_cast<const Bin<FloatMain, UIntMain, true, true, bHessian>* const*>(apBins);
} else {
pUpdateScore = aUpdateScore;

if(bMissing) {
EBM_ASSERT(2 <= cSlices); // no cuts if there was only missing bin

// always put a split on the missing bin
*pSplit = 1;
++pSplit;
prev = 1;

// pUpdateScore is overwritten later if bNominal
pUpdateScore += cScores;
}
}

const size_t cBytesPerBin = GetBinSize<FloatMain, UIntMain>(true, true, bHessian, cScores);
auto* const aBins = pBoosterShell->GetBoostingMainBins()->Specialize<FloatMain, UIntMain, true, true, bHessian>();

EBM_ASSERT(!IsOverflowTreeNodeSize(bHessian, cScores)); // we're accessing allocated memory
const size_t cBytesPerTreeNode = GetTreeNodeSize(bHessian, cScores);

auto* const pRootTreeNode = pBoosterShell->GetTreeNodesTemp<bHessian>();
auto* pTreeNode = pRootTreeNode;

const bool bUpdateWithHessian = bHessian && !(TermBoostFlags_DisableNewtonUpdate & flags);

TreeNode<bHessian>* pParent = nullptr;

while(true) {
if(UNPREDICTABLE(pTreeNode->AFTER_IsSplit())) {
auto* const pLeftChild = pTreeNode->DECONSTRUCT_TraverseLeftAndMark(pParent);
pParent = pTreeNode;
pTreeNode = pLeftChild;
} else {
const Bin<FloatMain, UIntMain, true, true, bHessian>* const* ppBinLast;
// if the pointer points to the space within the bins, then the TreeNode could not be split
// and this TreeNode never had children and we never wrote a pointer to the children in this memory
if(pTreeNode->AFTER_IsSplittable()) {
auto* const pChildren = pTreeNode->AFTER_GetChildren();

EBM_ASSERT(IndexTreeNode(pTreeNode, cBytesPerTreeNode) <= pChildren &&
pChildren <=
IndexTreeNode(pRootTreeNode, pBoosterCore->GetCountBytesTreeNodes() - cBytesPerTreeNode));

if(pMissingValueTreeNode == GetLeftNode(pChildren)) {
EBM_ASSERT(nullptr == pMissingBin);
pMissingBin = pTreeNode->GetBin();
}

// the node was examined and a gain calculated, so it has left and right children.
// We can retrieve the split location by looking at where the right child would end its range
const auto* const pRightChild = GetRightNode(pChildren, cBytesPerTreeNode);
ppBinLast = pRightChild->BEFORE_GetBinLast();

if(pMissingValueTreeNode == pRightChild) {
EBM_ASSERT(nullptr == pMissingBin);
pMissingBin = pTreeNode->GetBin();
}
} else {
ppBinLast = pTreeNode->BEFORE_GetBinLast();
if(pMissingValueTreeNode == pTreeNode) {
EBM_ASSERT(nullptr == pMissingBin);
pMissingBin = pTreeNode->GetBin();
}
}

EBM_ASSERT(apBins <= ppBinLast);
EBM_ASSERT(ppBinLast < apBins + (cBins - (nullptr != pMissingValueTreeNode ? size_t{1} : size_t{0})));

#ifndef NDEBUG
cSamplesTotalDebug += static_cast<size_t>(pTreeNode->GetBin()->GetCountSamples());
#endif // NDEBUG

size_t iEdge;
const auto* const aGradientPair = pTreeNode->GetBin()->GetGradientPairs();
size_t iScore;
if(nullptr != ppBinCur) {
goto determine_bin;
}
EBM_ASSERT(!bNominal);

iEdge = ppBinLast - apBins + 1 + (nullptr != pMissingValueTreeNode ? 1 : 0);

while(true) { // not a real loop
if(bMissing) {
if(TermBoostFlags_MissingLow & flags) {
if(nullptr == pMissingBin) {
pMissingBin = pTreeNode->GetBin();
}
if(1 == iEdge) {
// this cut would isolate the missing bin, but we handle those scores separately
break;
}
} else if(TermBoostFlags_MissingHigh & flags) {
++iEdge; // missing is at index 0 in the model, so we are offset by one
pMissingBin = pTreeNode->GetBin();
EBM_ASSERT(iEdge <= cBins + 1);
EBM_ASSERT(0 != prev);
if(cBins + 1 == iEdge && cBins == prev) {
// this cut would isolate the missing bin, but we handle those scores separately
break;
}
}
}

while(true) {
iScore = 0;
do {
FloatCalc updateScore;
if(bUpdateWithHessian) {
updateScore = -CalcNegUpdate<true>(static_cast<FloatCalc>(aGradientPair[iScore].m_sumGradients),
static_cast<FloatCalc>(aGradientPair[iScore].GetHess()),
regAlpha,
regLambda,
deltaStepMax);
} else {
updateScore = -CalcNegUpdate<true>(static_cast<FloatCalc>(aGradientPair[iScore].m_sumGradients),
static_cast<FloatCalc>(pTreeNode->GetBin()->GetWeight()),
regAlpha,
regLambda,
deltaStepMax);
}

*pUpdateScore = static_cast<FloatScore>(updateScore);
++pUpdateScore;

++iScore;
} while(cScores != iScore);
if(nullptr == ppBinCur) {
break;
}
EBM_ASSERT(bNominal);
++ppBinCur;
if(ppBinLast < ppBinCur) {
break;
}
determine_bin:;
const auto* const pBinCur = *ppBinCur;
const size_t iBin = CountBins(pBinCur, aBins, cBytesPerBin);
pUpdateScore = aUpdateScore + iBin * cScores;
}

break;
}

pTreeNode = pParent;

while(true) {
if(nullptr == pTreeNode) {
EBM_ASSERT(cSamplesTotalDebug == cSamplesExpectedDebug);

EBM_ASSERT(nullptr == pMissingValueTreeNode || nullptr != pMissingBin);
if(nullptr != pMissingBin) {
EBM_ASSERT(bMissing);

FloatScore hess = static_cast<FloatCalc>(pMissingBin->GetWeight());
const auto* pGradientPair = pMissingBin->GetGradientPairs();
const auto* const pGradientPairEnd = pGradientPair + cScores;
FloatScore* pMissingUpdateScore = aUpdateScore;
do {
if(bUpdateWithHessian) {
hess = static_cast<FloatCalc>(pGradientPair->GetHess());
}
FloatCalc updateScore = -CalcNegUpdate<true>(static_cast<FloatCalc>(pGradientPair->m_sumGradients),
hess,
regAlpha,
regLambda,
deltaStepMax);

*pMissingUpdateScore = updateScore;
++pMissingUpdateScore;

++pGradientPair;
} while(pGradientPairEnd != pGradientPair);
}

EBM_ASSERT(bNominal || pSplit == cSlices - 1 + pInnerTermUpdate->GetSplitPointer(iDimension));

LOG_0(Trace_Verbose, "Exited Flatten");
return Error_None;
}
if(!pTreeNode->DECONSTRUCT_IsRightChildTraversal()) {
// we checked earlier that countBins could be converted to a UIntSplit
if(nullptr == ppBinCur) {
EBM_ASSERT(!bNominal);

while(true) { // not a real loop
if(bMissing) {
if(TermBoostFlags_MissingLow & flags) {
if(1 == iEdge) {
// this cut would isolate the missing bin, but missing already has a cut
break;
}
} else if(TermBoostFlags_MissingHigh & flags) {
EBM_ASSERT(iEdge <= cBins);
if(cBins == iEdge) {
// this cut would isolate the missing bin, but missing already has a cut
break;
}
}
}

EBM_ASSERT(!IsConvertError<UIntSplit>(iEdge));
prev = static_cast<UIntSplit>(iEdge);
EBM_ASSERT(pSplit < cSlices - 1 + pInnerTermUpdate->GetSplitPointer(iDimension));
*pSplit = prev;
++pSplit;

break;
}
}
pParent = pTreeNode;
pTreeNode = pTreeNode->DECONSTRUCT_TraverseRightAndMark(cBytesPerTreeNode);
break;
} else {
pTreeNode = pTreeNode->DECONSTRUCT_GetParent();
}
}
}
}
pInnerTermUpdate->SetCountSlices(iDimension, 1);
pInnerTermUpdate->EnsureTensorScoreCapacity(cScores);
return Error_None;
}
WARNING_POP

Expand Down Expand Up @@ -1104,8 +853,8 @@ template<bool bHessian, size_t cCompilerScores> class PartitionOneDimensionalBoo
cSlices,
cBins);

EBM_ASSERT(!bMissing || 2 <= pBoosterShell->GetInnerTermUpdate()->GetCountSlices(iDimension));
EBM_ASSERT(!bMissing || *pBoosterShell->GetInnerTermUpdate()->GetSplitPointer(iDimension) == 1);
//EBM_ASSERT(!bMissing || 2 <= pBoosterShell->GetInnerTermUpdate()->GetCountSlices(iDimension));
//EBM_ASSERT(!bMissing || *pBoosterShell->GetInnerTermUpdate()->GetSplitPointer(iDimension) == 1);

return error;
}
Expand Down

0 comments on commit 8f971b5

Please sign in to comment.